Merge pull request #1218 from CREDO23/feature/sandbox-integration
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions

[Feat] Add Daytona sandbox code execution + KB search fix
This commit is contained in:
Rohan Verma 2026-04-13 14:11:27 -07:00 committed by GitHub
commit 7c4d1a6af6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 359 additions and 28 deletions

View file

@ -197,6 +197,13 @@ LLAMA_CLOUD_API_KEY=llx-nnn
# AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/ # AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
# AZURE_DI_KEY=your-key # AZURE_DI_KEY=your-key
# Daytona Sandbox (isolated code execution)
# DAYTONA_SANDBOX_ENABLED=FALSE
# DAYTONA_API_KEY=your-daytona-api-key
# DAYTONA_API_URL=https://app.daytona.io/api
# DAYTONA_TARGET=us
# DAYTONA_SNAPSHOT_ID=
# OPTIONAL: Add these for LangSmith Observability # OPTIONAL: Add these for LangSmith Observability
LANGSMITH_TRACING=true LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT=https://api.smith.langchain.com LANGSMITH_ENDPOINT=https://api.smith.langchain.com

View file

@ -439,6 +439,7 @@ async def create_surfsense_deep_agent(
SurfSenseFilesystemMiddleware( SurfSenseFilesystemMiddleware(
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=user_id, created_by_id=user_id,
thread_id=thread_id,
), ),
create_summarization_middleware(llm, StateBackend), create_summarization_middleware(llm, StateBackend),
PatchToolCallsMiddleware(), PatchToolCallsMiddleware(),
@ -466,6 +467,7 @@ async def create_surfsense_deep_agent(
SurfSenseFilesystemMiddleware( SurfSenseFilesystemMiddleware(
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=user_id, created_by_id=user_id,
thread_id=thread_id,
), ),
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]), SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
create_summarization_middleware(llm, StateBackend), create_summarization_middleware(llm, StateBackend),

View file

@ -7,10 +7,12 @@ This middleware customizes prompts and persists write/edit operations for
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import logging
import re import re
from datetime import UTC, datetime from datetime import UTC, datetime
from typing import Annotated, Any from typing import Annotated, Any
from daytona.common.errors import DaytonaError
from deepagents import FilesystemMiddleware from deepagents import FilesystemMiddleware
from deepagents.backends.protocol import EditResult, WriteResult from deepagents.backends.protocol import EditResult, WriteResult
from deepagents.backends.utils import validate_path from deepagents.backends.utils import validate_path
@ -23,6 +25,11 @@ from langchain_core.tools import BaseTool, StructuredTool
from langgraph.types import Command from langgraph.types import Command
from sqlalchemy import delete, select from sqlalchemy import delete, select
from app.agents.new_chat.sandbox import (
_evict_sandbox_cache,
get_or_create_sandbox,
is_sandbox_enabled,
)
from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session
from app.indexing_pipeline.document_chunker import chunk_text from app.indexing_pipeline.document_chunker import chunk_text
from app.utils.document_converters import ( from app.utils.document_converters import (
@ -31,6 +38,8 @@ from app.utils.document_converters import (
generate_unique_identifier_hash, generate_unique_identifier_hash,
) )
logger = logging.getLogger(__name__)
# ============================================================================= # =============================================================================
# System Prompt (injected into every model call by wrap_model_call) # System Prompt (injected into every model call by wrap_model_call)
# ============================================================================= # =============================================================================
@ -40,7 +49,7 @@ SURFSENSE_FILESYSTEM_SYSTEM_PROMPT = """## Following Conventions
- Read files before editing understand existing content before making changes. - Read files before editing understand existing content before making changes.
- Mimic existing style, naming conventions, and patterns. - Mimic existing style, naming conventions, and patterns.
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `save_document` ## Filesystem Tools
All file paths must start with a `/`. All file paths must start with a `/`.
- ls: list files and directories at a given path. - ls: list files and directories at a given path.
@ -128,6 +137,24 @@ SURFSENSE_GREP_TOOL_DESCRIPTION = """Search for a literal text pattern across fi
Use this to locate relevant document files/chunks before reading full files. Use this to locate relevant document files/chunks before reading full files.
""" """
# Tool description shown to the model for execute_code. The tool itself is
# only registered when sandboxing is enabled (see SurfSenseFilesystemMiddleware).
SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes Python code in an isolated sandbox environment.
Common data-science packages are pre-installed (pandas, numpy, matplotlib,
scipy, scikit-learn).
When to use this tool: use execute_code for numerical computation, data
analysis, statistics, and any task that benefits from running Python code.
Never perform arithmetic manually when this tool is available.
Usage notes:
- No outbound network access.
- Returns combined stdout/stderr with exit code.
- Use print() to produce output.
- You can create files, run shell commands via subprocess or os.system(),
and use any standard library module.
- Use the optional timeout parameter to override the default timeout.
"""
SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base. SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base.
This is an expensive operation it creates a new Document record in the This is an expensive operation it creates a new Document record in the
@ -148,17 +175,36 @@ Args:
class SurfSenseFilesystemMiddleware(FilesystemMiddleware): class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
"""SurfSense-specific filesystem middleware with DB persistence for docs.""" """SurfSense-specific filesystem middleware with DB persistence for docs."""
# Hard ceiling (seconds) on the per-call timeout accepted by execute_code;
# also passed to FilesystemMiddleware as max_execute_timeout.
_MAX_EXECUTE_TIMEOUT = 300
def __init__( def __init__(
self, self,
*, *,
search_space_id: int | None = None, search_space_id: int | None = None,
created_by_id: str | None = None, created_by_id: str | None = None,
thread_id: int | str | None = None,
tool_token_limit_before_evict: int | None = 20000, tool_token_limit_before_evict: int | None = 20000,
) -> None: ) -> None:
self._search_space_id = search_space_id self._search_space_id = search_space_id
self._created_by_id = created_by_id self._created_by_id = created_by_id
self._thread_id = thread_id
self._sandbox_available = is_sandbox_enabled() and thread_id is not None
system_prompt = SURFSENSE_FILESYSTEM_SYSTEM_PROMPT
if self._sandbox_available:
system_prompt += (
"\n- execute_code: run Python code in an isolated sandbox."
"\n\n## Code Execution"
"\n\nUse execute_code whenever a task benefits from running code."
" Never perform arithmetic manually."
"\n\nDocuments here are XML-wrapped markdown, not raw data files."
" To work with them programmatically, read the document first,"
" extract the data, write it as a clean file (CSV, JSON, etc.),"
" and then run your code against it."
)
super().__init__( super().__init__(
system_prompt=SURFSENSE_FILESYSTEM_SYSTEM_PROMPT, system_prompt=system_prompt,
custom_tool_descriptions={ custom_tool_descriptions={
"ls": SURFSENSE_LIST_FILES_TOOL_DESCRIPTION, "ls": SURFSENSE_LIST_FILES_TOOL_DESCRIPTION,
"read_file": SURFSENSE_READ_FILE_TOOL_DESCRIPTION, "read_file": SURFSENSE_READ_FILE_TOOL_DESCRIPTION,
@ -168,10 +214,12 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
"grep": SURFSENSE_GREP_TOOL_DESCRIPTION, "grep": SURFSENSE_GREP_TOOL_DESCRIPTION,
}, },
tool_token_limit_before_evict=tool_token_limit_before_evict, tool_token_limit_before_evict=tool_token_limit_before_evict,
max_execute_timeout=self._MAX_EXECUTE_TIMEOUT,
) )
# Remove the execute tool (no sandbox backend)
self.tools = [t for t in self.tools if t.name != "execute"] self.tools = [t for t in self.tools if t.name != "execute"]
self.tools.append(self._create_save_document_tool()) self.tools.append(self._create_save_document_tool())
if self._sandbox_available:
self.tools.append(self._create_execute_code_tool())
@staticmethod @staticmethod
def _run_async_blocking(coro: Any) -> Any: def _run_async_blocking(coro: Any) -> Any:
@ -455,6 +503,108 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
coroutine=async_save_document, coroutine=async_save_document,
) )
def _create_execute_code_tool(self) -> BaseTool:
    """Create the ``execute_code`` tool backed by a Daytona sandbox.

    Returns a StructuredTool with both a sync and an async entry point.
    Both validate the optional ``timeout`` argument and delegate to
    ``_execute_in_sandbox`` for the actual sandbox round-trip.
    """

    def _validate_timeout(timeout: int | None) -> str | None:
        # Shared guard for both entry points (the original duplicated this
        # validation verbatim). Returns an error string surfaced to the
        # model, or None when the timeout is acceptable.
        if timeout is None:
            return None
        if timeout < 0:
            return f"Error: timeout must be non-negative, got {timeout}."
        if timeout > self._MAX_EXECUTE_TIMEOUT:
            return f"Error: timeout {timeout}s exceeds maximum ({self._MAX_EXECUTE_TIMEOUT}s)."
        return None

    def sync_execute_code(
        command: Annotated[
            str, "Python code to execute. Use print() to see output."
        ],
        runtime: ToolRuntime[None, FilesystemState],
        timeout: Annotated[
            int | None,
            "Optional timeout in seconds.",
        ] = None,
    ) -> str:
        error = _validate_timeout(timeout)
        if error is not None:
            return error
        return self._run_async_blocking(
            self._execute_in_sandbox(command, runtime, timeout)
        )

    async def async_execute_code(
        command: Annotated[
            str, "Python code to execute. Use print() to see output."
        ],
        runtime: ToolRuntime[None, FilesystemState],
        timeout: Annotated[
            int | None,
            "Optional timeout in seconds.",
        ] = None,
    ) -> str:
        error = _validate_timeout(timeout)
        if error is not None:
            return error
        return await self._execute_in_sandbox(command, runtime, timeout)

    return StructuredTool.from_function(
        name="execute_code",
        description=SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION,
        func=sync_execute_code,
        coroutine=async_execute_code,
    )
@staticmethod
def _wrap_as_python(code: str) -> str:
"""Wrap Python code in a shell invocation for the sandbox."""
return f"python3 << 'PYEOF'\n{code}\nPYEOF"
async def _execute_in_sandbox(
    self,
    command: str,
    runtime: ToolRuntime[None, FilesystemState],
    timeout: int | None,
) -> str:
    """Run *command* in this thread's sandbox, retrying once on failure.

    On the first failure the cached sandbox handle is evicted (a stale
    handle is the most likely cause) and the execution is retried against
    a freshly resolved sandbox. A second failure returns a user-facing
    error string instead of raising.
    """
    # _sandbox_available in __init__ guarantees thread_id was provided
    # before this tool is ever registered.
    assert self._thread_id is not None
    command = self._wrap_as_python(command)
    try:
        return await self._try_sandbox_execute(command, runtime, timeout)
    except Exception as first_err:
        # FIX: was `except (DaytonaError, Exception)` — Exception already
        # subsumes DaytonaError, so the tuple was redundant.
        logger.warning(
            "Sandbox execute failed for thread %s, retrying: %s",
            self._thread_id,
            first_err,
        )
        _evict_sandbox_cache(self._thread_id)
        try:
            return await self._try_sandbox_execute(command, runtime, timeout)
        except Exception:
            logger.exception(
                "Sandbox retry also failed for thread %s", self._thread_id
            )
            return "Error: Code execution is temporarily unavailable. Please try again."
async def _try_sandbox_execute(
    self,
    command: str,
    runtime: ToolRuntime[None, FilesystemState],
    timeout: int | None,
) -> str:
    """Single execution attempt: acquire the sandbox, run, format output.

    Returns the raw combined output plus a status trailer with the exit
    code, and a truncation note when the sandbox clipped the output.
    """
    sandbox, is_new = await get_or_create_sandbox(self._thread_id)
    # TODO(review): syncing the virtual filesystem into the sandbox is
    # currently disabled — re-enable once sync_files_to_sandbox is wired up:
    #   files = runtime.state.get("files") or {}
    #   await sync_files_to_sandbox(self._thread_id, files, sandbox, is_new)
    result = await sandbox.aexecute(command, timeout=timeout)
    output = (result.output or "").strip()
    if not output and result.exit_code == 0:
        return (
            "[Code executed successfully but produced no output. "
            "Use print() to display results, then try again.]"
        )
    # FIX: result.output may be None (handled as such three lines up);
    # joining a None would raise TypeError, so coerce to "".
    parts = [result.output or ""]
    if result.exit_code is not None:
        status = "succeeded" if result.exit_code == 0 else "failed"
        parts.append(f"\n[Command {status} with exit code {result.exit_code}]")
    if result.truncated:
        parts.append("\n[Output was truncated due to size limits]")
    return "".join(parts)
def _create_write_file_tool(self) -> BaseTool: def _create_write_file_tool(self) -> BaseTool:
"""Create write_file — ephemeral for /documents/*, persisted otherwise.""" """Create write_file — ephemeral for /documents/*, persisted otherwise."""
tool_description = ( tool_description = (

View file

@ -774,11 +774,16 @@ class KnowledgeBaseSearchMiddleware(AgentMiddleware): # type: ignore[type-arg]
messages = state.get("messages") or [] messages = state.get("messages") or []
if not messages: if not messages:
return None return None
last_message = messages[-1]
if not isinstance(last_message, HumanMessage): last_human = None
for msg in reversed(messages):
if isinstance(msg, HumanMessage):
last_human = msg
break
if last_human is None:
return None return None
user_text = _extract_text_from_message(last_message).strip() user_text = _extract_text_from_message(last_human).strip()
if not user_text: if not user_text:
return None return None

View file

@ -42,7 +42,7 @@ class _TimeoutAwareSandbox(DaytonaSandbox):
""" """
def execute(self, command: str, *, timeout: int | None = None) -> ExecuteResponse: def execute(self, command: str, *, timeout: int | None = None) -> ExecuteResponse:
t = timeout if timeout is not None else self._timeout t = timeout if timeout is not None else self._default_timeout
result = self._sandbox.process.exec(command, timeout=t) result = self._sandbox.process.exec(command, timeout=t)
return ExecuteResponse( return ExecuteResponse(
output=result.result, output=result.result,
@ -58,8 +58,10 @@ class _TimeoutAwareSandbox(DaytonaSandbox):
_daytona_client: Daytona | None = None _daytona_client: Daytona | None = None
_sandbox_cache: dict[str, _TimeoutAwareSandbox] = {} _sandbox_cache: dict[str, _TimeoutAwareSandbox] = {}
_seeded_files: dict[str, dict[str, str]] = {}
_SANDBOX_CACHE_MAX_SIZE = 20 _SANDBOX_CACHE_MAX_SIZE = 20
THREAD_LABEL_KEY = "surfsense_thread" THREAD_LABEL_KEY = "surfsense_thread"
SANDBOX_DOCUMENTS_ROOT = "/home/daytona/documents"
def is_sandbox_enabled() -> bool: def is_sandbox_enabled() -> bool:
@ -78,14 +80,29 @@ def _get_client() -> Daytona:
return _daytona_client return _daytona_client
def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox: def _sandbox_create_params(
labels: dict[str, str],
) -> CreateSandboxFromSnapshotParams:
snapshot_id = os.environ.get("DAYTONA_SNAPSHOT_ID") or None
return CreateSandboxFromSnapshotParams(
language="python",
labels=labels,
snapshot=snapshot_id,
network_block_all=True,
auto_stop_interval=10,
auto_delete_interval=60,
)
def _find_or_create(thread_id: str) -> tuple[_TimeoutAwareSandbox, bool]:
"""Find an existing sandbox for *thread_id*, or create a new one. """Find an existing sandbox for *thread_id*, or create a new one.
If an existing sandbox is found but is stopped/archived, it will be Returns a tuple of (sandbox, is_new) where *is_new* is True when a
restarted automatically before returning. fresh sandbox was created (first time or replacement after failure).
""" """
client = _get_client() client = _get_client()
labels = {THREAD_LABEL_KEY: thread_id} labels = {THREAD_LABEL_KEY: thread_id}
is_new = False
try: try:
sandbox = client.find_one(labels=labels) sandbox = client.find_one(labels=labels)
@ -109,41 +126,43 @@ def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox:
sandbox.id, sandbox.id,
sandbox.state, sandbox.state,
) )
sandbox = client.create( try:
CreateSandboxFromSnapshotParams(language="python", labels=labels) client.delete(sandbox)
) except Exception:
logger.debug("Could not delete broken sandbox %s", sandbox.id, exc_info=True)
sandbox = client.create(_sandbox_create_params(labels))
is_new = True
logger.info("Created replacement sandbox: %s", sandbox.id) logger.info("Created replacement sandbox: %s", sandbox.id)
elif sandbox.state != SandboxState.STARTED: elif sandbox.state != SandboxState.STARTED:
sandbox.wait_for_sandbox_start(timeout=60) sandbox.wait_for_sandbox_start(timeout=60)
except Exception: except Exception:
logger.info("No existing sandbox for thread %s — creating one", thread_id) logger.info("No existing sandbox for thread %s — creating one", thread_id)
sandbox = client.create( sandbox = client.create(_sandbox_create_params(labels))
CreateSandboxFromSnapshotParams(language="python", labels=labels) is_new = True
)
logger.info("Created new sandbox: %s", sandbox.id) logger.info("Created new sandbox: %s", sandbox.id)
return _TimeoutAwareSandbox(sandbox=sandbox) return _TimeoutAwareSandbox(sandbox=sandbox), is_new
async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox: async def get_or_create_sandbox(
thread_id: int | str,
) -> tuple[_TimeoutAwareSandbox, bool]:
"""Get or create a sandbox for a conversation thread. """Get or create a sandbox for a conversation thread.
Uses an in-process cache keyed by thread_id so subsequent messages Uses an in-process cache keyed by thread_id so subsequent messages
in the same conversation reuse the sandbox object without an API call. in the same conversation reuse the sandbox object without an API call.
Args:
thread_id: The conversation thread identifier.
Returns: Returns:
DaytonaSandbox connected to the sandbox. Tuple of (sandbox, is_new). *is_new* is True when a fresh sandbox
was created, signalling that file tracking should be reset.
""" """
key = str(thread_id) key = str(thread_id)
cached = _sandbox_cache.get(key) cached = _sandbox_cache.get(key)
if cached is not None: if cached is not None:
logger.info("Reusing cached sandbox for thread %s", key) logger.info("Reusing cached sandbox for thread %s", key)
return cached return cached, False
sandbox = await asyncio.to_thread(_find_or_create, key) sandbox, is_new = await asyncio.to_thread(_find_or_create, key)
_sandbox_cache[key] = sandbox _sandbox_cache[key] = sandbox
if len(_sandbox_cache) > _SANDBOX_CACHE_MAX_SIZE: if len(_sandbox_cache) > _SANDBOX_CACHE_MAX_SIZE:
@ -151,12 +170,60 @@ async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
_sandbox_cache.pop(oldest_key, None) _sandbox_cache.pop(oldest_key, None)
logger.debug("Evicted oldest sandbox cache entry: %s", oldest_key) logger.debug("Evicted oldest sandbox cache entry: %s", oldest_key)
return sandbox return sandbox, is_new
async def sync_files_to_sandbox(
    thread_id: int | str,
    files: dict[str, dict],
    sandbox: _TimeoutAwareSandbox,
    is_new: bool,
) -> None:
    """Upload new or changed virtual-filesystem files to the sandbox.

    Compares *files* (from ``state["files"]``) against the ``_seeded_files``
    tracking dict and uploads only entries whose ``modified_at`` changed.
    When *is_new* is True the tracking is reset so every file re-uploads.

    Args:
        thread_id: Conversation thread the sandbox belongs to.
        files: Virtual-filesystem mapping of path -> file metadata dict.
        sandbox: Target sandbox wrapper.
        is_new: True when the sandbox was freshly created.
    """
    key = str(thread_id)
    if is_new:
        # Fresh sandbox has no files — drop stale tracking so all re-upload.
        _seeded_files.pop(key, None)
    tracked = _seeded_files.get(key, {})

    to_upload: list[tuple[str, bytes]] = []
    for vpath, fdata in files.items():
        modified_at = fdata.get("modified_at", "")
        if tracked.get(vpath) == modified_at:
            continue  # unchanged since the last sync
        # NOTE(review): assumes fdata["content"] is a list of lines — confirm.
        content = "\n".join(fdata.get("content", []))
        sandbox_path = f"{SANDBOX_DOCUMENTS_ROOT}{vpath}"
        to_upload.append((sandbox_path, content.encode("utf-8")))

    if not to_upload:
        return

    # FIX (idiom): the original wrapped this single blocking call in a
    # needless nested closure; pass the bound method to to_thread directly.
    await asyncio.to_thread(sandbox.upload_files, to_upload)

    new_tracked = dict(tracked)
    for vpath, fdata in files.items():
        new_tracked[vpath] = fdata.get("modified_at", "")
    _seeded_files[key] = new_tracked
    logger.info("Synced %d file(s) to sandbox for thread %s", len(to_upload), key)
def _evict_sandbox_cache(thread_id: int | str) -> None:
    """Drop all cached per-thread state: sandbox handle and file tracking."""
    key = str(thread_id)
    for cache in (_sandbox_cache, _seeded_files):
        cache.pop(key, None)
async def delete_sandbox(thread_id: int | str) -> None: async def delete_sandbox(thread_id: int | str) -> None:
"""Delete the sandbox for a conversation thread.""" """Delete the sandbox for a conversation thread."""
_sandbox_cache.pop(str(thread_id), None) _evict_sandbox_cache(thread_id)
def _delete() -> None: def _delete() -> None:
client = _get_client() client = _get_client()
@ -193,7 +260,11 @@ def _get_sandbox_files_dir() -> Path:
def _local_path_for(thread_id: int | str, sandbox_path: str) -> Path: def _local_path_for(thread_id: int | str, sandbox_path: str) -> Path:
"""Map a sandbox-internal absolute path to a local filesystem path.""" """Map a sandbox-internal absolute path to a local filesystem path."""
relative = sandbox_path.lstrip("/") relative = sandbox_path.lstrip("/")
return _get_sandbox_files_dir() / str(thread_id) / relative base = (_get_sandbox_files_dir() / str(thread_id)).resolve()
target = (base / relative).resolve()
if not target.is_relative_to(base):
raise ValueError(f"Path traversal blocked: {sandbox_path}")
return target
def get_local_sandbox_file(thread_id: int | str, sandbox_path: str) -> bytes | None: def get_local_sandbox_file(thread_id: int | str, sandbox_path: str) -> bytes | None:
@ -226,7 +297,7 @@ async def persist_and_delete_sandbox(
Per-file errors are logged but do **not** prevent the sandbox from Per-file errors are logged but do **not** prevent the sandbox from
being deleted freeing Daytona storage is the priority. being deleted freeing Daytona storage is the priority.
""" """
_sandbox_cache.pop(str(thread_id), None) _evict_sandbox_cache(thread_id)
def _persist_and_delete() -> None: def _persist_and_delete() -> None:
client = _get_client() client = _get_client()

View file

@ -0,0 +1,96 @@
"""Create the Daytona snapshot used by SurfSense code-execution sandboxes.
Run from the backend directory:
cd surfsense_backend
uv run python scripts/create_sandbox_snapshot.py
Prerequisites:
- DAYTONA_API_KEY set in surfsense_backend/.env (or exported in shell)
- DAYTONA_API_URL=https://app.daytona.io/api
- DAYTONA_TARGET=us (or eu)
After this script succeeds, add to surfsense_backend/.env:
DAYTONA_SNAPSHOT_ID=surfsense-sandbox
"""
import os
import sys
import time
from pathlib import Path
from dotenv import load_dotenv
# Load the first .env found among likely locations relative to this script.
# NOTE(review): with this script at surfsense_backend/scripts/, the first
# candidate resolves to surfsense_backend/surfsense_backend/.env, which
# likely never exists; the backend .env is actually matched by the third
# candidate ("../.env") — confirm the intended search order.
_here = Path(__file__).parent
for candidate in [_here / "../surfsense_backend/.env", _here / ".env", _here / "../.env"]:
    if candidate.exists():
        load_dotenv(candidate)
        break
from daytona import CreateSnapshotParams, Daytona, Image # noqa: E402
# Name registered with Daytona; this is also the value users put in
# DAYTONA_SNAPSHOT_ID (see module docstring).
SNAPSHOT_NAME = "surfsense-sandbox"
# Data-science packages baked into the sandbox image.
PACKAGES = [
    "pandas",
    "numpy",
    "matplotlib",
    "scipy",
    "scikit-learn",
]
def build_image() -> Image:
    """Assemble the sandbox image: data-science packages plus a /documents symlink."""
    image = Image.debian_slim("3.12")
    image = image.pip_install(*PACKAGES)
    # Symlink /documents -> /home/daytona/documents so the LLM can address
    # files with the same /documents/ prefix it sees in the virtual filesystem.
    image = image.run_commands(
        "mkdir -p /home/daytona/documents",
        "ln -sfn /home/daytona/documents /documents",
    )
    return image
def main() -> None:
    """Build (or rebuild) the Daytona snapshot used by SurfSense sandboxes.

    Deletes any pre-existing snapshot with the same name, waits for the
    deletion to propagate, then builds the image and registers it under
    SNAPSHOT_NAME. Exits with status 1 when DAYTONA_API_KEY is missing.
    """
    api_key = os.environ.get("DAYTONA_API_KEY")
    if not api_key:
        print("ERROR: DAYTONA_API_KEY is not set.", file=sys.stderr)
        print("Add it to surfsense_backend/.env or export it in your shell.", file=sys.stderr)
        sys.exit(1)

    daytona = Daytona()

    # Existence check kept narrow (EAFP): only the get() is expected to fail
    # when no snapshot exists yet.
    try:
        existing = daytona.snapshot.get(SNAPSHOT_NAME)
    except Exception:
        existing = None
    if existing is not None:
        print(f"Deleting existing snapshot '{SNAPSHOT_NAME}'")
        try:
            daytona.snapshot.delete(existing)
            print(f"Deleted '{SNAPSHOT_NAME}'. Waiting for removal to propagate …")
            # Poll up to ~60s; get() raising means the snapshot is gone.
            for _ in range(30):  # FIX: loop variable was unused ('attempt')
                time.sleep(2)
                try:
                    daytona.snapshot.get(SNAPSHOT_NAME)
                except Exception:
                    print(f"Confirmed '{SNAPSHOT_NAME}' is gone.\n")
                    break
            else:
                print(f"WARNING: '{SNAPSHOT_NAME}' may still exist after 60s. Proceeding anyway.\n")
        except Exception as err:
            # FIX: the original wrapped this whole section in a single
            # `except Exception: pass`, silently hiding real deletion
            # failures. Surface them, but keep the best-effort behavior.
            print(f"WARNING: could not delete existing snapshot: {err}. Proceeding anyway.\n")

    print(f"Building snapshot '{SNAPSHOT_NAME}'")
    print(f"Packages: {', '.join(PACKAGES)}\n")
    daytona.snapshot.create(
        CreateSnapshotParams(name=SNAPSHOT_NAME, image=build_image()),
        on_logs=lambda chunk: print(chunk, end="", flush=True),
    )
    print(f"\n\nSnapshot '{SNAPSHOT_NAME}' is ready.")
    print("\nAdd this to surfsense_backend/.env:")
    print(f"  DAYTONA_SNAPSHOT_ID={SNAPSHOT_NAME}")
# Script entry point — only runs when executed directly, not on import.
if __name__ == "__main__":
    main()