feat(new-chat): integrate filesystem flow into agent pipeline

This commit is contained in:
Anish Sarkar 2026-04-23 15:45:33 +05:30
parent 42d2d2222e
commit 1eadecee23
10 changed files with 574 additions and 25 deletions

View file

@ -32,6 +32,7 @@ from app.agents.new_chat.sandbox import (
get_or_create_sandbox,
is_sandbox_enabled,
)
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session
from app.indexing_pipeline.document_chunker import chunk_text
from app.utils.document_converters import (
@ -50,6 +51,8 @@ SURFSENSE_FILESYSTEM_SYSTEM_PROMPT = """## Following Conventions
- Read files before editing understand existing content before making changes.
- Mimic existing style, naming conventions, and patterns.
- Never claim a file was created/updated unless filesystem tool output confirms success.
- If a file write/edit fails, explicitly report the failure.
## Filesystem Tools
@ -109,13 +112,20 @@ Usage:
- Use chunk IDs (`<chunk id='...'>`) as citations in answers.
"""
SURFSENSE_WRITE_FILE_TOOL_DESCRIPTION = """Writes a new file to the in-memory filesystem (session-only).
SURFSENSE_WRITE_FILE_TOOL_DESCRIPTION = """Writes a new text file to the in-memory filesystem (session-only).
Use this to create scratch/working files during the conversation. Files created
here are ephemeral and will not be saved to the user's knowledge base.
To permanently save a document to the user's knowledge base, use the
`save_document` tool instead.
Supported outputs include common LLM-friendly text formats like markdown, json,
yaml, csv, xml, html, css, sql, and code files.
When creating content from open-ended prompts, produce concrete and useful text,
not placeholders. Avoid adding dates/timestamps unless the user explicitly asks
for them.
"""
SURFSENSE_EDIT_FILE_TOOL_DESCRIPTION = """Performs exact string replacements in files.
@ -182,11 +192,14 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
def __init__(
self,
*,
backend: Any = None,
filesystem_mode: FilesystemMode = FilesystemMode.CLOUD,
search_space_id: int | None = None,
created_by_id: str | None = None,
thread_id: int | str | None = None,
tool_token_limit_before_evict: int | None = 20000,
) -> None:
self._filesystem_mode = filesystem_mode
self._search_space_id = search_space_id
self._created_by_id = created_by_id
self._thread_id = thread_id
@ -204,8 +217,15 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
" extract the data, write it as a clean file (CSV, JSON, etc.),"
" and then run your code against it."
)
if filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
system_prompt += (
"\n\n## Local Folder Mode"
"\n\nThis chat is running in desktop local-folder mode."
" Keep all file operations local. Do not use save_document."
)
super().__init__(
backend=backend,
system_prompt=system_prompt,
custom_tool_descriptions={
"ls": SURFSENSE_LIST_FILES_TOOL_DESCRIPTION,
@ -219,7 +239,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
max_execute_timeout=self._MAX_EXECUTE_TIMEOUT,
)
self.tools = [t for t in self.tools if t.name != "execute"]
self.tools.append(self._create_save_document_tool())
if self._should_persist_documents():
self.tools.append(self._create_save_document_tool())
if self._sandbox_available:
self.tools.append(self._create_execute_code_tool())
@ -637,15 +658,25 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
runtime: ToolRuntime[None, FilesystemState],
) -> Command | str:
resolved_backend = self._get_backend(runtime)
target_path = self._resolve_write_target_path(file_path, runtime)
try:
validated_path = validate_path(file_path)
validated_path = validate_path(target_path)
except ValueError as exc:
return f"Error: {exc}"
res: WriteResult = resolved_backend.write(validated_path, content)
if res.error:
return res.error
verify_error = self._verify_written_content_sync(
backend=resolved_backend,
path=validated_path,
expected_content=content,
)
if verify_error:
return verify_error
if not self._is_kb_document(validated_path):
if self._should_persist_documents() and not self._is_kb_document(
validated_path
):
persist_result = self._run_async_blocking(
self._persist_new_document(
file_path=validated_path, content=content
@ -682,15 +713,25 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
runtime: ToolRuntime[None, FilesystemState],
) -> Command | str:
resolved_backend = self._get_backend(runtime)
target_path = self._resolve_write_target_path(file_path, runtime)
try:
validated_path = validate_path(file_path)
validated_path = validate_path(target_path)
except ValueError as exc:
return f"Error: {exc}"
res: WriteResult = await resolved_backend.awrite(validated_path, content)
if res.error:
return res.error
verify_error = await self._verify_written_content_async(
backend=resolved_backend,
path=validated_path,
expected_content=content,
)
if verify_error:
return verify_error
if not self._is_kb_document(validated_path):
if self._should_persist_documents() and not self._is_kb_document(
validated_path
):
persist_result = await self._persist_new_document(
file_path=validated_path,
content=content,
@ -726,6 +767,124 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
"""Return True for paths under /documents/ (KB-sourced, XML-wrapped)."""
return path.startswith("/documents/")
def _should_persist_documents(self) -> bool:
    """Tell whether file content should be persisted to Document/Chunk tables.

    Returns:
        True only when this middleware was configured with
        ``FilesystemMode.CLOUD``; any other filesystem mode yields False.
    """
    persists_in_this_mode = self._filesystem_mode == FilesystemMode.CLOUD
    return persists_in_this_mode
@staticmethod
def _get_contract_suggested_path(runtime: ToolRuntime[None, FilesystemState]) -> str:
contract = runtime.state.get("file_operation_contract") or {}
suggested = contract.get("suggested_path")
if isinstance(suggested, str) and suggested.strip():
return suggested.strip()
return "/notes.md"
def _resolve_write_target_path(
self,
file_path: str,
runtime: ToolRuntime[None, FilesystemState],
) -> str:
candidate = file_path.strip()
if not candidate:
return self._get_contract_suggested_path(runtime)
if not candidate.startswith("/"):
return f"/{candidate.lstrip('/')}"
return candidate
@staticmethod
def _is_error_text(value: str) -> bool:
return value.startswith("Error:")
@staticmethod
def _read_for_verification_sync(backend: Any, path: str) -> str:
read_raw = getattr(backend, "read_raw", None)
if callable(read_raw):
return read_raw(path)
return backend.read(path, offset=0, limit=200000)
@staticmethod
async def _read_for_verification_async(backend: Any, path: str) -> str:
aread_raw = getattr(backend, "aread_raw", None)
if callable(aread_raw):
return await aread_raw(path)
return await backend.aread(path, offset=0, limit=200000)
def _verify_written_content_sync(
self,
*,
backend: Any,
path: str,
expected_content: str,
) -> str | None:
actual = self._read_for_verification_sync(backend, path)
if self._is_error_text(actual):
return f"Error: could not verify written file '{path}'."
if actual.rstrip() != expected_content.rstrip():
return (
"Error: file write verification failed; expected content was not fully written "
f"to '{path}'."
)
return None
async def _verify_written_content_async(
self,
*,
backend: Any,
path: str,
expected_content: str,
) -> str | None:
actual = await self._read_for_verification_async(backend, path)
if self._is_error_text(actual):
return f"Error: could not verify written file '{path}'."
if actual.rstrip() != expected_content.rstrip():
return (
"Error: file write verification failed; expected content was not fully written "
f"to '{path}'."
)
return None
def _verify_edited_content_sync(
self,
*,
backend: Any,
path: str,
new_string: str,
) -> tuple[str | None, str | None]:
updated_content = self._read_for_verification_sync(backend, path)
if self._is_error_text(updated_content):
return (
f"Error: could not verify edited file '{path}'.",
None,
)
if new_string and new_string not in updated_content:
return (
"Error: edit verification failed; updated content was not found in "
f"'{path}'.",
None,
)
return None, updated_content
async def _verify_edited_content_async(
self,
*,
backend: Any,
path: str,
new_string: str,
) -> tuple[str | None, str | None]:
updated_content = await self._read_for_verification_async(backend, path)
if self._is_error_text(updated_content):
return (
f"Error: could not verify edited file '{path}'.",
None,
)
if new_string and new_string not in updated_content:
return (
"Error: edit verification failed; updated content was not found in "
f"'{path}'.",
None,
)
return None, updated_content
def _create_edit_file_tool(self) -> BaseTool:
"""Create edit_file with DB persistence (skipped for KB documents)."""
tool_description = (
@ -754,8 +913,9 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
] = False,
) -> Command | str:
resolved_backend = self._get_backend(runtime)
target_path = self._resolve_write_target_path(file_path, runtime)
try:
validated_path = validate_path(file_path)
validated_path = validate_path(target_path)
except ValueError as exc:
return f"Error: {exc}"
res: EditResult = resolved_backend.edit(
@ -767,13 +927,22 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
if res.error:
return res.error
if not self._is_kb_document(validated_path):
read_result = resolved_backend.read(
validated_path, offset=0, limit=200000
)
if read_result.error or read_result.file_data is None:
return f"Error: could not reload edited file '{validated_path}' for persistence."
updated_content = read_result.file_data["content"]
verify_error, updated_content = self._verify_edited_content_sync(
backend=resolved_backend,
path=validated_path,
new_string=new_string,
)
if verify_error:
return verify_error
if self._should_persist_documents() and not self._is_kb_document(
validated_path
):
if updated_content is None:
return (
f"Error: could not reload edited file '{validated_path}' for "
"persistence."
)
persist_result = self._run_async_blocking(
self._persist_edited_document(
file_path=validated_path,
@ -818,8 +987,9 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
] = False,
) -> Command | str:
resolved_backend = self._get_backend(runtime)
target_path = self._resolve_write_target_path(file_path, runtime)
try:
validated_path = validate_path(file_path)
validated_path = validate_path(target_path)
except ValueError as exc:
return f"Error: {exc}"
res: EditResult = await resolved_backend.aedit(
@ -831,13 +1001,22 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
if res.error:
return res.error
if not self._is_kb_document(validated_path):
read_result = await resolved_backend.aread(
validated_path, offset=0, limit=200000
)
if read_result.error or read_result.file_data is None:
return f"Error: could not reload edited file '{validated_path}' for persistence."
updated_content = read_result.file_data["content"]
verify_error, updated_content = await self._verify_edited_content_async(
backend=resolved_backend,
path=validated_path,
new_string=new_string,
)
if verify_error:
return verify_error
if self._should_persist_documents() and not self._is_kb_document(
validated_path
):
if updated_content is None:
return (
f"Error: could not reload edited file '{validated_path}' for "
"persistence."
)
persist_error = await self._persist_edited_document(
file_path=validated_path,
updated_content=updated_content,