refactor(filesystem): streamline filesystem operations by removing cursor-based pagination and enhancing path normalization methods

This commit is contained in:
Anish Sarkar 2026-04-28 00:45:07 +05:30
parent f330d1431c
commit 7bcb6306c5
3 changed files with 69 additions and 239 deletions

View file

@ -28,9 +28,6 @@ from langgraph.types import Command
from sqlalchemy import delete, select
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware.multi_root_local_folder_backend import (
MultiRootLocalFolderBackend,
)
from app.agents.new_chat.sandbox import (
_evict_sandbox_cache,
delete_sandbox,
@ -152,21 +149,19 @@ Notes:
- Cross-mount moves are not supported.
"""
SURFSENSE_LIST_TREE_TOOL_DESCRIPTION = """Lists files/folders recursively with cursor pagination.
SURFSENSE_LIST_TREE_TOOL_DESCRIPTION = """Lists files/folders recursively in a single bounded call.
Use this in desktop local-folder mode to discover nested files at scale.
Args:
- path: absolute mount-prefixed path (e.g., /<mount>/src) or "/" for mount roots.
- max_depth: recursion depth limit (default 8).
- page_size: number of entries to return per page (max 1000).
- cursor: opaque continuation token from a previous call.
- page_size: maximum number of entries returned (max 1000).
- include_files/include_dirs: filter returned entry types.
Returns JSON with:
- entries: [{path, is_dir, size, modified_at, depth}]
- next_cursor: continuation token or null
- has_more: whether additional pages exist
- truncated: true when additional entries were omitted due to page_size
"""
SURFSENSE_GLOB_TOOL_DESCRIPTION = """Find files matching a glob pattern.
@ -251,13 +246,13 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
if filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
system_prompt += (
"\n- move_file: move or rename files/folders in local-folder mode."
"\n- list_tree: recursively list nested local paths with cursor pagination."
"\n- list_tree: recursively list nested local paths in one bounded response."
"\n\n## Local Folder Mode"
"\n\nThis chat is running in desktop local-folder mode."
" Keep all file operations local. Do not use save_document."
" Always use mount-prefixed absolute paths like /<folder>/file.ext."
" If you are unsure which mounts are available, call ls('/') first."
" For big trees: use list_tree pages, then grep, then read_file."
" For big trees: use list_tree, then grep, then read_file."
)
super().__init__(
@ -812,35 +807,14 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
"""Only cloud mode persists file content to Document/Chunk tables."""
return self._filesystem_mode == FilesystemMode.CLOUD
def _default_mount_prefix(self, runtime: ToolRuntime[None, FilesystemState]) -> str:
    """Return the default mount as a path prefix, or "" when not applicable.

    The prefix has the form ``/<default mount>`` and is only produced when
    the backend resolved for ``runtime`` is a ``MultiRootLocalFolderBackend``;
    every other backend yields the empty string.
    """
    active_backend = self._get_backend(runtime)
    if not isinstance(active_backend, MultiRootLocalFolderBackend):
        return ""
    return f"/{active_backend.default_mount()}"
def _normalize_local_mount_path(
    self, candidate: str, runtime: ToolRuntime[None, FilesystemState]
) -> str:
    """Normalize ``candidate`` into an absolute, mount-prefixed path.

    Backslashes become forward slashes and repeated slashes are collapsed.
    When there is no default mount prefix, or the backend is not a
    ``MultiRootLocalFolderBackend``, the result is simply the candidate with
    a single leading slash. Otherwise a path whose first segment is one of
    the backend's mounts is kept as-is (rooted), and any other path is placed
    under the default mount prefix.
    """
    backend = self._get_backend(runtime)
    default_prefix = self._default_mount_prefix(runtime)

    # After collapsing slash runs at most one leading "/" remains, so
    # "/" + relative is the canonical rooted spelling of the candidate.
    cleaned = re.sub(r"/+", "/", candidate.strip().replace("\\", "/"))
    relative = cleaned.lstrip("/")
    rooted = f"/{relative}"

    multi_root = bool(default_prefix) and isinstance(
        backend, MultiRootLocalFolderBackend
    )
    if not multi_root:
        return rooted

    leading_segment = relative.split("/", 1)[0]
    if leading_segment in set(backend.list_mounts()):
        # Already addressed to a known mount.
        return rooted
    return f"{default_prefix}{rooted}"
@staticmethod
def _normalize_absolute_path(candidate: str) -> str:
normalized = re.sub(r"/+", "/", candidate.strip().replace("\\", "/"))
if not normalized:
return "/"
if normalized.startswith("/"):
return normalized
return f"/{normalized.lstrip('/')}"
def _get_contract_suggested_path(
self, runtime: ToolRuntime[None, FilesystemState]
@ -848,14 +822,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
contract = runtime.state.get("file_operation_contract") or {}
suggested = contract.get("suggested_path")
if isinstance(suggested, str) and suggested.strip():
cleaned = suggested.strip()
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
return self._normalize_local_mount_path(cleaned, runtime)
return cleaned
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
mount_prefix = self._default_mount_prefix(runtime)
if mount_prefix:
return f"{mount_prefix}/notes.md"
return self._normalize_absolute_path(suggested)
return "/notes.md"
def _resolve_write_target_path(
@ -867,7 +834,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
if not candidate:
return self._get_contract_suggested_path(runtime)
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
return self._normalize_local_mount_path(candidate, runtime)
return self._normalize_absolute_path(candidate)
if not candidate.startswith("/"):
return f"/{candidate.lstrip('/')}"
return candidate
@ -881,7 +848,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
if not candidate:
return ""
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
return self._normalize_local_mount_path(candidate, runtime)
return self._normalize_absolute_path(candidate)
if not candidate.startswith("/"):
return f"/{candidate.lstrip('/')}"
return candidate
@ -895,7 +862,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
if candidate == "/":
return "/"
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
return self._normalize_local_mount_path(candidate, runtime)
return self._normalize_absolute_path(candidate)
if not candidate.startswith("/"):
return f"/{candidate.lstrip('/')}"
return candidate
@ -1136,12 +1103,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
] = 8,
page_size: Annotated[
int,
"Number of entries to return per page. Defaults to 500 (max 1000).",
"Maximum number of entries to return. Defaults to 500 (max 1000).",
] = 500,
cursor: Annotated[
str | None,
"Opaque cursor from a previous list_tree call.",
] = None,
include_files: Annotated[
bool,
"Whether file entries should be included.",
@ -1171,7 +1134,6 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
validated_path,
max_depth=max_depth,
page_size=page_size,
cursor=cursor,
include_files=include_files,
include_dirs=include_dirs,
)
@ -1193,12 +1155,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
] = 8,
page_size: Annotated[
int,
"Number of entries to return per page. Defaults to 500 (max 1000).",
"Maximum number of entries to return. Defaults to 500 (max 1000).",
] = 500,
cursor: Annotated[
str | None,
"Opaque cursor from a previous list_tree call.",
] = None,
include_files: Annotated[
bool,
"Whether file entries should be included.",
@ -1228,7 +1186,6 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
validated_path,
max_depth=max_depth,
page_size=page_size,
cursor=cursor,
include_files=include_files,
include_dirs=include_dirs,
)

View file

@ -9,9 +9,7 @@ import threading
from collections import deque
from contextlib import ExitStack
from pathlib import Path
from time import time
from typing import Any
from uuid import uuid4
from deepagents.backends.protocol import (
EditResult,
@ -43,8 +41,6 @@ class LocalFolderBackend:
self._root = root
self._locks: dict[str, threading.Lock] = {}
self._locks_mu = threading.Lock()
self._tree_sessions: dict[str, dict[str, Any]] = {}
self._tree_sessions_ttl_s = 900
def _lock_for(self, path: str) -> threading.Lock:
with self._locks_mu:
@ -89,16 +85,6 @@ class LocalFolderBackend:
def _clamp_page_size(page_size: int) -> int:
return max(1, min(page_size, 1000))
def _prune_expired_tree_sessions(self) -> None:
now = time()
expired = [
cursor
for cursor, session in self._tree_sessions.items()
if now - float(session.get("last_accessed_at", now)) > self._tree_sessions_ttl_s
]
for cursor in expired:
self._tree_sessions.pop(cursor, None)
def _read_dir_entries(self, directory_path: str) -> list[dict[str, Any]]:
directory = Path(directory_path)
try:
@ -206,148 +192,82 @@ class LocalFolderBackend:
*,
max_depth: int | None = 8,
page_size: int = 500,
cursor: str | None = None,
include_files: bool = True,
include_dirs: bool = True,
) -> dict[str, Any]:
self._prune_expired_tree_sessions()
if not include_files and not include_dirs:
return {
"entries": [],
"next_cursor": None,
"has_more": False,
"truncated": False,
}
normalized_depth = None if max_depth is None else max(0, int(max_depth))
page_limit = self._clamp_page_size(int(page_size))
now = time()
if cursor:
session = self._tree_sessions.get(cursor)
if not session:
return {"error": "Invalid or expired cursor"}
if (
session.get("path") != path
or session.get("max_depth") != normalized_depth
or session.get("include_files") != include_files
or session.get("include_dirs") != include_dirs
):
return {"error": "Cursor options do not match request options"}
state = session
else:
try:
start = self._resolve_virtual(path, allow_root=True)
except ValueError:
return {"error": f"Error: invalid path '{path}'"}
if not start.exists():
return {"error": f"Error: path '{path}' not found"}
if start.is_file():
stat_result = start.stat()
if include_files:
return {
"entries": [
{
"path": self._to_virtual(start, self._root),
"is_dir": False,
"size": stat_result.st_size,
"modified_at": str(stat_result.st_mtime),
"depth": 0,
}
],
"next_cursor": None,
"has_more": False,
"truncated": False,
}
try:
start = self._resolve_virtual(path, allow_root=True)
except ValueError:
return {"error": f"Error: invalid path '{path}'"}
if not start.exists():
return {"error": f"Error: path '{path}' not found"}
if start.is_file():
stat_result = start.stat()
if include_files:
return {
"entries": [],
"next_cursor": None,
"has_more": False,
"entries": [
{
"path": self._to_virtual(start, self._root),
"is_dir": False,
"size": stat_result.st_size,
"modified_at": str(stat_result.st_mtime),
"depth": 0,
}
],
"truncated": False,
}
state = {
"path": path,
"max_depth": normalized_depth,
"include_files": include_files,
"include_dirs": include_dirs,
"pending_dirs": deque([(str(start), 0)]),
"active_dir": None,
"active_depth": 0,
"active_entries": [],
"active_index": 0,
return {
"entries": [],
"truncated": False,
}
pending_dirs: deque[tuple[str, int]] = deque([(str(start), 0)])
entries: list[dict[str, Any]] = []
truncated = False
while len(entries) < page_limit:
active_entries = state.get("active_entries", [])
active_index = int(state.get("active_index", 0))
if active_index >= len(active_entries):
pending_dirs = state.get("pending_dirs", [])
if not pending_dirs:
state["active_entries"] = []
state["active_index"] = 0
break
next_dir_path, next_depth = pending_dirs.popleft()
state["active_dir"] = next_dir_path
state["active_depth"] = next_depth
state["active_entries"] = self._read_dir_entries(next_dir_path)
state["active_index"] = 0
active_entries = state["active_entries"]
active_index = 0
if active_index >= len(active_entries):
continue
item = active_entries[active_index]
state["active_index"] = active_index + 1
item_depth = int(state.get("active_depth", 0)) + 1
if normalized_depth is not None and item_depth > normalized_depth:
continue
if item["is_dir"]:
if normalized_depth is None or item_depth <= normalized_depth:
state["pending_dirs"].append((item["absolute_path"], item_depth))
if include_dirs:
while pending_dirs and not truncated:
next_dir_path, next_depth = pending_dirs.popleft()
active_entries = self._read_dir_entries(next_dir_path)
for item in active_entries:
item_depth = next_depth + 1
if normalized_depth is not None and item_depth > normalized_depth:
continue
if item["is_dir"]:
if normalized_depth is None or item_depth <= normalized_depth:
pending_dirs.append((item["absolute_path"], item_depth))
if include_dirs:
entries.append(
{
"path": item["path"],
"is_dir": True,
"size": 0,
"modified_at": item["modified_at"],
"depth": item_depth,
}
)
elif include_files:
entries.append(
{
"path": item["path"],
"is_dir": True,
"size": 0,
"is_dir": False,
"size": item["size"],
"modified_at": item["modified_at"],
"depth": item_depth,
}
)
elif include_files:
entries.append(
{
"path": item["path"],
"is_dir": False,
"size": item["size"],
"modified_at": item["modified_at"],
"depth": item_depth,
}
)
if len(entries) >= page_limit:
truncated = True
break
has_more = bool(state.get("pending_dirs")) or (
int(state.get("active_index", 0)) < len(state.get("active_entries", []))
)
if has_more:
next_cursor = cursor or uuid4().hex
state["last_accessed_at"] = now
self._tree_sessions[next_cursor] = state
else:
next_cursor = None
if cursor:
self._tree_sessions.pop(cursor, None)
if len(entries) >= page_limit:
truncated = True
break
return {
"entries": entries,
"next_cursor": next_cursor,
"has_more": has_more,
"truncated": truncated,
}
@ -357,7 +277,6 @@ class LocalFolderBackend:
*,
max_depth: int | None = 8,
page_size: int = 500,
cursor: str | None = None,
include_files: bool = True,
include_dirs: bool = True,
) -> dict[str, Any]:
@ -366,7 +285,6 @@ class LocalFolderBackend:
path,
max_depth=max_depth,
page_size=page_size,
cursor=cursor,
include_files=include_files,
include_dirs=include_dirs,
)

View file

@ -3,8 +3,6 @@
from __future__ import annotations
import asyncio
import base64
import json
from pathlib import Path
from typing import Any
@ -109,28 +107,6 @@ class MultiRootLocalFolderBackend:
for mount in self._mount_order
]
@staticmethod
def _encode_tree_cursor(mount: str, local_cursor: str) -> str:
payload = json.dumps(
{"mount": mount, "cursor": local_cursor},
separators=(",", ":"),
).encode("utf-8")
return base64.urlsafe_b64encode(payload).decode("ascii")
@staticmethod
def _decode_tree_cursor(cursor: str) -> tuple[str, str]:
try:
padded = cursor + "=" * ((4 - len(cursor) % 4) % 4)
data = base64.urlsafe_b64decode(padded.encode("ascii"))
parsed = json.loads(data.decode("utf-8"))
except Exception as exc:
raise ValueError("Invalid cursor") from exc
mount = parsed.get("mount")
local_cursor = parsed.get("cursor")
if not isinstance(mount, str) or not isinstance(local_cursor, str):
raise ValueError("Invalid cursor")
return mount, local_cursor
def _transform_infos(self, mount: str, infos: list[FileInfo]) -> list[FileInfo]:
transformed: list[FileInfo] = []
for info in infos:
@ -162,11 +138,10 @@ class MultiRootLocalFolderBackend:
*,
max_depth: int | None = 8,
page_size: int = 500,
cursor: str | None = None,
include_files: bool = True,
include_dirs: bool = True,
) -> dict[str, Any]:
if path == "/" and not cursor:
if path == "/":
entries = [
{
"path": f"/{mount}",
@ -179,20 +154,11 @@ class MultiRootLocalFolderBackend:
]
return {
"entries": entries if include_dirs else [],
"next_cursor": None,
"has_more": False,
"truncated": False,
}
try:
if cursor:
mount, local_cursor = self._decode_tree_cursor(cursor)
if mount not in self._mount_to_backend:
return {"error": "Invalid or expired cursor"}
local_path = "/"
else:
mount, local_path = self._split_mount_path(path)
local_cursor = None
mount, local_path = self._split_mount_path(path)
except ValueError as exc:
return {"error": f"Error: {exc}"}
@ -200,7 +166,6 @@ class MultiRootLocalFolderBackend:
local_path,
max_depth=max_depth,
page_size=page_size,
cursor=local_cursor,
include_files=include_files,
include_dirs=include_dirs,
)
@ -220,16 +185,8 @@ class MultiRootLocalFolderBackend:
}
)
local_next_cursor = self._get_str(result, "next_cursor")
next_cursor = (
self._encode_tree_cursor(mount, local_next_cursor)
if local_next_cursor
else None
)
return {
"entries": entries,
"next_cursor": next_cursor,
"has_more": self._get_bool(result, "has_more"),
"truncated": self._get_bool(result, "truncated"),
}
@ -239,7 +196,6 @@ class MultiRootLocalFolderBackend:
*,
max_depth: int | None = 8,
page_size: int = 500,
cursor: str | None = None,
include_files: bool = True,
include_dirs: bool = True,
) -> dict[str, Any]:
@ -248,7 +204,6 @@ class MultiRootLocalFolderBackend:
path,
max_depth=max_depth,
page_size=page_size,
cursor=cursor,
include_files=include_files,
include_dirs=include_dirs,
)