mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-16 21:05:20 +02:00
refactor(filesystem): streamline filesystem operations by removing cursor-based pagination and enhancing path normalization methods
This commit is contained in:
parent
f330d1431c
commit
7bcb6306c5
3 changed files with 69 additions and 239 deletions
|
|
@ -28,9 +28,6 @@ from langgraph.types import Command
|
|||
from sqlalchemy import delete, select
|
||||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
from app.agents.new_chat.middleware.multi_root_local_folder_backend import (
|
||||
MultiRootLocalFolderBackend,
|
||||
)
|
||||
from app.agents.new_chat.sandbox import (
|
||||
_evict_sandbox_cache,
|
||||
delete_sandbox,
|
||||
|
|
@ -152,21 +149,19 @@ Notes:
|
|||
- Cross-mount moves are not supported.
|
||||
"""
|
||||
|
||||
SURFSENSE_LIST_TREE_TOOL_DESCRIPTION = """Lists files/folders recursively with cursor pagination.
|
||||
SURFSENSE_LIST_TREE_TOOL_DESCRIPTION = """Lists files/folders recursively in a single bounded call.
|
||||
|
||||
Use this in desktop local-folder mode to discover nested files at scale.
|
||||
|
||||
Args:
|
||||
- path: absolute mount-prefixed path (e.g., /<mount>/src) or "/" for mount roots.
|
||||
- max_depth: recursion depth limit (default 8).
|
||||
- page_size: number of entries to return per page (max 1000).
|
||||
- cursor: opaque continuation token from a previous call.
|
||||
- page_size: maximum number of entries returned (max 1000).
|
||||
- include_files/include_dirs: filter returned entry types.
|
||||
|
||||
Returns JSON with:
|
||||
- entries: [{path, is_dir, size, modified_at, depth}]
|
||||
- next_cursor: continuation token or null
|
||||
- has_more: whether additional pages exist
|
||||
- truncated: true when additional entries were omitted due to page_size
|
||||
"""
|
||||
|
||||
SURFSENSE_GLOB_TOOL_DESCRIPTION = """Find files matching a glob pattern.
|
||||
|
|
@ -251,13 +246,13 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
if filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
|
||||
system_prompt += (
|
||||
"\n- move_file: move or rename files/folders in local-folder mode."
|
||||
"\n- list_tree: recursively list nested local paths with cursor pagination."
|
||||
"\n- list_tree: recursively list nested local paths in one bounded response."
|
||||
"\n\n## Local Folder Mode"
|
||||
"\n\nThis chat is running in desktop local-folder mode."
|
||||
" Keep all file operations local. Do not use save_document."
|
||||
" Always use mount-prefixed absolute paths like /<folder>/file.ext."
|
||||
" If you are unsure which mounts are available, call ls('/') first."
|
||||
" For big trees: use list_tree pages, then grep, then read_file."
|
||||
" For big trees: use list_tree, then grep, then read_file."
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
|
|
@ -812,35 +807,14 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
"""Only cloud mode persists file content to Document/Chunk tables."""
|
||||
return self._filesystem_mode == FilesystemMode.CLOUD
|
||||
|
||||
def _default_mount_prefix(self, runtime: ToolRuntime[None, FilesystemState]) -> str:
|
||||
backend = self._get_backend(runtime)
|
||||
if isinstance(backend, MultiRootLocalFolderBackend):
|
||||
return f"/{backend.default_mount()}"
|
||||
return ""
|
||||
|
||||
def _normalize_local_mount_path(
|
||||
self, candidate: str, runtime: ToolRuntime[None, FilesystemState]
|
||||
) -> str:
|
||||
backend = self._get_backend(runtime)
|
||||
mount_prefix = self._default_mount_prefix(runtime)
|
||||
normalized_candidate = re.sub(r"/+", "/", candidate.strip().replace("\\", "/"))
|
||||
if not mount_prefix or not isinstance(backend, MultiRootLocalFolderBackend):
|
||||
if normalized_candidate.startswith("/"):
|
||||
return normalized_candidate
|
||||
return f"/{normalized_candidate.lstrip('/')}"
|
||||
|
||||
mount_names = set(backend.list_mounts())
|
||||
if normalized_candidate.startswith("/"):
|
||||
first_segment = normalized_candidate.lstrip("/").split("/", 1)[0]
|
||||
if first_segment in mount_names:
|
||||
return normalized_candidate
|
||||
return f"{mount_prefix}{normalized_candidate}"
|
||||
|
||||
relative = normalized_candidate.lstrip("/")
|
||||
first_segment = relative.split("/", 1)[0]
|
||||
if first_segment in mount_names:
|
||||
return f"/{relative}"
|
||||
return f"{mount_prefix}/{relative}"
|
||||
@staticmethod
|
||||
def _normalize_absolute_path(candidate: str) -> str:
|
||||
normalized = re.sub(r"/+", "/", candidate.strip().replace("\\", "/"))
|
||||
if not normalized:
|
||||
return "/"
|
||||
if normalized.startswith("/"):
|
||||
return normalized
|
||||
return f"/{normalized.lstrip('/')}"
|
||||
|
||||
def _get_contract_suggested_path(
|
||||
self, runtime: ToolRuntime[None, FilesystemState]
|
||||
|
|
@ -848,14 +822,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
contract = runtime.state.get("file_operation_contract") or {}
|
||||
suggested = contract.get("suggested_path")
|
||||
if isinstance(suggested, str) and suggested.strip():
|
||||
cleaned = suggested.strip()
|
||||
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
|
||||
return self._normalize_local_mount_path(cleaned, runtime)
|
||||
return cleaned
|
||||
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
|
||||
mount_prefix = self._default_mount_prefix(runtime)
|
||||
if mount_prefix:
|
||||
return f"{mount_prefix}/notes.md"
|
||||
return self._normalize_absolute_path(suggested)
|
||||
return "/notes.md"
|
||||
|
||||
def _resolve_write_target_path(
|
||||
|
|
@ -867,7 +834,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
if not candidate:
|
||||
return self._get_contract_suggested_path(runtime)
|
||||
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
|
||||
return self._normalize_local_mount_path(candidate, runtime)
|
||||
return self._normalize_absolute_path(candidate)
|
||||
if not candidate.startswith("/"):
|
||||
return f"/{candidate.lstrip('/')}"
|
||||
return candidate
|
||||
|
|
@ -881,7 +848,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
if not candidate:
|
||||
return ""
|
||||
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
|
||||
return self._normalize_local_mount_path(candidate, runtime)
|
||||
return self._normalize_absolute_path(candidate)
|
||||
if not candidate.startswith("/"):
|
||||
return f"/{candidate.lstrip('/')}"
|
||||
return candidate
|
||||
|
|
@ -895,7 +862,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
if candidate == "/":
|
||||
return "/"
|
||||
if self._filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER:
|
||||
return self._normalize_local_mount_path(candidate, runtime)
|
||||
return self._normalize_absolute_path(candidate)
|
||||
if not candidate.startswith("/"):
|
||||
return f"/{candidate.lstrip('/')}"
|
||||
return candidate
|
||||
|
|
@ -1136,12 +1103,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
] = 8,
|
||||
page_size: Annotated[
|
||||
int,
|
||||
"Number of entries to return per page. Defaults to 500 (max 1000).",
|
||||
"Maximum number of entries to return. Defaults to 500 (max 1000).",
|
||||
] = 500,
|
||||
cursor: Annotated[
|
||||
str | None,
|
||||
"Opaque cursor from a previous list_tree call.",
|
||||
] = None,
|
||||
include_files: Annotated[
|
||||
bool,
|
||||
"Whether file entries should be included.",
|
||||
|
|
@ -1171,7 +1134,6 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
validated_path,
|
||||
max_depth=max_depth,
|
||||
page_size=page_size,
|
||||
cursor=cursor,
|
||||
include_files=include_files,
|
||||
include_dirs=include_dirs,
|
||||
)
|
||||
|
|
@ -1193,12 +1155,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
] = 8,
|
||||
page_size: Annotated[
|
||||
int,
|
||||
"Number of entries to return per page. Defaults to 500 (max 1000).",
|
||||
"Maximum number of entries to return. Defaults to 500 (max 1000).",
|
||||
] = 500,
|
||||
cursor: Annotated[
|
||||
str | None,
|
||||
"Opaque cursor from a previous list_tree call.",
|
||||
] = None,
|
||||
include_files: Annotated[
|
||||
bool,
|
||||
"Whether file entries should be included.",
|
||||
|
|
@ -1228,7 +1186,6 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
|||
validated_path,
|
||||
max_depth=max_depth,
|
||||
page_size=page_size,
|
||||
cursor=cursor,
|
||||
include_files=include_files,
|
||||
include_dirs=include_dirs,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -9,9 +9,7 @@ import threading
|
|||
from collections import deque
|
||||
from contextlib import ExitStack
|
||||
from pathlib import Path
|
||||
from time import time
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from deepagents.backends.protocol import (
|
||||
EditResult,
|
||||
|
|
@ -43,8 +41,6 @@ class LocalFolderBackend:
|
|||
self._root = root
|
||||
self._locks: dict[str, threading.Lock] = {}
|
||||
self._locks_mu = threading.Lock()
|
||||
self._tree_sessions: dict[str, dict[str, Any]] = {}
|
||||
self._tree_sessions_ttl_s = 900
|
||||
|
||||
def _lock_for(self, path: str) -> threading.Lock:
|
||||
with self._locks_mu:
|
||||
|
|
@ -89,16 +85,6 @@ class LocalFolderBackend:
|
|||
def _clamp_page_size(page_size: int) -> int:
|
||||
return max(1, min(page_size, 1000))
|
||||
|
||||
def _prune_expired_tree_sessions(self) -> None:
|
||||
now = time()
|
||||
expired = [
|
||||
cursor
|
||||
for cursor, session in self._tree_sessions.items()
|
||||
if now - float(session.get("last_accessed_at", now)) > self._tree_sessions_ttl_s
|
||||
]
|
||||
for cursor in expired:
|
||||
self._tree_sessions.pop(cursor, None)
|
||||
|
||||
def _read_dir_entries(self, directory_path: str) -> list[dict[str, Any]]:
|
||||
directory = Path(directory_path)
|
||||
try:
|
||||
|
|
@ -206,148 +192,82 @@ class LocalFolderBackend:
|
|||
*,
|
||||
max_depth: int | None = 8,
|
||||
page_size: int = 500,
|
||||
cursor: str | None = None,
|
||||
include_files: bool = True,
|
||||
include_dirs: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
self._prune_expired_tree_sessions()
|
||||
if not include_files and not include_dirs:
|
||||
return {
|
||||
"entries": [],
|
||||
"next_cursor": None,
|
||||
"has_more": False,
|
||||
"truncated": False,
|
||||
}
|
||||
|
||||
normalized_depth = None if max_depth is None else max(0, int(max_depth))
|
||||
page_limit = self._clamp_page_size(int(page_size))
|
||||
now = time()
|
||||
|
||||
if cursor:
|
||||
session = self._tree_sessions.get(cursor)
|
||||
if not session:
|
||||
return {"error": "Invalid or expired cursor"}
|
||||
if (
|
||||
session.get("path") != path
|
||||
or session.get("max_depth") != normalized_depth
|
||||
or session.get("include_files") != include_files
|
||||
or session.get("include_dirs") != include_dirs
|
||||
):
|
||||
return {"error": "Cursor options do not match request options"}
|
||||
state = session
|
||||
else:
|
||||
try:
|
||||
start = self._resolve_virtual(path, allow_root=True)
|
||||
except ValueError:
|
||||
return {"error": f"Error: invalid path '{path}'"}
|
||||
if not start.exists():
|
||||
return {"error": f"Error: path '{path}' not found"}
|
||||
if start.is_file():
|
||||
stat_result = start.stat()
|
||||
if include_files:
|
||||
return {
|
||||
"entries": [
|
||||
{
|
||||
"path": self._to_virtual(start, self._root),
|
||||
"is_dir": False,
|
||||
"size": stat_result.st_size,
|
||||
"modified_at": str(stat_result.st_mtime),
|
||||
"depth": 0,
|
||||
}
|
||||
],
|
||||
"next_cursor": None,
|
||||
"has_more": False,
|
||||
"truncated": False,
|
||||
}
|
||||
try:
|
||||
start = self._resolve_virtual(path, allow_root=True)
|
||||
except ValueError:
|
||||
return {"error": f"Error: invalid path '{path}'"}
|
||||
if not start.exists():
|
||||
return {"error": f"Error: path '{path}' not found"}
|
||||
if start.is_file():
|
||||
stat_result = start.stat()
|
||||
if include_files:
|
||||
return {
|
||||
"entries": [],
|
||||
"next_cursor": None,
|
||||
"has_more": False,
|
||||
"entries": [
|
||||
{
|
||||
"path": self._to_virtual(start, self._root),
|
||||
"is_dir": False,
|
||||
"size": stat_result.st_size,
|
||||
"modified_at": str(stat_result.st_mtime),
|
||||
"depth": 0,
|
||||
}
|
||||
],
|
||||
"truncated": False,
|
||||
}
|
||||
state = {
|
||||
"path": path,
|
||||
"max_depth": normalized_depth,
|
||||
"include_files": include_files,
|
||||
"include_dirs": include_dirs,
|
||||
"pending_dirs": deque([(str(start), 0)]),
|
||||
"active_dir": None,
|
||||
"active_depth": 0,
|
||||
"active_entries": [],
|
||||
"active_index": 0,
|
||||
return {
|
||||
"entries": [],
|
||||
"truncated": False,
|
||||
}
|
||||
|
||||
pending_dirs: deque[tuple[str, int]] = deque([(str(start), 0)])
|
||||
entries: list[dict[str, Any]] = []
|
||||
truncated = False
|
||||
while len(entries) < page_limit:
|
||||
active_entries = state.get("active_entries", [])
|
||||
active_index = int(state.get("active_index", 0))
|
||||
if active_index >= len(active_entries):
|
||||
pending_dirs = state.get("pending_dirs", [])
|
||||
if not pending_dirs:
|
||||
state["active_entries"] = []
|
||||
state["active_index"] = 0
|
||||
break
|
||||
next_dir_path, next_depth = pending_dirs.popleft()
|
||||
state["active_dir"] = next_dir_path
|
||||
state["active_depth"] = next_depth
|
||||
state["active_entries"] = self._read_dir_entries(next_dir_path)
|
||||
state["active_index"] = 0
|
||||
active_entries = state["active_entries"]
|
||||
active_index = 0
|
||||
|
||||
if active_index >= len(active_entries):
|
||||
continue
|
||||
|
||||
item = active_entries[active_index]
|
||||
state["active_index"] = active_index + 1
|
||||
item_depth = int(state.get("active_depth", 0)) + 1
|
||||
if normalized_depth is not None and item_depth > normalized_depth:
|
||||
continue
|
||||
if item["is_dir"]:
|
||||
if normalized_depth is None or item_depth <= normalized_depth:
|
||||
state["pending_dirs"].append((item["absolute_path"], item_depth))
|
||||
if include_dirs:
|
||||
while pending_dirs and not truncated:
|
||||
next_dir_path, next_depth = pending_dirs.popleft()
|
||||
active_entries = self._read_dir_entries(next_dir_path)
|
||||
for item in active_entries:
|
||||
item_depth = next_depth + 1
|
||||
if normalized_depth is not None and item_depth > normalized_depth:
|
||||
continue
|
||||
if item["is_dir"]:
|
||||
if normalized_depth is None or item_depth <= normalized_depth:
|
||||
pending_dirs.append((item["absolute_path"], item_depth))
|
||||
if include_dirs:
|
||||
entries.append(
|
||||
{
|
||||
"path": item["path"],
|
||||
"is_dir": True,
|
||||
"size": 0,
|
||||
"modified_at": item["modified_at"],
|
||||
"depth": item_depth,
|
||||
}
|
||||
)
|
||||
elif include_files:
|
||||
entries.append(
|
||||
{
|
||||
"path": item["path"],
|
||||
"is_dir": True,
|
||||
"size": 0,
|
||||
"is_dir": False,
|
||||
"size": item["size"],
|
||||
"modified_at": item["modified_at"],
|
||||
"depth": item_depth,
|
||||
}
|
||||
)
|
||||
elif include_files:
|
||||
entries.append(
|
||||
{
|
||||
"path": item["path"],
|
||||
"is_dir": False,
|
||||
"size": item["size"],
|
||||
"modified_at": item["modified_at"],
|
||||
"depth": item_depth,
|
||||
}
|
||||
)
|
||||
|
||||
if len(entries) >= page_limit:
|
||||
truncated = True
|
||||
break
|
||||
|
||||
has_more = bool(state.get("pending_dirs")) or (
|
||||
int(state.get("active_index", 0)) < len(state.get("active_entries", []))
|
||||
)
|
||||
if has_more:
|
||||
next_cursor = cursor or uuid4().hex
|
||||
state["last_accessed_at"] = now
|
||||
self._tree_sessions[next_cursor] = state
|
||||
else:
|
||||
next_cursor = None
|
||||
if cursor:
|
||||
self._tree_sessions.pop(cursor, None)
|
||||
if len(entries) >= page_limit:
|
||||
truncated = True
|
||||
break
|
||||
|
||||
return {
|
||||
"entries": entries,
|
||||
"next_cursor": next_cursor,
|
||||
"has_more": has_more,
|
||||
"truncated": truncated,
|
||||
}
|
||||
|
||||
|
|
@ -357,7 +277,6 @@ class LocalFolderBackend:
|
|||
*,
|
||||
max_depth: int | None = 8,
|
||||
page_size: int = 500,
|
||||
cursor: str | None = None,
|
||||
include_files: bool = True,
|
||||
include_dirs: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
|
|
@ -366,7 +285,6 @@ class LocalFolderBackend:
|
|||
path,
|
||||
max_depth=max_depth,
|
||||
page_size=page_size,
|
||||
cursor=cursor,
|
||||
include_files=include_files,
|
||||
include_dirs=include_dirs,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -109,28 +107,6 @@ class MultiRootLocalFolderBackend:
|
|||
for mount in self._mount_order
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _encode_tree_cursor(mount: str, local_cursor: str) -> str:
|
||||
payload = json.dumps(
|
||||
{"mount": mount, "cursor": local_cursor},
|
||||
separators=(",", ":"),
|
||||
).encode("utf-8")
|
||||
return base64.urlsafe_b64encode(payload).decode("ascii")
|
||||
|
||||
@staticmethod
|
||||
def _decode_tree_cursor(cursor: str) -> tuple[str, str]:
|
||||
try:
|
||||
padded = cursor + "=" * ((4 - len(cursor) % 4) % 4)
|
||||
data = base64.urlsafe_b64decode(padded.encode("ascii"))
|
||||
parsed = json.loads(data.decode("utf-8"))
|
||||
except Exception as exc:
|
||||
raise ValueError("Invalid cursor") from exc
|
||||
mount = parsed.get("mount")
|
||||
local_cursor = parsed.get("cursor")
|
||||
if not isinstance(mount, str) or not isinstance(local_cursor, str):
|
||||
raise ValueError("Invalid cursor")
|
||||
return mount, local_cursor
|
||||
|
||||
def _transform_infos(self, mount: str, infos: list[FileInfo]) -> list[FileInfo]:
|
||||
transformed: list[FileInfo] = []
|
||||
for info in infos:
|
||||
|
|
@ -162,11 +138,10 @@ class MultiRootLocalFolderBackend:
|
|||
*,
|
||||
max_depth: int | None = 8,
|
||||
page_size: int = 500,
|
||||
cursor: str | None = None,
|
||||
include_files: bool = True,
|
||||
include_dirs: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
if path == "/" and not cursor:
|
||||
if path == "/":
|
||||
entries = [
|
||||
{
|
||||
"path": f"/{mount}",
|
||||
|
|
@ -179,20 +154,11 @@ class MultiRootLocalFolderBackend:
|
|||
]
|
||||
return {
|
||||
"entries": entries if include_dirs else [],
|
||||
"next_cursor": None,
|
||||
"has_more": False,
|
||||
"truncated": False,
|
||||
}
|
||||
|
||||
try:
|
||||
if cursor:
|
||||
mount, local_cursor = self._decode_tree_cursor(cursor)
|
||||
if mount not in self._mount_to_backend:
|
||||
return {"error": "Invalid or expired cursor"}
|
||||
local_path = "/"
|
||||
else:
|
||||
mount, local_path = self._split_mount_path(path)
|
||||
local_cursor = None
|
||||
mount, local_path = self._split_mount_path(path)
|
||||
except ValueError as exc:
|
||||
return {"error": f"Error: {exc}"}
|
||||
|
||||
|
|
@ -200,7 +166,6 @@ class MultiRootLocalFolderBackend:
|
|||
local_path,
|
||||
max_depth=max_depth,
|
||||
page_size=page_size,
|
||||
cursor=local_cursor,
|
||||
include_files=include_files,
|
||||
include_dirs=include_dirs,
|
||||
)
|
||||
|
|
@ -220,16 +185,8 @@ class MultiRootLocalFolderBackend:
|
|||
}
|
||||
)
|
||||
|
||||
local_next_cursor = self._get_str(result, "next_cursor")
|
||||
next_cursor = (
|
||||
self._encode_tree_cursor(mount, local_next_cursor)
|
||||
if local_next_cursor
|
||||
else None
|
||||
)
|
||||
return {
|
||||
"entries": entries,
|
||||
"next_cursor": next_cursor,
|
||||
"has_more": self._get_bool(result, "has_more"),
|
||||
"truncated": self._get_bool(result, "truncated"),
|
||||
}
|
||||
|
||||
|
|
@ -239,7 +196,6 @@ class MultiRootLocalFolderBackend:
|
|||
*,
|
||||
max_depth: int | None = 8,
|
||||
page_size: int = 500,
|
||||
cursor: str | None = None,
|
||||
include_files: bool = True,
|
||||
include_dirs: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
|
|
@ -248,7 +204,6 @@ class MultiRootLocalFolderBackend:
|
|||
path,
|
||||
max_depth=max_depth,
|
||||
page_size=page_size,
|
||||
cursor=cursor,
|
||||
include_files=include_files,
|
||||
include_dirs=include_dirs,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue