feat: improved agent streaming

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-29 07:20:31 -07:00
parent afb4b09cde
commit c110f5b955
60 changed files with 8068 additions and 303 deletions

View file

@ -30,6 +30,7 @@ from collections.abc import Awaitable, Callable
from typing import TYPE_CHECKING, Any
from langchain.agents.middleware import AgentMiddleware
from langchain_core.callbacks import adispatch_custom_event
from langchain_core.messages import ToolMessage
from app.agents.new_chat.feature_flags import get_flags
@ -144,11 +145,19 @@ class ActionLogMiddleware(AgentMiddleware):
result=result,
)
tool_call_id = _resolve_tool_call_id(request)
chat_turn_id = _resolve_chat_turn_id(request)
row = AgentActionLog(
thread_id=self._thread_id,
user_id=self._user_id,
search_space_id=self._search_space_id,
turn_id=_resolve_turn_id(request),
# ``turn_id`` is the deprecated alias of ``tool_call_id``
# kept for one release for safe rollback. New consumers
# should read ``tool_call_id`` directly.
turn_id=tool_call_id,
tool_call_id=tool_call_id,
chat_turn_id=chat_turn_id,
message_id=_resolve_message_id(request),
tool_name=tool_name,
args=args_payload,
@ -160,11 +169,41 @@ class ActionLogMiddleware(AgentMiddleware):
async with shielded_async_session() as session:
session.add(row)
await session.commit()
row_id = int(row.id) if row.id is not None else None
row_created_at = row.created_at
except Exception:
logger.warning(
"ActionLogMiddleware failed to persist action log row",
exc_info=True,
)
return
# Surface a side-channel SSE event so the chat tool card can
# render a Revert button immediately after the row is durable.
# ``stream_new_chat`` translates this into a
# ``data-action-log`` SSE event. We DO NOT include the
# ``reverse_descriptor`` payload here; only a presence flag.
try:
await adispatch_custom_event(
"action_log",
{
"id": row_id,
"lc_tool_call_id": tool_call_id,
"chat_turn_id": chat_turn_id,
"tool_name": tool_name,
"reversible": bool(reversible),
"reverse_descriptor_present": reverse_descriptor is not None,
"created_at": row_created_at.isoformat()
if row_created_at
else None,
"error": error_payload is not None,
},
)
except Exception:
logger.debug(
"ActionLogMiddleware failed to dispatch action_log event",
exc_info=True,
)
def _render_reverse(
self,
@ -254,7 +293,8 @@ def _resolve_args_payload(request: Any) -> dict[str, Any] | None:
}
def _resolve_turn_id(request: Any) -> str | None:
def _resolve_tool_call_id(request: Any) -> str | None:
"""Return the LangChain ``tool_call.id`` for this request, if any."""
try:
call = getattr(request, "tool_call", None) or {}
if isinstance(call, dict):
@ -266,9 +306,40 @@ def _resolve_turn_id(request: Any) -> str | None:
return None
# Deprecated alias kept for one release so old callers/tests keep working.
# Historically ``turn_id`` carried the LangChain tool_call id; the new
# column is ``tool_call_id``. Both resolve to the same value today.
# TODO(next release): delete this alias once no caller imports it.
_resolve_turn_id = _resolve_tool_call_id
def _resolve_chat_turn_id(request: Any) -> str | None:
"""Return ``configurable.turn_id`` for this request, if accessible.
``ToolRuntime.config`` is exposed by LangGraph (see
``langgraph/prebuilt/tool_node.py``); the chat-turn correlation id
lives at ``runtime.config["configurable"]["turn_id"]``.
"""
try:
runtime = getattr(request, "runtime", None)
if runtime is None:
return None
config = getattr(runtime, "config", None)
if not isinstance(config, dict):
return None
configurable = config.get("configurable")
if not isinstance(configurable, dict):
return None
value = configurable.get("turn_id")
if isinstance(value, str) and value:
return value
except Exception: # pragma: no cover - defensive
pass
return None
def _resolve_message_id(request: Any) -> str | None:
    """Return the best-available message correlator at this layer.

    No dedicated message id flows through tool requests, so the LangChain
    ``tool_call.id`` doubles as the correlator. (A stale duplicate
    ``return _resolve_turn_id(request)`` line — a leftover of the
    turn_id→tool_call_id rename — made the canonical return unreachable;
    it is removed here and the canonical resolver is called directly.)
    """
    return _resolve_tool_call_id(request)
def _resolve_result_id(result: Any) -> str | None:

View file

@ -102,6 +102,8 @@ current working directory (`cwd`, default `/documents`).
- cd(path): change the current working directory.
- pwd(): print the current working directory.
- move_file(source, dest): move/rename a file under `/documents/`.
- rm(path): delete a single file under `/documents/` (no `-r`).
- rmdir(path): delete an empty directory under `/documents/`.
- list_tree(path, max_depth, page_size): recursively list files/folders.
## Persistence Rules
@ -112,8 +114,9 @@ current working directory (`cwd`, default `/documents`).
`/documents/temp_scratch.md`) are **discarded** at end of turn — use this
prefix for any scratch/working content you do NOT want saved.
- All other paths (outside `/documents/` and not `temp_*`) are rejected.
- mkdir/move_file are staged this turn and committed at end of turn alongside
any new/edited documents.
- mkdir/move_file/rm/rmdir are staged this turn and committed at end of
turn alongside any new/edited documents. Snapshot/revert is enabled
for every destructive operation when action logging is on.
## Reading Documents Efficiently
@ -176,6 +179,8 @@ directory (`cwd`).
- cd(path): change the current working directory.
- pwd(): print the current working directory.
- move_file(source, dest): move/rename a file.
- rm(path): delete a single file from disk (no `-r`). NOT reversible.
- rmdir(path): delete an empty directory from disk. NOT reversible.
- list_tree(path, max_depth, page_size): recursively list files/folders.
## Workflow Tips
@ -184,6 +189,8 @@ directory (`cwd`).
- For large trees, prefer `list_tree` then `grep` then `read_file` over
brute-force directory traversal.
- Cross-mount moves are not supported.
- Desktop deletes hit disk immediately and cannot be undone via the
agent's revert flow — confirm before calling `rm`/`rmdir`.
"""
)
@ -355,6 +362,42 @@ Notes:
- Parent folders are created as needed.
"""
_CLOUD_RM_TOOL_DESCRIPTION = """Deletes a single file under `/documents/`.
Mirrors POSIX `rm path` (no `-r`, no glob expansion). Stages the deletion
for end-of-turn commit; the row is removed only after the agent's turn
finishes successfully.
Args:
- path: absolute or relative file path. Cannot point at a directory use
`rmdir` for empty folders. Cannot target the root or `/documents`.
Notes:
- The action is reversible via the per-action revert flow when action
logging is enabled.
- The anonymous uploaded document is read-only and cannot be deleted.
"""
_CLOUD_RMDIR_TOOL_DESCRIPTION = """Deletes an empty directory under `/documents/`.
Mirrors POSIX `rmdir path`: refuses non-empty directories. Recursive
deletion (`rm -r`) is intentionally NOT supported clear contents with
`rm` first.
Args:
- path: absolute or relative directory path. Cannot target the root,
`/documents`, the current cwd, or any ancestor of cwd (use `cd` to
move out first).
Notes:
- Emptiness is evaluated against the post-staged view, so a same-turn
`rm /a/x.md` followed by `rmdir /a` is fine.
- If the directory was added in this same turn via `mkdir` and never
committed, the staged mkdir is dropped instead of issuing a delete.
- The action is reversible via the per-action revert flow when action
logging is enabled.
"""
# --- desktop-only ----------------------------------------------------------
_DESKTOP_LIST_FILES_TOOL_DESCRIPTION = """Lists files and directories at the given path.
@ -421,6 +464,28 @@ Notes:
- Parent folders are created as needed.
"""
_DESKTOP_RM_TOOL_DESCRIPTION = """Deletes a single file from disk.
Mirrors POSIX `rm path` (no `-r`, no glob expansion). The deletion hits
disk immediately. Desktop deletes are NOT reversible via the agent's
revert flow.
Args:
- path: absolute mount-prefixed file path. Cannot point at a directory
use `rmdir` for empty folders.
"""
# LLM-facing description for the desktop `rmdir` tool. Per the text below,
# the delete is executed immediately against the real filesystem and is
# not revertible — unlike the staged cloud behavior.
_DESKTOP_RMDIR_TOOL_DESCRIPTION = """Deletes an empty directory from disk.
Mirrors POSIX `rmdir path`: refuses non-empty directories. Recursive
deletion is NOT supported. The deletion hits disk immediately and is
NOT reversible via the agent's revert flow.
Args:
- path: absolute mount-prefixed directory path. Cannot target the mount
root or any directory containing files/subfolders.
"""
def _build_tool_descriptions(filesystem_mode: FilesystemMode) -> dict[str, str]:
"""Pick the active-mode description for every filesystem tool."""
@ -437,6 +502,8 @@ def _build_tool_descriptions(filesystem_mode: FilesystemMode) -> dict[str, str]:
"mkdir": _CLOUD_MKDIR_TOOL_DESCRIPTION,
"cd": SURFSENSE_CD_TOOL_DESCRIPTION,
"pwd": SURFSENSE_PWD_TOOL_DESCRIPTION,
"rm": _CLOUD_RM_TOOL_DESCRIPTION,
"rmdir": _CLOUD_RMDIR_TOOL_DESCRIPTION,
}
return {
"ls": _DESKTOP_LIST_FILES_TOOL_DESCRIPTION,
@ -450,6 +517,8 @@ def _build_tool_descriptions(filesystem_mode: FilesystemMode) -> dict[str, str]:
"mkdir": _DESKTOP_MKDIR_TOOL_DESCRIPTION,
"cd": SURFSENSE_CD_TOOL_DESCRIPTION,
"pwd": SURFSENSE_PWD_TOOL_DESCRIPTION,
"rm": _DESKTOP_RM_TOOL_DESCRIPTION,
"rmdir": _DESKTOP_RMDIR_TOOL_DESCRIPTION,
}
@ -476,6 +545,21 @@ def _basename(path: str) -> str:
return path.rsplit("/", 1)[-1]
def _is_ancestor_of(candidate: str, target: str) -> bool:
"""True iff ``candidate`` is a strict ancestor directory of ``target``.
``target`` itself is NOT considered an ancestor (use equality for that).
Both paths are assumed to be canonicalised, absolute, and free of
trailing slashes (except the root ``/``).
"""
if not candidate.startswith("/") or not target.startswith("/"):
return False
if candidate == target:
return False
prefix = candidate.rstrip("/") + "/"
return target.startswith(prefix)
class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
"""SurfSense-specific filesystem middleware (cloud + desktop)."""
@ -519,6 +603,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
self.tools.append(self._create_cd_tool())
self.tools.append(self._create_pwd_tool())
self.tools.append(self._create_move_file_tool())
self.tools.append(self._create_rm_tool())
self.tools.append(self._create_rmdir_tool())
self.tools.append(self._create_list_tree_tool())
if self._sandbox_available:
self.tools.append(self._create_execute_code_tool())
@ -941,6 +1027,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
}
if self._is_cloud():
update["dirty_paths"] = [path]
update["dirty_path_tool_calls"] = {path: runtime.tool_call_id}
return Command(update=update)
def sync_write_file(
@ -1036,6 +1123,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
}
if self._is_cloud():
update["dirty_paths"] = [path]
update["dirty_path_tool_calls"] = {path: runtime.tool_call_id}
if doc_id_to_attach is not None:
update["doc_id_by_path"] = {path: doc_id_to_attach}
return Command(update=update)
@ -1103,6 +1191,9 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
return Command(
update={
"staged_dirs": [validated],
"staged_dir_tool_calls": {
validated: runtime.tool_call_id,
},
"messages": [
ToolMessage(
content=(
@ -1372,7 +1463,14 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
files_update: dict[str, Any] = {source: None, dest: source_file_data}
update: dict[str, Any] = {
"files": files_update,
"pending_moves": [{"source": source, "dest": dest, "overwrite": False}],
"pending_moves": [
{
"source": source,
"dest": dest,
"overwrite": False,
"tool_call_id": runtime.tool_call_id,
}
],
"messages": [
ToolMessage(
content=(
@ -1396,6 +1494,323 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
update["dirty_paths"] = new_dirty
return Command(update=update)
# ------------------------------------------------------------------ tool: rm
def _create_rm_tool(self) -> BaseTool:
"""Build the `rm` tool: staged tombstone delete on cloud, immediate disk delete on desktop."""
tool_description = (
self._custom_tool_descriptions.get("rm") or _CLOUD_RM_TOOL_DESCRIPTION
)
# Async implementation; sync_rm below bridges to it for blocking callers.
async def async_rm(
path: Annotated[
str,
"Absolute or relative path to the file to delete.",
],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> Command | str:
if not path or not path.strip():
return "Error: path is required."
target = self._resolve_relative(path, runtime)
try:
validated = validate_path(target)
except ValueError as exc:
return f"Error: {exc}"
# Cloud mode stages a tombstone; the DB delete happens at end-of-turn commit.
if self._is_cloud():
if validated in ("/", DOCUMENTS_ROOT):
return f"Error: refusing to rm '{validated}'."
if not validated.startswith(DOCUMENTS_ROOT + "/"):
return (
"Error: cloud rm must target a path under /documents/ "
f"(got '{validated}')."
)
# The anonymous uploaded document is explicitly protected from rm.
anon = runtime.state.get("kb_anon_doc") or {}
if isinstance(anon, dict) and str(anon.get("path") or "") == validated:
return "Error: the anonymous uploaded document is read-only."
# Refuse if the path looks like a directory.
staged_dirs = list(runtime.state.get("staged_dirs") or [])
if validated in staged_dirs:
return (
f"Error: '{validated}' is a directory. Use rmdir for "
"empty directories."
)
pending_dir_deletes = list(
runtime.state.get("pending_dir_deletes") or []
)
if any(
isinstance(d, dict) and d.get("path") == validated
for d in pending_dir_deletes
):
return f"Error: '{validated}' is already queued for rmdir."
backend = self._get_backend(runtime)
if isinstance(backend, KBPostgresBackend):
# Detect "is a directory" via `ls`: if the path lists
# children we know it's a folder. Otherwise we still
# need to confirm it's a real file before staging.
children = await backend.als_info(validated)
if children:
return (
f"Error: '{validated}' is a directory. Use rmdir for "
"empty directories."
)
# Already queued for delete this turn?
pending_deletes = list(runtime.state.get("pending_deletes") or [])
if any(
isinstance(d, dict) and d.get("path") == validated
for d in pending_deletes
):
return f"'{validated}' is already queued for deletion."
# Resolve doc_id (best-effort): file in state or DB.
files_state = runtime.state.get("files") or {}
doc_id_by_path = runtime.state.get("doc_id_by_path") or {}
resolved_doc_id: int | None = doc_id_by_path.get(validated)
if (
validated not in files_state
and resolved_doc_id is None
and isinstance(backend, KBPostgresBackend)
):
# NOTE(review): reaches into a private backend helper to confirm the
# file actually exists before staging — confirm this stays in sync
# with KBPostgresBackend's internals.
loaded = await backend._load_file_data(validated)
if loaded is None:
return f"Error: file '{validated}' not found."
_, resolved_doc_id = loaded
# A None value in `files` is the tombstone marker for the path.
files_update: dict[str, Any] = {validated: None}
update: dict[str, Any] = {
"pending_deletes": [
{
"path": validated,
"tool_call_id": runtime.tool_call_id,
}
],
"files": files_update,
"doc_id_by_path": {validated: None},
"messages": [
ToolMessage(
content=(
f"Staged delete of '{validated}' (will commit at "
"end of turn)."
),
tool_call_id=runtime.tool_call_id,
)
],
}
# Drop the path from dirty_paths so a same-turn write+rm
# doesn't recreate the doc at commit time.
dirty_paths = list(runtime.state.get("dirty_paths") or [])
if validated in dirty_paths:
new_dirty: list[Any] = [_CLEAR]
for entry in dirty_paths:
if entry != validated:
new_dirty.append(entry)
update["dirty_paths"] = new_dirty
# NOTE(review): assumes a None value clears the per-path tool-call
# marker in the state reducer — confirm against the reducer.
update["dirty_path_tool_calls"] = {validated: None}
return Command(update=update)
# Desktop mode — hit disk immediately.
backend = self._get_backend(runtime)
adelete = getattr(backend, "adelete_file", None)
if not callable(adelete):
return "Error: rm is not supported by the active backend."
res: WriteResult = await adelete(validated)
if res.error:
return res.error
update_desktop: dict[str, Any] = {
"files": {validated: None},
"messages": [
ToolMessage(
content=f"Deleted file '{res.path or validated}'",
tool_call_id=runtime.tool_call_id,
)
],
}
return Command(update=update_desktop)
# Sync wrapper used when the tool is invoked outside an event loop.
def sync_rm(
path: Annotated[
str,
"Absolute or relative path to the file to delete.",
],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> Command | str:
return self._run_async_blocking(async_rm(path, runtime))
# Register both sync and async entry points under the single "rm" tool.
return StructuredTool.from_function(
name="rm",
description=tool_description,
func=sync_rm,
coroutine=async_rm,
)
# ------------------------------------------------------------------ tool: rmdir
def _create_rmdir_tool(self) -> BaseTool:
"""Build the `rmdir` tool: staged empty-dir delete on cloud, immediate on desktop."""
tool_description = (
self._custom_tool_descriptions.get("rmdir") or _CLOUD_RMDIR_TOOL_DESCRIPTION
)
# Async implementation; sync_rmdir below bridges to it for blocking callers.
async def async_rmdir(
path: Annotated[
str,
"Absolute or relative path of the empty directory to delete.",
],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> Command | str:
if not path or not path.strip():
return "Error: path is required."
target = self._resolve_relative(path, runtime)
try:
validated = validate_path(target)
except ValueError as exc:
return f"Error: {exc}"
# Cloud mode: reject unsafe targets, then evaluate emptiness on the
# post-staged view before staging the delete for end-of-turn commit.
if self._is_cloud():
if validated in ("/", DOCUMENTS_ROOT):
return f"Error: refusing to rmdir '{validated}'."
if not validated.startswith(DOCUMENTS_ROOT + "/"):
return (
"Error: cloud rmdir must target a path under /documents/ "
f"(got '{validated}')."
)
# Deleting the cwd (or an ancestor of it) would strand the agent.
cwd = self._current_cwd(runtime)
if validated == cwd or _is_ancestor_of(validated, cwd):
return (
f"Error: cannot rmdir '{validated}' because the current "
"cwd is at or under it. cd out first."
)
staged_dirs = list(runtime.state.get("staged_dirs") or [])
pending_dir_deletes = list(
runtime.state.get("pending_dir_deletes") or []
)
if any(
isinstance(d, dict) and d.get("path") == validated
for d in pending_dir_deletes
):
return f"'{validated}' is already queued for deletion."
backend = self._get_backend(runtime)
# The path must currently exist either in DB folder paths or
# in staged_dirs. We rely on KBPostgresBackend.als_info (which
# already accounts for pending deletes/moves) to evaluate
# both existence and emptiness against the post-staged view.
exists_in_staged = validated in staged_dirs
children: list[Any] = []
if isinstance(backend, KBPostgresBackend):
children = list(await backend.als_info(validated))
# Detect "is a file" — if als_info returns no children but
# the path is actually a file, we should reject. We use
# _load_file_data to disambiguate file vs missing folder.
if (
isinstance(backend, KBPostgresBackend)
and not children
and not exists_in_staged
):
# NOTE(review): private backend helper used to tell "file" apart
# from "missing directory" — confirm it stays available.
loaded = await backend._load_file_data(validated)
if loaded is not None:
return (
f"Error: '{validated}' is a file. Use rm to delete files."
)
# Confirm folder exists in DB by checking the parent listing.
parent = posixpath.dirname(validated) or "/"
parent_listing = await backend.als_info(parent)
parent_has_dir = any(
info.get("path") == validated and info.get("is_dir")
for info in parent_listing
)
if not parent_has_dir:
return f"Error: directory '{validated}' not found."
if children:
return (
f"Error: directory '{validated}' is not empty. "
"Remove contents first."
)
# Same-turn mkdir un-stage: drop the staged_dirs entry
# entirely and skip queuing a DB delete (nothing was ever
# committed).
if exists_in_staged:
rest = [d for d in staged_dirs if d != validated]
return Command(
update={
"staged_dirs": [_CLEAR, *rest],
# NOTE(review): assumes a None value clears the staged-dir
# tool-call marker in the state reducer — confirm.
"staged_dir_tool_calls": {validated: None},
"messages": [
ToolMessage(
content=(f"Un-staged directory '{validated}'."),
tool_call_id=runtime.tool_call_id,
)
],
}
)
return Command(
update={
"pending_dir_deletes": [
{
"path": validated,
"tool_call_id": runtime.tool_call_id,
}
],
"messages": [
ToolMessage(
content=(
f"Staged rmdir of '{validated}' (will commit "
"at end of turn)."
),
tool_call_id=runtime.tool_call_id,
)
],
}
)
# Desktop mode — hit disk immediately.
backend = self._get_backend(runtime)
armdir = getattr(backend, "armdir", None)
if not callable(armdir):
return "Error: rmdir is not supported by the active backend."
res: WriteResult = await armdir(validated)
if res.error:
return res.error
return Command(
update={
"messages": [
ToolMessage(
content=f"Deleted directory '{res.path or validated}'",
tool_call_id=runtime.tool_call_id,
)
],
}
)
# Sync wrapper used when the tool is invoked outside an event loop.
def sync_rmdir(
path: Annotated[
str,
"Absolute or relative path of the empty directory to delete.",
],
runtime: ToolRuntime[None, SurfSenseFilesystemState],
) -> Command | str:
return self._run_async_blocking(async_rmdir(path, runtime))
# Register both sync and async entry points under the single "rmdir" tool.
return StructuredTool.from_function(
name="rmdir",
description=tool_description,
func=sync_rmdir,
coroutine=async_rmdir,
)
# ------------------------------------------------------------------ tool: list_tree
def _create_list_tree_tool(self) -> BaseTool:

View file

@ -115,6 +115,12 @@ class KBPostgresBackend(BackendProtocol):
def _pending_moves(self) -> list[dict[str, Any]]:
return list(self.state.get("pending_moves") or [])
def _pending_deletes(self) -> list[dict[str, Any]]:
return list(self.state.get("pending_deletes") or [])
def _pending_dir_deletes(self) -> list[dict[str, Any]]:
return list(self.state.get("pending_dir_deletes") or [])
def _kb_anon_doc(self) -> dict[str, Any] | None:
anon = self.state.get("kb_anon_doc")
return anon if isinstance(anon, dict) else None
@ -140,18 +146,28 @@ class KBPostgresBackend(BackendProtocol):
return path
return path.rstrip("/") if path != "/" else path
def _moved_view_paths(
def _pending_filesystem_view(
self,
existing: dict[str, dict[str, Any]],
) -> tuple[set[str], dict[str, str]]:
"""Apply ``pending_moves`` to a path set and return ``(removed, alias)``.
) -> tuple[set[str], dict[str, str], set[str]]:
"""Compute removed/aliased/dir-suppressed paths from staged ops.
Removed paths should disappear from listings; ``alias[source] = dest``
means a virtual entry should appear at ``dest`` even if no DB row is
yet there.
Returns ``(removed, alias, deleted_dirs)`` where:
* ``removed`` paths to drop from listings (sources of pending moves
AND paths queued for ``rm``).
* ``alias`` ``{source: dest}`` for pending moves; the dest should
appear as a virtual entry even when no DB row is at that path yet.
* ``deleted_dirs`` folder paths queued for ``rmdir``; their entire
subtree (descendants) is suppressed from listings/glob/grep.
Entries in ``existing`` (the ``files`` state cache) keyed by a
removed path are popped so a same-turn delete-after-write doesn't
leave a stale virtual file in listings.
"""
removed: set[str] = set()
alias: dict[str, str] = {}
deleted_dirs: set[str] = set()
for move in self._pending_moves():
src = move.get("source")
dst = move.get("dest")
@ -160,7 +176,23 @@ class KBPostgresBackend(BackendProtocol):
removed.add(src)
alias[src] = dst
existing.pop(src, None)
return removed, alias
for entry in self._pending_deletes():
path = entry.get("path") if isinstance(entry, dict) else None
if not path:
continue
removed.add(path)
existing.pop(path, None)
for entry in self._pending_dir_deletes():
path = entry.get("path") if isinstance(entry, dict) else None
if not path:
continue
deleted_dirs.add(path)
return removed, alias, deleted_dirs
@staticmethod
def _is_dir_suppressed(path: str, deleted_dirs: set[str]) -> bool:
    """True when ``path`` equals or descends from any rmdir-queued directory."""
    for suppressed in deleted_dirs:
        if path == suppressed or _is_under(path, suppressed):
            return True
    return False
# ------------------------------------------------------------------ ls/read
@ -189,7 +221,7 @@ class KBPostgresBackend(BackendProtocol):
seen.add(anon_path)
files = self._state_files()
moved_removed, moved_alias = self._moved_view_paths(files)
moved_removed, moved_alias, deleted_dirs = self._pending_filesystem_view(files)
if normalized.startswith(DOCUMENTS_ROOT) or normalized == "/":
try:
@ -203,7 +235,12 @@ class KBPostgresBackend(BackendProtocol):
for info in db_infos:
p = info.get("path", "")
if not p or p in seen or p in moved_removed:
if (
not p
or p in seen
or p in moved_removed
or self._is_dir_suppressed(p, deleted_dirs)
):
continue
infos.append(info)
seen.add(p)
@ -212,6 +249,8 @@ class KBPostgresBackend(BackendProtocol):
if src not in seen:
if not _is_under(dst, normalized):
continue
if self._is_dir_suppressed(dst, deleted_dirs):
continue
rel = (
dst[len(normalized) :].lstrip("/")
if normalized != "/"
@ -247,6 +286,8 @@ class KBPostgresBackend(BackendProtocol):
continue
if not _is_under(staged, normalized):
continue
if self._is_dir_suppressed(staged, deleted_dirs):
continue
rel = (
staged[len(normalized) :].lstrip("/")
if normalized != "/"
@ -265,14 +306,26 @@ class KBPostgresBackend(BackendProtocol):
for sub in sorted(subdir_paths):
if sub in seen:
continue
if self._is_dir_suppressed(sub, deleted_dirs):
continue
infos.append(FileInfo(path=sub, is_dir=True, size=0, modified_at=""))
seen.add(sub)
for path_key, fd in files.items():
if not isinstance(path_key, str) or path_key in seen:
continue
# Tombstones (None values) are deletion markers from `rm`. The
# deepagents reducer normally pops them, but a stale tombstone
# surviving a checkpoint must NOT be reported as a child here —
# otherwise rmdir mistakenly sees the deleted file as content.
if fd is None:
continue
if not _is_under(path_key, normalized) or path_key == normalized:
continue
if path_key in moved_removed or self._is_dir_suppressed(
path_key, deleted_dirs
):
continue
if normalized == "/":
rel = path_key.lstrip("/")
else:
@ -550,10 +603,12 @@ class KBPostgresBackend(BackendProtocol):
seen: set[str] = set()
files = self._state_files()
moved_removed, _ = self._moved_view_paths(files)
moved_removed, _, deleted_dirs = self._pending_filesystem_view(files)
regex = re.compile(fnmatch.translate(pattern))
for path_key, fd in files.items():
if path_key in moved_removed:
if path_key in moved_removed or self._is_dir_suppressed(
path_key, deleted_dirs
):
continue
if not _is_under(path_key, normalized):
continue
@ -595,7 +650,11 @@ class KBPostgresBackend(BackendProtocol):
folder_id=row.folder_id,
index=index,
)
if candidate in seen or candidate in moved_removed:
if (
candidate in seen
or candidate in moved_removed
or self._is_dir_suppressed(candidate, deleted_dirs)
):
continue
if not _is_under(candidate, normalized):
continue
@ -634,10 +693,12 @@ class KBPostgresBackend(BackendProtocol):
matches: list[GrepMatch] = []
files = self._state_files()
moved_removed, _ = self._moved_view_paths(files)
moved_removed, _, deleted_dirs = self._pending_filesystem_view(files)
glob_re = re.compile(fnmatch.translate(glob)) if glob else None
for path_key, fd in files.items():
if path_key in moved_removed:
if path_key in moved_removed or self._is_dir_suppressed(
path_key, deleted_dirs
):
continue
if not _is_under(path_key, normalized):
continue
@ -695,7 +756,11 @@ class KBPostgresBackend(BackendProtocol):
)
for doc_id, chunk_id, content in chunk_buffer:
candidate = doc_id_to_path.get(doc_id)
if not candidate or candidate in moved_removed:
if (
not candidate
or candidate in moved_removed
or self._is_dir_suppressed(candidate, deleted_dirs)
):
continue
if not _is_under(candidate, normalized):
continue
@ -769,7 +834,7 @@ class KBPostgresBackend(BackendProtocol):
return {"entries": [], "truncated": False}
files = self._state_files()
moved_removed, _ = self._moved_view_paths(files)
moved_removed, _, deleted_dirs = self._pending_filesystem_view(files)
anon = self._kb_anon_doc()
anon_path = str(anon.get("path") or "") if anon else ""
@ -795,6 +860,8 @@ class KBPostgresBackend(BackendProtocol):
for _fid, fpath in sorted(index.folder_paths.items(), key=lambda kv: kv[1]):
if not _is_under(fpath, normalized):
continue
if self._is_dir_suppressed(fpath, deleted_dirs):
continue
depth = _depth_of(fpath)
if max_depth is not None and depth > max_depth:
continue
@ -811,6 +878,8 @@ class KBPostgresBackend(BackendProtocol):
for staged in self._staged_dirs():
if not _is_under(staged, normalized):
continue
if self._is_dir_suppressed(staged, deleted_dirs):
continue
depth = _depth_of(staged)
if max_depth is not None and depth > max_depth:
continue
@ -835,7 +904,9 @@ class KBPostgresBackend(BackendProtocol):
folder_id=row.folder_id,
index=index,
)
if candidate in moved_removed:
if candidate in moved_removed or self._is_dir_suppressed(
candidate, deleted_dirs
):
continue
if not _is_under(candidate, normalized):
continue
@ -875,6 +946,10 @@ class KBPostgresBackend(BackendProtocol):
continue
if not _is_under(path_key, normalized):
continue
if path_key in moved_removed or self._is_dir_suppressed(
path_key, deleted_dirs
):
continue
if any(e["path"] == path_key for e in entries):
continue
if not (

View file

@ -201,6 +201,12 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg]
)
all_paths = sorted(set(folder_paths + doc_paths + [DOCUMENTS_ROOT]))
# Pre-compute which folders have at least one descendant (folder or doc).
# A folder is "empty" iff no path in `all_paths` is strictly under it.
# Used to emit an explicit "(empty)" marker so the LLM doesn't have to
# infer emptiness from indentation alone.
non_empty_folders = self._compute_non_empty_folders(folder_paths, doc_paths)
lines: list[str] = []
for path in all_paths:
depth = (
@ -214,7 +220,10 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg]
path.rsplit("/", 1)[-1] if path != DOCUMENTS_ROOT else "/documents"
)
if is_dir:
lines.append(f"{indent}{display}/")
if path != DOCUMENTS_ROOT and path not in non_empty_folders:
lines.append(f"{indent}{display}/ (empty)")
else:
lines.append(f"{indent}{display}/")
else:
lines.append(f"{indent}{display}")
if len(lines) >= self.max_entries:
@ -235,6 +244,35 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg]
return self._format_root_summary(folder_paths, doc_paths)
@staticmethod
def _compute_non_empty_folders(
    folder_paths: list[str], doc_paths: list[str]
) -> set[str]:
    """Return every folder path that has at least one visible descendant.

    Each document marks every known-folder ancestor non-empty, walking up
    to (but excluding) ``DOCUMENTS_ROOT``. Each folder likewise marks its
    ancestors, except that walk stops at the first ancestor that is not
    itself a known folder.
    """
    known = set(folder_paths)
    populated: set[str] = set()
    for doc in doc_paths:
        cursor = doc.rsplit("/", 1)[0]
        while cursor and cursor != DOCUMENTS_ROOT:
            if cursor in known:
                populated.add(cursor)
            cursor = cursor.rsplit("/", 1)[0]
    for folder in folder_paths:
        cursor = folder.rsplit("/", 1)[0]
        while cursor and cursor != DOCUMENTS_ROOT and cursor in known:
            populated.add(cursor)
            cursor = cursor.rsplit("/", 1)[0]
    return populated
def _format_root_summary(
self, folder_paths: list[str], doc_paths: list[str]
) -> str:

View file

@ -360,6 +360,74 @@ class LocalFolderBackend:
self.move, source_path, destination_path, overwrite
)
def delete_file(self, file_path: str) -> WriteResult:
    """Remove one file below the backend root (POSIX ``rm path``).

    Invalid paths, missing paths, and directories are rejected with an
    error ``WriteResult``; ``-r`` recursion and glob expansion are
    explicitly out of scope.
    """
    try:
        resolved = self._resolve_virtual(file_path)
    except ValueError:
        return WriteResult(error=f"Error: Invalid path '{file_path}'")
    with self._lock_for(file_path):
        if not resolved.exists():
            return WriteResult(error=f"Error: File '{file_path}' not found")
        if resolved.is_dir():
            message = (
                f"Error: '{file_path}' is a directory. "
                "Use rmdir for empty directories."
            )
            return WriteResult(error=message)
        try:
            os.unlink(resolved)
        except OSError as exc:
            return WriteResult(
                error=f"Error: failed to delete '{file_path}': {exc}"
            )
        return WriteResult(path=file_path, files_update=None)
async def adelete_file(self, file_path: str) -> WriteResult:
    """Async facade over :meth:`delete_file`; runs the blocking work in a thread."""
    result = await asyncio.to_thread(self.delete_file, file_path)
    return result
def rmdir(self, dir_path: str) -> WriteResult:
    """Remove one empty directory below the backend root (POSIX ``rmdir``).

    Invalid paths, missing paths, files, and non-empty directories are
    rejected with an error ``WriteResult``. ``os.rmdir`` already refuses
    non-empty directories; the explicit pre-check only exists to hand the
    agent a clearer message.
    """
    try:
        resolved = self._resolve_virtual(dir_path)
    except ValueError:
        return WriteResult(error=f"Error: Invalid path '{dir_path}'")
    with self._lock_for(dir_path):
        if not resolved.exists():
            return WriteResult(error=f"Error: Directory '{dir_path}' not found")
        if not resolved.is_dir():
            return WriteResult(error=f"Error: '{dir_path}' is not a directory")
        if any(resolved.iterdir()):
            message = (
                f"Error: directory '{dir_path}' is not empty. "
                "Remove its contents first."
            )
            return WriteResult(error=message)
        try:
            os.rmdir(resolved)
        except OSError as exc:
            return WriteResult(error=f"Error: failed to rmdir '{dir_path}': {exc}")
        return WriteResult(path=dir_path, files_update=None)
async def armdir(self, dir_path: str) -> WriteResult:
    """Async facade over :meth:`rmdir`; runs the blocking work in a thread."""
    result = await asyncio.to_thread(self.rmdir, dir_path)
    return result
def edit(
self,
file_path: str,

View file

@ -285,6 +285,34 @@ class MultiRootLocalFolderBackend:
overwrite,
)
def delete_file(self, file_path: str) -> WriteResult:
    """Route a file delete to the backend that owns the path's mount.

    A successful result's mount-local path is translated back to the
    mount-prefixed form; error results pass through unchanged.
    """
    try:
        mount, rel = self._split_mount_path(file_path)
    except ValueError as exc:
        return WriteResult(error=f"Error: {exc}")
    outcome = self._mount_to_backend[mount].delete_file(rel)
    if outcome.path:
        outcome.path = self._prefix_mount_path(mount, outcome.path)
    return outcome
async def adelete_file(self, file_path: str) -> WriteResult:
    """Async facade over :meth:`delete_file`; runs the blocking work in a thread."""
    result = await asyncio.to_thread(self.delete_file, file_path)
    return result
def rmdir(self, dir_path: str) -> WriteResult:
    """Route an empty-directory delete to the backend owning the mount.

    Mount roots themselves are never removable, regardless of emptiness;
    a successful result's path is translated back to mount-prefixed form.
    """
    try:
        mount, rel = self._split_mount_path(dir_path)
    except ValueError as exc:
        return WriteResult(error=f"Error: {exc}")
    if rel == "/":
        return WriteResult(error=f"Error: cannot rmdir mount root '{dir_path}'")
    outcome = self._mount_to_backend[mount].rmdir(rel)
    if outcome.path:
        outcome.path = self._prefix_mount_path(mount, outcome.path)
    return outcome
async def armdir(self, dir_path: str) -> WriteResult:
    """Async facade over :meth:`rmdir`; runs the blocking work in a thread."""
    result = await asyncio.to_thread(self.rmdir, dir_path)
    return result
def edit(
self,
file_path: str,