feat: improved agent streaming

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-29 07:20:31 -07:00
parent afb4b09cde
commit c110f5b955
60 changed files with 8068 additions and 303 deletions

View file

@ -168,6 +168,8 @@ class TestModeSpecificPrompts:
"edit_file",
"move_file",
"mkdir",
"rm",
"rmdir",
"list_tree",
"grep",
):
@ -182,6 +184,8 @@ class TestModeSpecificPrompts:
"edit_file",
"move_file",
"mkdir",
"rm",
"rmdir",
"list_tree",
"grep",
):
@ -190,6 +194,18 @@ class TestModeSpecificPrompts:
assert "/documents/" not in text, f"{name} mentions cloud namespace"
assert "temp_" not in text, f"{name} mentions cloud temp_ semantics"
def test_cloud_descs_include_rm_and_rmdir(self):
    """Cloud-mode descriptions must cover both ``rm`` and ``rmdir``."""
    cloud_descs = _build_tool_descriptions(FilesystemMode.CLOUD)
    for tool_name in ("rm", "rmdir"):
        assert tool_name in cloud_descs
    rm_text = cloud_descs["rm"]
    rmdir_text = cloud_descs["rmdir"]
    assert "Deletes a single file" in rm_text
    assert "Deletes an empty directory" in rmdir_text
    # The rmdir description must name itself and mention POSIX.
    for needle in ("rmdir", "POSIX"):
        assert needle in rmdir_text
def test_desktop_descs_warn_about_irreversibility(self):
    """Desktop-mode ``rm``/``rmdir`` descriptions must say "NOT reversible"."""
    desktop_descs = _build_tool_descriptions(FilesystemMode.DESKTOP_LOCAL_FOLDER)
    for tool_name in ("rm", "rmdir"):
        assert "NOT reversible" in desktop_descs[tool_name]
def test_sandbox_addendum_appended_when_available(self):
prompt = _build_filesystem_system_prompt(
FilesystemMode.CLOUD, sandbox_available=True

View file

@ -0,0 +1,309 @@
"""Unit tests for the kb_persistence snapshot helpers.
The full ``commit_staged_filesystem_state`` body exercises a real session
in integration tests; here we verify the building blocks used by the
snapshot/revert pipeline:
* ``_find_action_ids_batch`` issues a SINGLE query for N tool_call_ids
(regression guard against the N+1 lookup pattern).
* ``_mark_action_reversible`` is a no-op when ``action_id`` is ``None``.
* ``_doc_revision_payload`` and ``_load_chunks_for_snapshot`` produce the
shape the snapshot helpers consume.
These tests use ``MagicMock`` / ``AsyncMock`` against a fake session so
the assertions run in milliseconds and don't require Postgres.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock
import pytest
from app.agents.new_chat.middleware import kb_persistence
pytestmark = pytest.mark.unit
class _FakeResult:
    """Canned result object handed back by the fake ``session.execute``.

    Only the two accessors the tests in this module rely on are provided:
    ``all()`` and ``scalar_one_or_none()``.
    """

    def __init__(self, rows: list[Any] | None = None, scalar: Any = None) -> None:
        # A missing (or otherwise falsy) ``rows`` collapses to a new empty list.
        self._rows = rows if rows else []
        self._scalar = scalar

    def all(self) -> list[Any]:
        """Return a shallow copy of the canned rows."""
        return [*self._rows]

    def scalar_one_or_none(self) -> Any:
        """Return the canned scalar (``None`` unless one was supplied)."""
        return self._scalar
class _FakeSession:
    """Session double whose only member is an awaitable ``execute`` mock."""

    def __init__(self) -> None:
        # Each instance gets its own mock so await counts never leak
        # between tests.
        execute_mock = AsyncMock()
        self.execute = execute_mock
@pytest.mark.asyncio
async def test_find_action_ids_batch_issues_single_query() -> None:
    """Three tool_call_ids must be resolved with exactly one ``execute``."""
    expected = {"tc-a": 11, "tc-b": 22, "tc-c": 33}
    session = _FakeSession()
    session.execute.return_value = _FakeResult(
        rows=[
            MagicMock(id=action_id, tool_call_id=call_id)
            for call_id, action_id in expected.items()
        ]
    )

    mapping = await kb_persistence._find_action_ids_batch(
        session,  # type: ignore[arg-type]
        thread_id=1,
        tool_call_ids=set(expected),
    )

    assert mapping == expected
    query_count = session.execute.await_count
    assert query_count == 1, (
        "Snapshot binding must batch into ONE query; got "
        f"{query_count} (regression: N+1 lookup pattern)."
    )
@pytest.mark.asyncio
async def test_find_action_ids_batch_short_circuits_when_thread_id_missing() -> None:
    """``thread_id=None`` must yield an empty mapping without any query."""
    fake_session = _FakeSession()

    result = await kb_persistence._find_action_ids_batch(
        fake_session,  # type: ignore[arg-type]
        thread_id=None,
        tool_call_ids={"tc-a"},
    )

    assert result == {}
    fake_session.execute.assert_not_awaited()
@pytest.mark.asyncio
async def test_find_action_ids_batch_short_circuits_when_no_calls() -> None:
    """An empty ``tool_call_ids`` set must yield ``{}`` without any query."""
    fake_session = _FakeSession()

    result = await kb_persistence._find_action_ids_batch(
        fake_session,  # type: ignore[arg-type]
        thread_id=42,
        tool_call_ids=set(),
    )

    assert result == {}
    fake_session.execute.assert_not_awaited()
@pytest.mark.asyncio
async def test_mark_action_reversible_is_noop_for_null_id() -> None:
    """``action_id=None`` must not reach ``session.execute`` at all."""
    fake_session = _FakeSession()

    await kb_persistence._mark_action_reversible(
        fake_session,  # type: ignore[arg-type]
        action_id=None,
    )

    fake_session.execute.assert_not_awaited()
@pytest.mark.asyncio
async def test_mark_action_reversible_runs_update_for_real_id() -> None:
    """A concrete ``action_id`` must trigger exactly one ``execute`` await."""
    fake_session = _FakeSession()

    await kb_persistence._mark_action_reversible(
        fake_session,  # type: ignore[arg-type]
        action_id=99,
    )

    fake_session.execute.assert_awaited_once()
def test_doc_revision_payload_captures_metadata_virtual_path() -> None:
    """The payload must mirror title, folder, content, chunks and metadata."""
    doc = MagicMock()
    doc.configure_mock(
        content="body",
        title="notes.md",
        folder_id=7,
        document_metadata={"virtual_path": "/documents/team/notes.md"},
    )

    payload = kb_persistence._doc_revision_payload(
        doc, chunks_before=[{"content": "x"}]
    )

    # Expected values are fresh literals, independent of the objects passed in.
    expected = {
        "title_before": "notes.md",
        "folder_id_before": 7,
        "content_before": "body",
        "chunks_before": [{"content": "x"}],
        "metadata_before": {"virtual_path": "/documents/team/notes.md"},
    }
    for key, value in expected.items():
        assert payload[key] == value
def test_doc_revision_payload_handles_missing_metadata() -> None:
    """A document without metadata yields ``metadata_before is None``."""
    doc = MagicMock()
    doc.configure_mock(
        content="",
        title="",
        folder_id=None,
        document_metadata=None,
    )

    snapshot = kb_persistence._doc_revision_payload(doc)

    assert snapshot["metadata_before"] is None
@pytest.mark.asyncio
async def test_load_chunks_for_snapshot_returns_content_only() -> None:
    """Snapshot chunks intentionally omit embeddings (regenerated on revert).

    Two rows exposing a ``content`` attribute are served by the fake
    session; the helper must return them as ``{"content": ...}`` dicts in
    the same order.
    """
    session = _FakeSession()
    session.execute.return_value = _FakeResult(
        rows=[
            MagicMock(content="alpha"),
            MagicMock(content="beta"),
        ]
    )

    chunks = await kb_persistence._load_chunks_for_snapshot(
        # The suppression belongs on the fake session (it is not a real
        # session object), matching the other tests in this module.
        session,  # type: ignore[arg-type]
        doc_id=42,
    )

    assert chunks == [{"content": "alpha"}, {"content": "beta"}]
# ---------------------------------------------------------------------------
# Deferred reversibility-flip dispatches.
#
# The snapshot helpers used to dispatch ``action_log_updated`` directly
# from inside the SAVEPOINT block. That meant the SSE side-channel
# could tell the UI a row was reversible while the OUTER transaction
# was still pending — and if the outer commit failed, every SAVEPOINT
# rolled back too, leaving the UI in a state inconsistent with
# durable storage. The deferred-dispatch contract fixes that:
#
# • when a ``deferred_dispatches`` list is provided, the helper
# APPENDS the action_id and does NOT dispatch;
# • the caller (``commit_staged_filesystem_state``) flushes the list
# only AFTER ``await session.commit()`` succeeds; on rollback it
# clears the list so nothing is emitted.
# ---------------------------------------------------------------------------
class _NestedCtx:
    """Do-nothing async context manager used in place of ``session.begin_nested()``."""

    async def __aenter__(self) -> _NestedCtx:
        # Entering yields the context object itself.
        return self

    async def __aexit__(self, exc_type, exc, tb) -> bool:
        # A falsy return lets any exception raised in the body propagate.
        suppress = False
        return suppress
@pytest.mark.asyncio
async def test_pre_write_snapshot_defers_dispatch_when_list_provided(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A supplied ``deferred_dispatches`` list receives the action id.

    The module-level dispatcher is swapped for a recorder so the test can
    show it was never invoked while the id was queued instead.
    """
    inline_calls: list[int] = []
    queued: list[int] = []

    async def _record_dispatch(action_id: int | None) -> None:
        if action_id is None:
            return
        inline_calls.append(int(action_id))

    def _assign_revision_id(revision: Any) -> None:
        # Give the added revision an id for the helper to return.
        revision.id = 17

    monkeypatch.setattr(
        kb_persistence, "_dispatch_reversibility_update", _record_dispatch
    )

    session = MagicMock()
    session.configure_mock(
        begin_nested=MagicMock(return_value=_NestedCtx()),
        execute=AsyncMock(return_value=_FakeResult(rows=[])),
        flush=AsyncMock(),
        add=MagicMock(side_effect=_assign_revision_id),
    )

    doc = MagicMock(id=99, document_metadata={"virtual_path": "/documents/x.md"})
    doc.configure_mock(title="x.md", folder_id=None, content="body")

    revision_id = await kb_persistence._snapshot_document_pre_write(
        session,  # type: ignore[arg-type]
        doc=doc,
        action_id=42,
        search_space_id=1,
        turn_id="t-1",
        deferred_dispatches=queued,
    )

    assert revision_id == 17
    # Nothing dispatched inline; the action id waits in the caller's list.
    assert inline_calls == []
    assert queued == [42]
@pytest.mark.asyncio
async def test_pre_write_snapshot_dispatches_inline_when_list_omitted(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Omitting ``deferred_dispatches`` must dispatch the action id directly."""
    inline_calls: list[int] = []

    async def _record_dispatch(action_id: int | None) -> None:
        if action_id is None:
            return
        inline_calls.append(int(action_id))

    def _assign_revision_id(revision: Any) -> None:
        revision.id = 7

    monkeypatch.setattr(
        kb_persistence, "_dispatch_reversibility_update", _record_dispatch
    )

    session = MagicMock()
    session.configure_mock(
        begin_nested=MagicMock(return_value=_NestedCtx()),
        execute=AsyncMock(return_value=_FakeResult(rows=[])),
        flush=AsyncMock(),
        add=MagicMock(side_effect=_assign_revision_id),
    )

    doc = MagicMock(id=11, document_metadata={"virtual_path": "/documents/y.md"})
    doc.configure_mock(title="y.md", folder_id=None, content="body")

    # ``deferred_dispatches`` is deliberately left out of this call.
    await kb_persistence._snapshot_document_pre_write(
        session,  # type: ignore[arg-type]
        doc=doc,
        action_id=88,
        search_space_id=1,
        turn_id="t-1",
    )

    assert inline_calls == [88]
@pytest.mark.asyncio
async def test_pre_mkdir_snapshot_defers_dispatch_when_list_provided(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Folder mkdir snapshots honour the same deferred-dispatch contract.

    With a ``deferred_dispatches`` list supplied, the action id must be
    appended to that list and the inline dispatcher must stay untouched.
    """
    session = MagicMock()
    session.begin_nested = MagicMock(return_value=_NestedCtx())
    session.execute = AsyncMock()  # _mark_action_reversible calls execute
    session.flush = AsyncMock()

    def _add(rev: Any) -> None:
        rev.id = 3

    session.add = MagicMock(side_effect=_add)

    dispatched: list[int] = []

    async def _fake_dispatch(action_id: int | None) -> None:
        if action_id is not None:
            dispatched.append(int(action_id))

    monkeypatch.setattr(
        kb_persistence, "_dispatch_reversibility_update", _fake_dispatch
    )

    deferred: list[int] = []
    # ``name`` is reserved by the Mock constructor (it labels the mock's
    # repr), so passing ``name="f"`` there would leave ``folder.name`` as a
    # child mock. Assign it after construction to get a real string.
    folder = MagicMock(id=2, parent_id=None, position="a0")
    folder.name = "f"

    await kb_persistence._snapshot_folder_pre_mkdir(
        session,  # type: ignore[arg-type]
        folder=folder,
        action_id=55,
        search_space_id=1,
        turn_id="t-1",
        deferred_dispatches=deferred,
    )

    # Inline dispatch must NOT have fired; the action_id is queued.
    assert dispatched == []
    assert deferred == [55]

View file

@ -0,0 +1,139 @@
"""Unit tests for ``KnowledgeTreeMiddleware`` rendering.
The empty-folder marker is critical UX: without it, the LLM cannot
distinguish a leaf folder containing one document from a leaf folder
that has no descendants at all, and ends up firing ``rmdir`` on
non-empty folders. These tests pin the rendering contract so that it
cannot silently regress.
"""
from __future__ import annotations
from app.agents.new_chat.middleware.knowledge_tree import KnowledgeTreeMiddleware
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT
def _compute(folder_paths: list[str], doc_paths: list[str]) -> set[str]:
    """Shorthand for ``KnowledgeTreeMiddleware._compute_non_empty_folders``."""
    compute_non_empty = KnowledgeTreeMiddleware._compute_non_empty_folders
    return compute_non_empty(folder_paths, doc_paths)
class TestComputeNonEmptyFolders:
    """Pins which folders ``_compute_non_empty_folders`` reports as non-empty."""

    def test_folder_with_direct_document_is_non_empty(self):
        leaf = f"{DOCUMENTS_ROOT}/Travel/Boarding Pass"
        docs = [f"{DOCUMENTS_ROOT}/Travel/Boarding Pass/southwest.pdf.xml"]

        assert leaf in _compute([leaf], docs)

    def test_truly_empty_leaf_folder_is_not_non_empty(self):
        no_docs: list[str] = []
        folders = [f"{DOCUMENTS_ROOT}/Travel/Boarding Pass"]

        assert _compute(folders, no_docs) == set()

    def test_documents_propagate_up_to_all_ancestors(self):
        chain = [
            f"{DOCUMENTS_ROOT}/A",
            f"{DOCUMENTS_ROOT}/A/B",
            f"{DOCUMENTS_ROOT}/A/B/C",
        ]

        result = _compute(chain, [f"{DOCUMENTS_ROOT}/A/B/C/file.xml"])

        # A document in the deepest folder marks every folder in the chain.
        assert result == set(chain)

    def test_chain_with_subfolders_marks_only_leaf_empty(self):
        # POSIX-like rule: a folder counts as "empty" only when it has no
        # immediate children at all (neither docs nor sub-folders). The
        # model depends on this because parallel ``rmdir`` calls all see
        # the same starting state, so removing a parent before its
        # children is never safe.
        top = f"{DOCUMENTS_ROOT}/X"
        middle = f"{DOCUMENTS_ROOT}/X/Y"
        leaf = f"{DOCUMENTS_ROOT}/X/Y/Z"

        result = _compute([top, middle, leaf], [])

        # ``X`` and ``X/Y`` each hold a sub-folder and are therefore
        # non-empty; only the leaf ``X/Y/Z`` is absent from the result and
        # would carry the ``(empty)`` marker.
        assert result == {top, middle}

    def test_sibling_with_doc_does_not_mark_other_sibling_non_empty(self):
        # Mirrors a real DB layout where every intermediate folder is
        # materialized in the ``folders`` table.
        travel = f"{DOCUMENTS_ROOT}/Travel"
        boarding_pass = f"{DOCUMENTS_ROOT}/Travel/Boarding Pass"
        notes = f"{DOCUMENTS_ROOT}/Travel/Notes"

        result = _compute(
            [travel, boarding_pass, notes],
            [f"{DOCUMENTS_ROOT}/Travel/Notes/itinerary.xml"],
        )

        # ``Travel`` has children and ``Notes`` holds the doc, so both are
        # non-empty; the sibling leaf ``Boarding Pass`` stays empty.
        assert travel in result
        assert notes in result
        assert boarding_pass not in result
class TestFormatTreeRendering:
    """Integration check: empty leaf gets ``(empty)`` marker; non-empty doesn't."""

    def _render(
        self,
        folder_paths: list[str],
        doc_specs: list[dict],
    ) -> str:
        """Render the knowledge tree for the given folders and documents.

        Folder ids are assigned ``1..N`` in ``folder_paths`` order. Each
        entry of ``doc_specs`` becomes an attribute-style row object whose
        attributes are the dict's keys.
        """
        # Local imports, in keeping with the existing ``PathIndex`` import.
        from types import SimpleNamespace

        from app.agents.new_chat.path_resolver import PathIndex

        index = PathIndex(
            folder_paths=dict(enumerate(folder_paths, start=1)),
        )
        # ``SimpleNamespace`` is the stdlib equivalent of a hand-written
        # "kwargs to attributes" row class.
        docs = [SimpleNamespace(**spec) for spec in doc_specs]
        mw = KnowledgeTreeMiddleware(
            search_space_id=1,
            filesystem_mode=None,  # type: ignore[arg-type]
        )
        return mw._format_tree(index, docs)

    def test_renders_empty_marker_only_for_truly_empty_folders(self):
        # Reproduces the failure scenario from the bug report:
        # ``Boarding Pass`` is empty (its only doc was just deleted), while
        # ``Tax Returns`` still has ``federal.pdf``. All intermediate
        # folders are present in the index, mirroring the real DB layout.
        tax_returns = f"{DOCUMENTS_ROOT}/File Upload/2026-04-15/Finance/Tax Returns"
        folder_paths = [
            f"{DOCUMENTS_ROOT}/File Upload",
            f"{DOCUMENTS_ROOT}/File Upload/2026-04-08",
            f"{DOCUMENTS_ROOT}/File Upload/2026-04-08/Travel",
            f"{DOCUMENTS_ROOT}/File Upload/2026-04-08/Travel/Boarding Pass",
            f"{DOCUMENTS_ROOT}/File Upload/2026-04-15",
            f"{DOCUMENTS_ROOT}/File Upload/2026-04-15/Finance",
            tax_returns,
        ]
        # ``_render`` numbers folders from 1 in list order, so the folder id
        # is the list position plus one.
        tax_returns_folder_id = folder_paths.index(tax_returns) + 1

        rendered = self._render(
            folder_paths=folder_paths,
            doc_specs=[
                {
                    "id": 100,
                    "title": "federal.pdf",
                    "folder_id": tax_returns_folder_id,
                },
            ],
        )

        assert "Boarding Pass/ (empty)" in rendered
        assert "Tax Returns/ (empty)" not in rendered
        # Intermediate ancestors of the doc must NOT be marked empty.
        assert "Finance/ (empty)" not in rendered
        assert "2026-04-15/ (empty)" not in rendered

View file

@ -69,3 +69,74 @@ def test_local_backend_write_rejects_missing_parent_directory(tmp_path: Path):
assert write.error is not None
assert "parent directory" in write.error
assert not (tmp_path / "tempoo").exists()
def test_local_backend_delete_file_success(tmp_path: Path):
    """Deleting an existing file reports its path and removes it from disk."""
    backend = LocalFolderBackend(str(tmp_path))
    target = tmp_path / "delete-me.md"
    target.write_text("bye")

    outcome = backend.delete_file("/delete-me.md")

    assert outcome.error is None
    assert outcome.path == "/delete-me.md"
    assert not target.exists()
def test_local_backend_delete_file_rejects_directory(tmp_path: Path):
    """``delete_file`` on a directory must error and leave it in place."""
    backend = LocalFolderBackend(str(tmp_path))
    directory = tmp_path / "subdir"
    directory.mkdir()

    outcome = backend.delete_file("/subdir")

    assert outcome.error is not None
    assert "directory" in outcome.error
    assert directory.exists()
def test_local_backend_delete_file_missing_returns_error(tmp_path: Path):
    """Deleting a path that does not exist yields a "not found" error."""
    outcome = LocalFolderBackend(str(tmp_path)).delete_file("/nope.md")

    assert outcome.error is not None
    assert "not found" in outcome.error
def test_local_backend_rmdir_success(tmp_path: Path):
    """``rmdir`` on an empty directory reports its path and removes it."""
    backend = LocalFolderBackend(str(tmp_path))
    directory = tmp_path / "empty"
    directory.mkdir()

    outcome = backend.rmdir("/empty")

    assert outcome.error is None
    assert outcome.path == "/empty"
    assert not directory.exists()
def test_local_backend_rmdir_rejects_non_empty(tmp_path: Path):
    """``rmdir`` on a populated directory must error and keep its child."""
    backend = LocalFolderBackend(str(tmp_path))
    parent = tmp_path / "withkid"
    parent.mkdir()
    child = parent / "child.md"
    child.write_text("x")

    outcome = backend.rmdir("/withkid")

    assert outcome.error is not None
    assert "not empty" in outcome.error
    assert child.exists()
def test_local_backend_rmdir_rejects_file(tmp_path: Path):
    """``rmdir`` on a regular file must error and leave the file untouched."""
    backend = LocalFolderBackend(str(tmp_path))
    target = tmp_path / "f.md"
    target.write_text("x")

    res = backend.rmdir("/f.md")

    assert res.error is not None
    assert "not a directory" in res.error
    # As in the other rejection tests: an error result alone does not prove
    # that nothing was removed, so check the file is still on disk.
    assert target.exists()
def test_local_backend_rmdir_rejects_root(tmp_path: Path):
    """``rmdir /`` must return an error and keep the sandbox root on disk.

    The exact wording is deliberately not pinned: an error mentioning
    either "Invalid path" or "root" is accepted.
    """
    outcome = LocalFolderBackend(str(tmp_path)).rmdir("/")

    assert outcome.error is not None
    assert any(token in outcome.error for token in ("Invalid path", "root"))
    assert tmp_path.exists()