feat: improved agent streaming

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-29 07:20:31 -07:00
parent afb4b09cde
commit c110f5b955
60 changed files with 8068 additions and 303 deletions

View file

@ -15,6 +15,17 @@ from app.agents.new_chat.middleware.action_log import ActionLogMiddleware
from app.agents.new_chat.tools.registry import ToolDefinition
@dataclass
class _FakeRuntime:
"""Minimal stand-in for ``ToolRuntime`` used in unit tests.
``ActionLogMiddleware`` reads ``runtime.config['configurable']['turn_id']``
to populate the new ``chat_turn_id`` column (see migration 135).
"""
config: dict[str, Any] | None = None
@dataclass
class _FakeRequest:
"""Minimal stand-in for ToolCallRequest used in unit tests."""
@ -120,6 +131,9 @@ class TestActionLogMiddlewarePersistence:
"args": {"color": "red", "size": 3},
"id": "tc-abc",
},
runtime=_FakeRuntime(
config={"configurable": {"turn_id": "42:1700000000000"}}
),
)
result_msg = ToolMessage(content="ok", tool_call_id="tc-abc", id="msg-1")
handler = AsyncMock(return_value=result_msg)
@ -142,6 +156,32 @@ class TestActionLogMiddlewarePersistence:
assert row.error is None
assert row.reverse_descriptor is None
assert row.reversible is False
# Migration 135: ``turn_id`` is the deprecated alias of ``tool_call_id``;
# ``chat_turn_id`` comes from ``runtime.config['configurable']['turn_id']``.
assert row.tool_call_id == "tc-abc"
assert row.turn_id == "tc-abc"
assert row.chat_turn_id == "42:1700000000000"
@pytest.mark.asyncio
async def test_chat_turn_id_none_when_runtime_missing(
    self, patch_get_flags, fake_session_factory
) -> None:
    """``chat_turn_id`` falls back to NULL when ``runtime.config`` is absent."""
    captured, factory = fake_session_factory
    middleware = ActionLogMiddleware(thread_id=1, search_space_id=1, user_id=None)
    req = _FakeRequest(
        tool_call={"name": "make_widget", "args": {}, "id": "tc-1"},
        runtime=None,
    )
    tool_handler = AsyncMock(
        return_value=ToolMessage(content="ok", tool_call_id="tc-1")
    )

    with (
        patch_get_flags(_enabled_flags()),
        patch("app.db.shielded_async_session", side_effect=lambda: factory()),
    ):
        await middleware.awrap_tool_call(req, tool_handler)

    persisted = captured["rows"][0]
    assert persisted.tool_call_id == "tc-1"
    assert persisted.chat_turn_id is None
@pytest.mark.asyncio
async def test_writes_row_on_failure_and_reraises(
@ -293,6 +333,76 @@ class TestReverseDescriptor:
assert row.reversible is False
class TestActionLogDispatch:
"""Verify ``adispatch_custom_event`` fires after commit."""
@pytest.mark.asyncio
async def test_dispatches_action_log_event_on_success(
    self, patch_get_flags, fake_session_factory
) -> None:
    _captured, factory = fake_session_factory
    middleware = ActionLogMiddleware(thread_id=42, search_space_id=7, user_id="u1")
    req = _FakeRequest(
        tool_call={
            "name": "make_widget",
            "args": {"color": "red"},
            "id": "tc-evt",
        },
        runtime=_FakeRuntime(
            config={"configurable": {"turn_id": "42:1700000000000"}}
        ),
    )
    tool_handler = AsyncMock(
        return_value=ToolMessage(content="ok", tool_call_id="tc-evt", id="msg-42")
    )
    dispatch_mock = AsyncMock()

    with (
        patch_get_flags(_enabled_flags()),
        patch("app.db.shielded_async_session", side_effect=lambda: factory()),
        patch(
            "app.agents.new_chat.middleware.action_log.adispatch_custom_event",
            dispatch_mock,
        ),
    ):
        await middleware.awrap_tool_call(req, tool_handler)

    dispatch_mock.assert_awaited_once()
    awaited = dispatch_mock.await_args
    assert awaited is not None
    assert awaited.args[0] == "action_log"

    payload = awaited.args[1]
    assert payload["lc_tool_call_id"] == "tc-evt"
    assert payload["chat_turn_id"] == "42:1700000000000"
    assert payload["tool_name"] == "make_widget"
    assert payload["reversible"] is False
    assert payload["reverse_descriptor_present"] is False
    assert payload["error"] is False
@pytest.mark.asyncio
async def test_no_dispatch_when_persistence_fails(self, patch_get_flags) -> None:
    """If commit fails the dispatch is suppressed (no row to surface)."""
    middleware = ActionLogMiddleware(thread_id=1, search_space_id=1, user_id=None)
    req = _FakeRequest(
        tool_call={"name": "make_widget", "args": {}, "id": "tc1"}
    )
    tool_handler = AsyncMock(
        return_value=ToolMessage(content="ok", tool_call_id="tc1")
    )
    dispatch_mock = AsyncMock()

    def _broken_session():
        raise RuntimeError("DB is down")

    with (
        patch_get_flags(_enabled_flags()),
        patch("app.db.shielded_async_session", side_effect=_broken_session),
        patch(
            "app.agents.new_chat.middleware.action_log.adispatch_custom_event",
            dispatch_mock,
        ),
    ):
        await middleware.awrap_tool_call(req, tool_handler)

    dispatch_mock.assert_not_awaited()
class TestArgsTruncation:
@pytest.mark.asyncio
async def test_huge_args_payload_is_truncated(

View file

@ -0,0 +1,122 @@
"""Tests for the desktop-mode safety ruleset.
In desktop mode the agent operates against the user's real disk with no
revision history, so destructive filesystem operations must require
explicit approval. These tests pin the set of tools that get the ``ask``
gate so it cannot silently regress.
"""
from __future__ import annotations
import pytest
from app.agents.new_chat.middleware.permission import PermissionMiddleware
from app.agents.new_chat.permissions import (
Rule,
Ruleset,
aggregate_action,
evaluate_many,
)
pytestmark = pytest.mark.unit
# Mirror the ruleset built inside ``chat_deepagent._build_compiled_agent_blocking``
# when ``filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER``. Keeping a
# copy here means the rule contract has a focused regression test even when
# the larger graph-build helper is hard to instantiate in unit tests.
# Permissions that mutate the user's real disk and therefore need the
# "ask" gate in desktop mode.
_DESTRUCTIVE_PERMISSIONS = ("rm", "rmdir", "move_file", "edit_file", "write_file")

DESKTOP_SAFETY_RULESET = Ruleset(
    rules=[
        Rule(permission=name, pattern="*", action="ask")
        for name in _DESTRUCTIVE_PERMISSIONS
    ],
    origin="desktop_safety",
)

SURFSENSE_DEFAULTS = Ruleset(
    rules=[Rule(permission="*", pattern="*", action="allow")],
    origin="surfsense_defaults",
)
def _action_for(tool_name: str, *rulesets: Ruleset) -> str:
    """Resolve the effective action for *tool_name* under layered *rulesets*."""
    matched = evaluate_many(tool_name, [tool_name], *rulesets)
    return aggregate_action(matched)
class TestDesktopSafetyRulesGateDestructiveOps:
    @pytest.mark.parametrize(
        "tool_name",
        ["rm", "rmdir", "move_file", "edit_file", "write_file"],
    )
    def test_destructive_op_resolves_to_ask(self, tool_name: str) -> None:
        # desktop_safety is layered after surfsense_defaults' "allow */*",
        # so last-match-wins resolution must land on "ask".
        resolved = _action_for(tool_name, SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET)
        assert resolved == "ask", (
            f"{tool_name} must require approval in desktop mode "
            f"(no revert path on real disk); got {resolved!r}"
        )

    @pytest.mark.parametrize(
        "tool_name",
        ["read_file", "ls", "list_tree", "grep", "glob", "cd", "pwd", "mkdir"],
    )
    def test_safe_ops_remain_allowed(self, tool_name: str) -> None:
        # Read-only / trivially reversible tools stay ungated — otherwise
        # every navigation in desktop mode would pop an interrupt.
        resolved = _action_for(tool_name, SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET)
        assert resolved == "allow", (
            f"{tool_name} should not be gated in desktop mode; got {resolved!r}"
        )
class TestDesktopSafetyOverridesAllowDefault:
    def test_layer_order_last_match_wins(self) -> None:
        # Guard against accidentally swapping the ruleset order in
        # ``_build_compiled_agent_blocking``: layered before the defaults,
        # desktop_safety is inert and the broad allow wins.
        assert _action_for("rm", DESKTOP_SAFETY_RULESET, SURFSENSE_DEFAULTS) == "allow"
        # Correct layering (defaults first, safety last): "ask" wins.
        assert _action_for("rm", SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET) == "ask"
class TestPermissionMiddlewareIntegration:
    def test_middleware_raises_interrupt_for_rm_in_desktop_mode(self) -> None:
        from langchain_core.messages import AIMessage

        from app.agents.new_chat.errors import RejectedError

        middleware = PermissionMiddleware(
            rulesets=[SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET]
        )
        # Replace the interrupt hook with a canned "reject" decision so the
        # ask path can be asserted without a LangGraph runtime.
        middleware._raise_interrupt = lambda **kw: {"decision_type": "reject"}  # type: ignore[assignment]

        rm_call = {
            "name": "rm",
            "args": {"path": "/Users/me/Documents/important.docx"},
            "id": "tc-rm",
        }
        state = {"messages": [AIMessage(content="", tool_calls=[rm_call])]}

        class _FakeRuntime:
            config: dict = {"configurable": {"thread_id": "test"}}

        with pytest.raises(RejectedError):
            middleware.after_model(state, _FakeRuntime())

View file

@ -0,0 +1,111 @@
"""Tests for the default auto-approval list in ``hitl.request_approval``.
These pin the policy that low-stakes connector creation tools (drafts,
new-file creates) skip the HITL interrupt by default. Without this set,
every "draft my newsletter" turn used to fire ~3 interrupts before any
useful work happened.
"""
from __future__ import annotations
import pytest
from app.agents.new_chat.tools.hitl import (
DEFAULT_AUTO_APPROVED_TOOLS,
HITLResult,
request_approval,
)
pytestmark = pytest.mark.unit
class TestDefaultAutoApprovedToolsList:
    def test_set_contains_expected_creation_tools(self) -> None:
        # One explicit test to update whenever the policy list changes —
        # keep in sync with ``hitl.DEFAULT_AUTO_APPROVED_TOOLS``.
        assert DEFAULT_AUTO_APPROVED_TOOLS == {
            "create_gmail_draft",
            "update_gmail_draft",
            "create_notion_page",
            "create_confluence_page",
            "create_google_drive_file",
            "create_dropbox_file",
            "create_onedrive_file",
        }

    def test_set_is_immutable(self) -> None:
        # A frozenset blocks runtime mutation that could silently widen
        # the auto-approval surface.
        assert isinstance(DEFAULT_AUTO_APPROVED_TOOLS, frozenset)

    def test_send_tools_are_not_auto_approved(self) -> None:
        # Tools that broadcast externally must always prompt the user.
        always_gated = (
            "send_gmail_email",
            "send_discord_message",
            "send_teams_message",
            "delete_notion_page",
            "create_calendar_event",
            "delete_calendar_event",
        )
        for tool_name in always_gated:
            assert tool_name not in DEFAULT_AUTO_APPROVED_TOOLS, (
                f"{tool_name} must remain HITL-gated"
            )
class TestRequestApprovalAutoBypass:
    def test_auto_approved_tool_skips_interrupt(self) -> None:
        # No interrupt is mocked here: had the function reached
        # ``langgraph.types.interrupt`` it would raise GraphInterrupt, so a
        # clean HITLResult is proof that the bypass fired.
        outcome = request_approval(
            action_type="gmail_draft_creation",
            tool_name="create_gmail_draft",
            params={"to": "alice@example.com", "subject": "hi", "body": "hey"},
        )
        assert isinstance(outcome, HITLResult)
        assert outcome.decision_type == "auto_approved"
        assert outcome.rejected is False
        # Params pass through untouched — no user edit was possible.
        assert outcome.params == {
            "to": "alice@example.com",
            "subject": "hi",
            "body": "hey",
        }

    def test_non_listed_tool_still_attempts_interrupt(self) -> None:
        # A tool outside the default list must reach ``langgraph.interrupt``.
        # Outside a runnable context that raises RuntimeError — exactly the
        # signal that the bypass did NOT fire.
        with pytest.raises(RuntimeError, match="runnable context"):
            request_approval(
                action_type="gmail_email_send",
                tool_name="send_gmail_email",
                params={"to": "alice@example.com", "subject": "hi", "body": "hey"},
            )

    def test_user_trusted_tools_still_take_precedence(self) -> None:
        # Per-connector "always allow" entries (``trusted_tools`` from
        # MCP/UI) are consulted before the default list and must keep
        # working for tools the default list doesn't cover.
        outcome = request_approval(
            action_type="mcp_tool_call",
            tool_name="my_custom_mcp_tool",
            params={"x": 1},
            trusted_tools=["my_custom_mcp_tool"],
        )
        assert outcome.decision_type == "trusted"
        assert outcome.rejected is False

    def test_auto_approved_overrides_no_trusted_tools(self) -> None:
        # An empty trusted_tools list plus a default-listed tool must still
        # bypass — this pins the check order inside request_approval.
        outcome = request_approval(
            action_type="notion_page_creation",
            tool_name="create_notion_page",
            params={"title": "Plan"},
            trusted_tools=[],
        )
        assert outcome.decision_type == "auto_approved"

View file

@ -0,0 +1,333 @@
"""Cloud-mode behavior tests for the new ``rm`` and ``rmdir`` filesystem tools.
The tools build ``Command(update=...)`` payloads that the persistence
middleware applies at end of turn. These tests stub out the backend and
runtime to assert the staging payload shape:
* ``rm`` queues into ``pending_deletes`` and tombstones state files.
* ``rm`` rejects directories, ``/documents``, root, and the anonymous doc.
* ``rmdir`` queues into ``pending_dir_deletes`` and rejects non-empty dirs.
* ``rmdir`` un-stages a same-turn ``mkdir`` rather than queuing a delete.
* ``rmdir`` refuses to drop the cwd or any of its ancestors.
* ``KBPostgresBackend`` view-helpers honor staged deletes.
"""
from __future__ import annotations
from types import SimpleNamespace
from typing import Any
from unittest.mock import AsyncMock
import pytest
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
pytestmark = pytest.mark.unit
def _make_middleware(mode: FilesystemMode = FilesystemMode.CLOUD):
    """Build a bare middleware instance without running ``__init__``."""
    mw = SurfSenseFilesystemMiddleware.__new__(SurfSenseFilesystemMiddleware)
    mw._custom_tool_descriptions = {}
    mw._filesystem_mode = mode
    return mw
def _runtime(state: dict[str, Any] | None = None, *, tool_call_id: str = "tc-abc"):
state = state or {}
state.setdefault("cwd", "/documents")
return SimpleNamespace(state=state, tool_call_id=tool_call_id)
class _KBBackendStub(KBPostgresBackend):
    """Constructable :class:`KBPostgresBackend` stand-in for tests.

    The real ``__init__`` needs a runtime plus a DB session, so it is
    bypassed entirely; only the two methods the rm/rmdir tools call are
    wired up. Inheriting keeps ``isinstance(backend, KBPostgresBackend)``
    true, which is what routes the tools down the cloud (not desktop)
    code path.
    """

    def __init__(self, *, children=None, file_data=None) -> None:
        self.als_info = AsyncMock(return_value=children or [])
        loaded = None if file_data is None else (file_data, 17)
        self._load_file_data = AsyncMock(return_value=loaded)
def _make_backend_stub(*, children=None, file_data=None) -> KBPostgresBackend:
    """Convenience factory around :class:`_KBBackendStub`."""
    return _KBBackendStub(children=children, file_data=file_data)
def _bind_backend(middleware, backend):
"""Inject a backend resolver onto the middleware test instance."""
middleware._get_backend = lambda runtime: backend
return backend
# ---------------------------------------------------------------------------
# rm
# ---------------------------------------------------------------------------
class TestRmStaging:
    @pytest.mark.asyncio
    async def test_stages_delete_and_tombstones_state(self):
        mw = _make_middleware()
        _bind_backend(mw, _make_backend_stub(children=[], file_data={"content": ["x"]}))
        rt = _runtime(
            {
                "cwd": "/documents",
                "files": {"/documents/notes.md": {"content": ["hello"]}},
                "doc_id_by_path": {"/documents/notes.md": 17},
            },
            tool_call_id="tc-1",
        )

        outcome = await mw._create_rm_tool().coroutine(
            "/documents/notes.md", runtime=rt
        )

        assert hasattr(outcome, "update"), f"expected Command, got {outcome!r}"
        staged = outcome.update
        assert staged["pending_deletes"] == [
            {"path": "/documents/notes.md", "tool_call_id": "tc-1"}
        ]
        assert staged["files"] == {"/documents/notes.md": None}
        assert staged["doc_id_by_path"] == {"/documents/notes.md": None}

    @pytest.mark.asyncio
    async def test_rejects_documents_root(self):
        mw = _make_middleware()
        outcome = await mw._create_rm_tool().coroutine(
            "/documents", runtime=_runtime()
        )
        assert isinstance(outcome, str)
        assert "refusing to rm" in outcome

    @pytest.mark.asyncio
    async def test_rejects_root(self):
        mw = _make_middleware()
        outcome = await mw._create_rm_tool().coroutine("/", runtime=_runtime())
        assert isinstance(outcome, str)
        assert "refusing to rm" in outcome

    @pytest.mark.asyncio
    async def test_rejects_directory_via_staged_dirs(self):
        mw = _make_middleware()
        rt = _runtime({"staged_dirs": ["/documents/team-x"]})
        outcome = await mw._create_rm_tool().coroutine(
            "/documents/team-x", runtime=rt
        )
        assert isinstance(outcome, str)
        assert "directory" in outcome.lower()
        assert "rmdir" in outcome

    @pytest.mark.asyncio
    async def test_rejects_directory_via_listing(self):
        mw = _make_middleware()
        _bind_backend(
            mw,
            _make_backend_stub(
                children=[{"path": "/documents/foo/x.md", "is_dir": False}]
            ),
        )
        outcome = await mw._create_rm_tool().coroutine(
            "/documents/foo", runtime=_runtime()
        )
        assert isinstance(outcome, str)
        assert "directory" in outcome.lower()

    @pytest.mark.asyncio
    async def test_rejects_anonymous_doc(self):
        mw = _make_middleware()
        rt = _runtime(
            {
                "kb_anon_doc": {
                    "path": "/documents/uploaded.xml",
                    "title": "uploaded",
                    "content": "",
                    "chunks": [],
                }
            }
        )
        outcome = await mw._create_rm_tool().coroutine(
            "/documents/uploaded.xml", runtime=rt
        )
        assert isinstance(outcome, str)
        assert "read-only" in outcome

    @pytest.mark.asyncio
    async def test_drops_path_from_dirty_paths(self):
        mw = _make_middleware()
        _bind_backend(mw, _make_backend_stub(children=[], file_data={"content": ["x"]}))
        rt = _runtime(
            {
                "files": {"/documents/notes.md": {"content": ["x"]}},
                "doc_id_by_path": {"/documents/notes.md": 17},
                "dirty_paths": ["/documents/notes.md"],
            }
        )

        outcome = await mw._create_rm_tool().coroutine(
            "/documents/notes.md", runtime=rt
        )

        # dirty_paths[0] is the _CLEAR sentinel; everything after it must
        # exclude the path we just rm'd.
        remaining = (outcome.update.get("dirty_paths") or [])[1:]
        assert "/documents/notes.md" not in remaining
# ---------------------------------------------------------------------------
# rmdir
# ---------------------------------------------------------------------------
class TestRmdirStaging:
    @pytest.mark.asyncio
    async def test_stages_dir_delete_when_empty_and_db_backed(self):
        mw = _make_middleware()
        backend = _bind_backend(mw, _make_backend_stub(children=[]))
        # Folder, not a file: _load_file_data -> None. First als_info call
        # lists the (empty) folder, second lists the parent so the folder
        # is seen to exist.
        backend._load_file_data = AsyncMock(return_value=None)
        backend.als_info = AsyncMock(
            side_effect=[
                [],
                [{"path": "/documents/proj", "is_dir": True}],
            ]
        )
        rt = _runtime({"cwd": "/documents"}, tool_call_id="tc-rd")

        outcome = await mw._create_rmdir_tool().coroutine(
            "/documents/proj", runtime=rt
        )

        assert hasattr(outcome, "update")
        assert outcome.update["pending_dir_deletes"] == [
            {"path": "/documents/proj", "tool_call_id": "tc-rd"}
        ]

    @pytest.mark.asyncio
    async def test_rejects_non_empty(self):
        mw = _make_middleware()
        _bind_backend(
            mw,
            _make_backend_stub(
                children=[{"path": "/documents/proj/x.md", "is_dir": False}]
            ),
        )
        outcome = await mw._create_rmdir_tool().coroutine(
            "/documents/proj", runtime=_runtime()
        )
        assert isinstance(outcome, str)
        assert "not empty" in outcome

    @pytest.mark.asyncio
    async def test_unstages_same_turn_mkdir(self):
        mw = _make_middleware()
        _bind_backend(mw, _make_backend_stub(children=[]))
        rt = _runtime(
            {"cwd": "/documents", "staged_dirs": ["/documents/scratch"]},
            tool_call_id="tc-rd",
        )

        outcome = await mw._create_rmdir_tool().coroutine(
            "/documents/scratch", runtime=rt
        )

        assert hasattr(outcome, "update")
        staged = outcome.update
        assert "pending_dir_deletes" not in staged
        # staged_dirs comes back as the _CLEAR sentinel followed by the
        # survivors (none in this case).
        remaining = staged["staged_dirs"]
        assert remaining[0] == "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
        assert "/documents/scratch" not in remaining[1:]

    @pytest.mark.asyncio
    async def test_rejects_root(self):
        mw = _make_middleware()
        rmdir_tool = mw._create_rmdir_tool()
        rt = _runtime()
        for target in ("/", "/documents"):
            outcome = await rmdir_tool.coroutine(target, runtime=rt)
            assert isinstance(outcome, str)
            assert "refusing to rmdir" in outcome

    @pytest.mark.asyncio
    async def test_rejects_cwd(self):
        mw = _make_middleware()
        outcome = await mw._create_rmdir_tool().coroutine(
            "/documents/proj", runtime=_runtime({"cwd": "/documents/proj"})
        )
        assert isinstance(outcome, str)
        assert "cwd" in outcome.lower()

    @pytest.mark.asyncio
    async def test_rejects_ancestor_of_cwd(self):
        mw = _make_middleware()
        outcome = await mw._create_rmdir_tool().coroutine(
            "/documents/proj", runtime=_runtime({"cwd": "/documents/proj/sub"})
        )
        assert isinstance(outcome, str)
        assert "cwd" in outcome.lower()

    @pytest.mark.asyncio
    async def test_rejects_files(self):
        mw = _make_middleware()
        _bind_backend(mw, _make_backend_stub(children=[], file_data={"content": ["x"]}))
        outcome = await mw._create_rmdir_tool().coroutine(
            "/documents/notes.md", runtime=_runtime()
        )
        assert isinstance(outcome, str)
        assert "is a file" in outcome
# ---------------------------------------------------------------------------
# KBPostgresBackend view filter
# ---------------------------------------------------------------------------
class TestKBPostgresBackendDeleteFilter:
    """als_info / glob / grep should suppress paths queued for delete."""

    def _make_backend(self, state: dict[str, Any]) -> KBPostgresBackend:
        return KBPostgresBackend(
            search_space_id=1, runtime=SimpleNamespace(state=state)
        )

    def test_pending_filesystem_view_returns_deleted_paths(self):
        backend = self._make_backend(
            {
                "pending_deletes": [
                    {"path": "/documents/x.md", "tool_call_id": "t1"},
                ],
                "pending_dir_deletes": [
                    {"path": "/documents/d1", "tool_call_id": "t2"},
                ],
            }
        )

        removed, alias, deleted_dirs = backend._pending_filesystem_view({})

        assert alias == {}
        assert "/documents/x.md" in removed
        assert "/documents/d1" in deleted_dirs

    def test_dir_suppressed_covers_descendants(self):
        backend = self._make_backend({})
        suppressed = {"/documents/d"}
        # A deleted directory hides itself and every descendant path,
        # but nothing outside its subtree.
        assert backend._is_dir_suppressed("/documents/d", suppressed)
        assert backend._is_dir_suppressed("/documents/d/x.md", suppressed)
        assert backend._is_dir_suppressed("/documents/d/sub/y.md", suppressed)
        assert not backend._is_dir_suppressed("/documents/other.md", suppressed)

View file

@ -98,10 +98,54 @@ class TestInitialFilesystemState:
state = _initial_filesystem_state()
assert state["cwd"] == "/documents"
assert state["staged_dirs"] == []
assert state["staged_dir_tool_calls"] == {}
assert state["pending_moves"] == []
assert state["pending_deletes"] == []
assert state["pending_dir_deletes"] == []
assert state["doc_id_by_path"] == {}
assert state["dirty_paths"] == []
assert state["dirty_path_tool_calls"] == {}
assert state["kb_priority"] == []
assert state["kb_matched_chunk_ids"] == {}
assert state["kb_anon_doc"] is None
assert state["tree_version"] == 0
class TestMultiEditSamePathCoalescing:
    """A turn with several edits to one path must yield a single binding.

    Persistence reads ``dirty_path_tool_calls[path]`` to find the
    tool_call_id responsible for the state now on disk.
    ``_add_unique_reducer`` dedupes ``dirty_paths``, so a second edit adds
    no new path entry, and ``_dict_merge_with_tombstones_reducer`` lets
    the right-hand side overwrite, so the LATEST tool_call_id wins.
    That's correct for snapshotting: revert restores the pre-mutation
    state, and back-to-back edits in one turn coalesce into a single
    revisible op — the user sees ONE Revert button per turn-per-path,
    not N.
    """

    def test_dirty_paths_dedupes_repeated_writes(self):
        # Two writes to the same path through ``_add_unique_reducer``
        # yield one entry, not two.
        once = _add_unique_reducer([], ["/documents/a.md"])
        twice = _add_unique_reducer(once, ["/documents/a.md"])
        assert twice == ["/documents/a.md"]

    def test_dirty_path_tool_calls_keeps_latest_tool_call_id(self):
        # First write binds tcid-1; the second rebinds to tcid-2
        # (right-hand side wins in the merge).
        bindings = _dict_merge_with_tombstones_reducer(
            {}, {"/documents/a.md": "tcid-1"}
        )
        bindings = _dict_merge_with_tombstones_reducer(
            bindings, {"/documents/a.md": "tcid-2"}
        )
        assert bindings == {"/documents/a.md": "tcid-2"}

    def test_rm_tombstones_dirty_path_tool_call(self):
        # ``rm`` merges ``{path: None}`` so no stale binding survives
        # past the delete.
        bindings = _dict_merge_with_tombstones_reducer(
            {"/documents/a.md": "tcid-1"}, {"/documents/a.md": None}
        )
        assert bindings == {}