mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 05:12:38 +02:00
feat: improved agent streaming
This commit is contained in:
parent
afb4b09cde
commit
c110f5b955
60 changed files with 8068 additions and 303 deletions
|
|
@ -15,6 +15,17 @@ from app.agents.new_chat.middleware.action_log import ActionLogMiddleware
|
|||
from app.agents.new_chat.tools.registry import ToolDefinition
|
||||
|
||||
|
||||
@dataclass
class _FakeRuntime:
    """Minimal stand-in for ``ToolRuntime`` used in unit tests.

    ``ActionLogMiddleware`` reads ``runtime.config['configurable']['turn_id']``
    to populate the new ``chat_turn_id`` column (see migration 135).
    """

    # LangGraph-style config mapping; tests set
    # ``{"configurable": {"turn_id": ...}}`` or leave it None.
    config: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class _FakeRequest:
|
||||
"""Minimal stand-in for ToolCallRequest used in unit tests."""
|
||||
|
|
@ -120,6 +131,9 @@ class TestActionLogMiddlewarePersistence:
|
|||
"args": {"color": "red", "size": 3},
|
||||
"id": "tc-abc",
|
||||
},
|
||||
runtime=_FakeRuntime(
|
||||
config={"configurable": {"turn_id": "42:1700000000000"}}
|
||||
),
|
||||
)
|
||||
result_msg = ToolMessage(content="ok", tool_call_id="tc-abc", id="msg-1")
|
||||
handler = AsyncMock(return_value=result_msg)
|
||||
|
|
@ -142,6 +156,32 @@ class TestActionLogMiddlewarePersistence:
|
|||
assert row.error is None
|
||||
assert row.reverse_descriptor is None
|
||||
assert row.reversible is False
|
||||
# Migration 135: ``turn_id`` is the deprecated alias of ``tool_call_id``;
|
||||
# ``chat_turn_id`` comes from ``runtime.config['configurable']['turn_id']``.
|
||||
assert row.tool_call_id == "tc-abc"
|
||||
assert row.turn_id == "tc-abc"
|
||||
assert row.chat_turn_id == "42:1700000000000"
|
||||
|
||||
@pytest.mark.asyncio
async def test_chat_turn_id_none_when_runtime_missing(
    self, patch_get_flags, fake_session_factory
) -> None:
    """``chat_turn_id`` falls back to NULL when ``runtime.config`` is absent."""
    captured, factory = fake_session_factory
    mw = ActionLogMiddleware(thread_id=1, search_space_id=1, user_id=None)
    # runtime=None exercises the "no runtime at all" branch — not merely
    # a runtime with an empty config.
    request = _FakeRequest(
        tool_call={"name": "make_widget", "args": {}, "id": "tc-1"},
        runtime=None,
    )
    handler = AsyncMock(return_value=ToolMessage(content="ok", tool_call_id="tc-1"))
    with (
        patch_get_flags(_enabled_flags()),
        patch("app.db.shielded_async_session", side_effect=lambda: factory()),
    ):
        await mw.awrap_tool_call(request, handler)
    # The fake session factory captures every row added; exactly one
    # action-log row is expected for this tool call.
    row = captured["rows"][0]
    assert row.tool_call_id == "tc-1"
    assert row.chat_turn_id is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_writes_row_on_failure_and_reraises(
|
||||
|
|
@ -293,6 +333,76 @@ class TestReverseDescriptor:
|
|||
assert row.reversible is False
|
||||
|
||||
|
||||
class TestActionLogDispatch:
    """Verify ``adispatch_custom_event`` fires after commit."""

    @pytest.mark.asyncio
    async def test_dispatches_action_log_event_on_success(
        self, patch_get_flags, fake_session_factory
    ) -> None:
        """A successful tool call dispatches one ``action_log`` event with the row payload."""
        _captured, factory = fake_session_factory
        mw = ActionLogMiddleware(thread_id=42, search_space_id=7, user_id="u1")
        request = _FakeRequest(
            tool_call={
                "name": "make_widget",
                "args": {"color": "red"},
                "id": "tc-evt",
            },
            runtime=_FakeRuntime(
                config={"configurable": {"turn_id": "42:1700000000000"}}
            ),
        )
        result_msg = ToolMessage(content="ok", tool_call_id="tc-evt", id="msg-42")
        handler = AsyncMock(return_value=result_msg)

        dispatch_mock = AsyncMock()
        with (
            patch_get_flags(_enabled_flags()),
            patch("app.db.shielded_async_session", side_effect=lambda: factory()),
            # Patch the name as imported inside the middleware module so the
            # call site actually hits the mock.
            patch(
                "app.agents.new_chat.middleware.action_log.adispatch_custom_event",
                dispatch_mock,
            ),
        ):
            await mw.awrap_tool_call(request, handler)

        dispatch_mock.assert_awaited_once()
        call_args = dispatch_mock.await_args
        assert call_args is not None
        # Positional args: (event_name, payload).
        assert call_args.args[0] == "action_log"
        payload = call_args.args[1]
        assert payload["lc_tool_call_id"] == "tc-evt"
        assert payload["chat_turn_id"] == "42:1700000000000"
        assert payload["tool_name"] == "make_widget"
        assert payload["reversible"] is False
        assert payload["reverse_descriptor_present"] is False
        assert payload["error"] is False

    @pytest.mark.asyncio
    async def test_no_dispatch_when_persistence_fails(self, patch_get_flags) -> None:
        """If commit fails the dispatch is suppressed (no row to surface)."""
        mw = ActionLogMiddleware(thread_id=1, search_space_id=1, user_id=None)
        request = _FakeRequest(
            tool_call={"name": "make_widget", "args": {}, "id": "tc1"}
        )
        handler = AsyncMock(return_value=ToolMessage(content="ok", tool_call_id="tc1"))
        dispatch_mock = AsyncMock()

        # Session factory that fails immediately, simulating a DB outage.
        def _exploding_session():
            raise RuntimeError("DB is down")

        with (
            patch_get_flags(_enabled_flags()),
            patch("app.db.shielded_async_session", side_effect=_exploding_session),
            patch(
                "app.agents.new_chat.middleware.action_log.adispatch_custom_event",
                dispatch_mock,
            ),
        ):
            # Must not raise: persistence failure is swallowed by the
            # middleware, and no event is emitted.
            await mw.awrap_tool_call(request, handler)
        dispatch_mock.assert_not_awaited()
|
||||
|
||||
|
||||
class TestArgsTruncation:
|
||||
@pytest.mark.asyncio
|
||||
async def test_huge_args_payload_is_truncated(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,122 @@
|
|||
"""Tests for the desktop-mode safety ruleset.
|
||||
|
||||
In desktop mode the agent operates against the user's real disk with no
|
||||
revision history, so destructive filesystem operations must require
|
||||
explicit approval. These tests pin the set of tools that get the ``ask``
|
||||
gate so it cannot silently regress.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.middleware.permission import PermissionMiddleware
|
||||
from app.agents.new_chat.permissions import (
|
||||
Rule,
|
||||
Ruleset,
|
||||
aggregate_action,
|
||||
evaluate_many,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
# Mirror the ruleset built inside ``chat_deepagent._build_compiled_agent_blocking``
# when ``filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER``. Keeping a
# copy here means the rule contract has a focused regression test even when
# the larger graph-build helper is hard to instantiate in unit tests.
DESKTOP_SAFETY_RULESET = Ruleset(
    rules=[
        # Each destructive filesystem op gets the "ask" gate on every path.
        Rule(permission="rm", pattern="*", action="ask"),
        Rule(permission="rmdir", pattern="*", action="ask"),
        Rule(permission="move_file", pattern="*", action="ask"),
        Rule(permission="edit_file", pattern="*", action="ask"),
        Rule(permission="write_file", pattern="*", action="ask"),
    ],
    origin="desktop_safety",
)

# Broad allow-everything baseline; the tests layer it before or after the
# safety ruleset to exercise ordering.
SURFSENSE_DEFAULTS = Ruleset(
    rules=[Rule(permission="*", pattern="*", action="allow")],
    origin="surfsense_defaults",
)
|
||||
|
||||
|
||||
def _action_for(tool_name: str, *rulesets: Ruleset) -> str:
    """Resolve the effective permission action for *tool_name* across layered rulesets."""
    return aggregate_action(evaluate_many(tool_name, [tool_name], *rulesets))
|
||||
|
||||
|
||||
class TestDesktopSafetyRulesGateDestructiveOps:
    """Pin which tools the desktop safety ruleset gates (and which it leaves alone)."""

    @pytest.mark.parametrize(
        "tool_name",
        ["rm", "rmdir", "move_file", "edit_file", "write_file"],
    )
    def test_destructive_op_resolves_to_ask(self, tool_name: str) -> None:
        # surfsense_defaults says "allow */*"; desktop_safety must override
        # because it's layered later (last-match-wins).
        action = _action_for(tool_name, SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET)
        assert action == "ask", (
            f"{tool_name} must require approval in desktop mode "
            f"(no revert path on real disk); got {action!r}"
        )

    @pytest.mark.parametrize(
        "tool_name",
        ["read_file", "ls", "list_tree", "grep", "glob", "cd", "pwd", "mkdir"],
    )
    def test_safe_ops_remain_allowed(self, tool_name: str) -> None:
        # Read-only and trivially-reversible tools must NOT get gated —
        # otherwise every navigation in desktop mode pops an interrupt.
        action = _action_for(tool_name, SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET)
        assert action == "allow", (
            f"{tool_name} should not be gated in desktop mode; got {action!r}"
        )
|
||||
|
||||
|
||||
class TestDesktopSafetyOverridesAllowDefault:
    """Guard the layering order assumed by ``_build_compiled_agent_blocking``."""

    def test_layer_order_last_match_wins(self) -> None:
        # Swapped order (safety first, defaults last): the broad allow rule
        # matches last, so the safety net is inert.
        wrong_order = _action_for("rm", DESKTOP_SAFETY_RULESET, SURFSENSE_DEFAULTS)
        assert wrong_order == "allow"

        # Intended order (defaults first, safety last): "ask" matches last
        # and therefore wins.
        right_order = _action_for("rm", SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET)
        assert right_order == "ask"
|
||||
|
||||
|
||||
class TestPermissionMiddlewareIntegration:
    """End-to-end-ish check that the middleware takes the "ask" path for ``rm``."""

    def test_middleware_raises_interrupt_for_rm_in_desktop_mode(self) -> None:
        # Imported locally to keep module import cheap for the pure-ruleset
        # tests above.
        from langchain_core.messages import AIMessage

        from app.agents.new_chat.errors import RejectedError

        mw = PermissionMiddleware(rulesets=[SURFSENSE_DEFAULTS, DESKTOP_SAFETY_RULESET])
        # Stub the interrupt to a "reject" decision so we can assert the
        # ask path was taken without spinning up the LangGraph runtime.
        mw._raise_interrupt = lambda **kw: {"decision_type": "reject"}  # type: ignore[assignment]

        # Minimal agent state: one AI message carrying the rm tool call.
        state = {
            "messages": [
                AIMessage(
                    content="",
                    tool_calls=[
                        {
                            "name": "rm",
                            "args": {"path": "/Users/me/Documents/important.docx"},
                            "id": "tc-rm",
                        }
                    ],
                )
            ]
        }

        class _FakeRuntime:
            # NOTE(review): presumably only ``config`` is read by
            # after_model here — confirm against PermissionMiddleware.
            config: dict = {"configurable": {"thread_id": "test"}}

        # A rejected "ask" decision surfaces as RejectedError to the caller.
        with pytest.raises(RejectedError):
            mw.after_model(state, _FakeRuntime())
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
"""Tests for the default auto-approval list in ``hitl.request_approval``.
|
||||
|
||||
These pin the policy that low-stakes connector creation tools (drafts,
|
||||
new-file creates) skip the HITL interrupt by default. Without this set,
|
||||
every "draft my newsletter" turn used to fire ~3 interrupts before any
|
||||
useful work happened.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.tools.hitl import (
|
||||
DEFAULT_AUTO_APPROVED_TOOLS,
|
||||
HITLResult,
|
||||
request_approval,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class TestDefaultAutoApprovedToolsList:
    """Pin the contents and immutability of the default auto-approval set."""

    def test_set_contains_expected_creation_tools(self) -> None:
        # One explicit test to touch whenever the policy list changes;
        # keep in sync with ``hitl.DEFAULT_AUTO_APPROVED_TOOLS``.
        assert DEFAULT_AUTO_APPROVED_TOOLS == {
            "create_gmail_draft",
            "update_gmail_draft",
            "create_notion_page",
            "create_confluence_page",
            "create_google_drive_file",
            "create_dropbox_file",
            "create_onedrive_file",
        }

    def test_set_is_immutable(self) -> None:
        # A frozenset cannot be widened at runtime, so the auto-approval
        # surface cannot grow by accidental mutation.
        assert isinstance(DEFAULT_AUTO_APPROVED_TOOLS, frozenset)

    def test_send_tools_are_not_auto_approved(self) -> None:
        # Tools that broadcast externally (or delete data) must always prompt.
        always_gated = (
            "send_gmail_email",
            "send_discord_message",
            "send_teams_message",
            "delete_notion_page",
            "create_calendar_event",
            "delete_calendar_event",
        )
        for tool_name in always_gated:
            assert tool_name not in DEFAULT_AUTO_APPROVED_TOOLS, (
                f"{tool_name} must remain HITL-gated"
            )
|
||||
|
||||
|
||||
class TestRequestApprovalAutoBypass:
    """Exercise the bypass paths of ``request_approval`` outside a graph runtime."""

    def test_auto_approved_tool_skips_interrupt(self) -> None:
        # No interrupt mock set up — if the function attempted to call
        # ``langgraph.types.interrupt`` it would raise GraphInterrupt.
        # The fact that we get a clean HITLResult proves the bypass.
        result = request_approval(
            action_type="gmail_draft_creation",
            tool_name="create_gmail_draft",
            params={"to": "alice@example.com", "subject": "hi", "body": "hey"},
        )
        assert isinstance(result, HITLResult)
        assert result.rejected is False
        assert result.decision_type == "auto_approved"
        # Original params are preserved untouched (no user edits possible).
        assert result.params == {
            "to": "alice@example.com",
            "subject": "hi",
            "body": "hey",
        }

    def test_non_listed_tool_still_attempts_interrupt(self) -> None:
        # A tool NOT in the default list must reach ``langgraph.interrupt``.
        # Outside a runnable context that call raises a RuntimeError —
        # which is exactly the signal we want: the bypass did NOT fire.
        with pytest.raises(RuntimeError, match="runnable context"):
            request_approval(
                action_type="gmail_email_send",
                tool_name="send_gmail_email",
                params={"to": "alice@example.com", "subject": "hi", "body": "hey"},
            )

    def test_user_trusted_tools_still_take_precedence(self) -> None:
        # ``trusted_tools`` (per-connector "always allow" from MCP/UI)
        # was checked BEFORE the default list and must keep working
        # for tools outside the default list.
        result = request_approval(
            action_type="mcp_tool_call",
            tool_name="my_custom_mcp_tool",
            params={"x": 1},
            trusted_tools=["my_custom_mcp_tool"],
        )
        assert result.decision_type == "trusted"
        assert result.rejected is False

    def test_auto_approved_overrides_no_trusted_tools(self) -> None:
        # When trusted_tools is empty and tool is in the default list,
        # we should still bypass — proves the order in request_approval.
        result = request_approval(
            action_type="notion_page_creation",
            tool_name="create_notion_page",
            params={"title": "Plan"},
            trusted_tools=[],
        )
        assert result.decision_type == "auto_approved"
|
||||
|
|
@ -0,0 +1,333 @@
|
|||
"""Cloud-mode behavior tests for the new ``rm`` and ``rmdir`` filesystem tools.
|
||||
|
||||
The tools build ``Command(update=...)`` payloads that the persistence
|
||||
middleware applies at end of turn. These tests stub out the backend and
|
||||
runtime to assert the staging payload shape:
|
||||
|
||||
* ``rm`` queues into ``pending_deletes`` and tombstones state files.
|
||||
* ``rm`` rejects directories, ``/documents``, root, and the anonymous doc.
|
||||
* ``rmdir`` queues into ``pending_dir_deletes`` and rejects non-empty dirs.
|
||||
* ``rmdir`` un-stages a same-turn ``mkdir`` rather than queuing a delete.
|
||||
* ``rmdir`` refuses to drop the cwd or any of its ancestors.
|
||||
* ``KBPostgresBackend`` view-helpers honor staged deletes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
|
||||
from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def _make_middleware(mode: FilesystemMode = FilesystemMode.CLOUD):
    """Build a bare ``SurfSenseFilesystemMiddleware`` without running ``__init__``."""
    # ``__new__`` skips the real constructor (which needs a runtime/session);
    # only the two attributes the rm/rmdir tools read are populated.
    mw = SurfSenseFilesystemMiddleware.__new__(SurfSenseFilesystemMiddleware)
    mw._filesystem_mode = mode
    mw._custom_tool_descriptions = {}
    return mw
|
||||
|
||||
|
||||
def _runtime(state: dict[str, Any] | None = None, *, tool_call_id: str = "tc-abc"):
|
||||
state = state or {}
|
||||
state.setdefault("cwd", "/documents")
|
||||
return SimpleNamespace(state=state, tool_call_id=tool_call_id)
|
||||
|
||||
|
||||
class _KBBackendStub(KBPostgresBackend):
    """Construct-able subclass of :class:`KBPostgresBackend` for tests.

    We bypass the real ``__init__`` (which expects a runtime + DB session)
    and inject just the methods the rm/rmdir tools touch. The class
    inheritance keeps ``isinstance(backend, KBPostgresBackend)`` checks
    inside the tools happy, which is what gates them from the desktop
    code path.
    """

    def __init__(self, *, children=None, file_data=None) -> None:
        # Directory listing stub; ``children`` is a list of entry dicts.
        self.als_info = AsyncMock(return_value=children or [])
        # Returns a (file_data, doc_id) pair when the path is a file, else
        # None. 17 is an arbitrary doc id — presumably any int works here.
        self._load_file_data = AsyncMock(
            return_value=(file_data, 17) if file_data is not None else None
        )
|
||||
|
||||
|
||||
def _make_backend_stub(*, children=None, file_data=None) -> KBPostgresBackend:
    """Shorthand factory around :class:`_KBBackendStub`."""
    stub = _KBBackendStub(children=children, file_data=file_data)
    return stub
|
||||
|
||||
|
||||
def _bind_backend(middleware, backend):
|
||||
"""Inject a backend resolver onto the middleware test instance."""
|
||||
middleware._get_backend = lambda runtime: backend
|
||||
return backend
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# rm
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRmStaging:
    """Cloud-mode ``rm``: staging payload shape and refusal paths."""

    @pytest.mark.asyncio
    async def test_stages_delete_and_tombstones_state(self):
        m = _make_middleware()
        _bind_backend(m, _make_backend_stub(children=[], file_data={"content": ["x"]}))
        runtime = _runtime(
            {
                "cwd": "/documents",
                "files": {"/documents/notes.md": {"content": ["hello"]}},
                "doc_id_by_path": {"/documents/notes.md": 17},
            },
            tool_call_id="tc-1",
        )

        tool = m._create_rm_tool()
        result = await tool.coroutine("/documents/notes.md", runtime=runtime)

        # Success path returns a Command-like object carrying state updates.
        assert hasattr(result, "update"), f"expected Command, got {result!r}"
        update = result.update
        assert update["pending_deletes"] == [
            {"path": "/documents/notes.md", "tool_call_id": "tc-1"}
        ]
        # None values tombstone the path in both state maps.
        assert update["files"] == {"/documents/notes.md": None}
        assert update["doc_id_by_path"] == {"/documents/notes.md": None}

    @pytest.mark.asyncio
    async def test_rejects_documents_root(self):
        # Refusal paths return a plain string message, not a Command.
        m = _make_middleware()
        runtime = _runtime()
        tool = m._create_rm_tool()
        result = await tool.coroutine("/documents", runtime=runtime)
        assert isinstance(result, str)
        assert "refusing to rm" in result

    @pytest.mark.asyncio
    async def test_rejects_root(self):
        m = _make_middleware()
        runtime = _runtime()
        tool = m._create_rm_tool()
        result = await tool.coroutine("/", runtime=runtime)
        assert isinstance(result, str)
        assert "refusing to rm" in result

    @pytest.mark.asyncio
    async def test_rejects_directory_via_staged_dirs(self):
        # Directory created earlier in the same turn (in ``staged_dirs``).
        m = _make_middleware()
        runtime = _runtime(
            {
                "staged_dirs": ["/documents/team-x"],
            }
        )
        tool = m._create_rm_tool()
        result = await tool.coroutine("/documents/team-x", runtime=runtime)
        assert isinstance(result, str)
        assert "directory" in result.lower()
        # Error message must steer the agent to the right tool.
        assert "rmdir" in result

    @pytest.mark.asyncio
    async def test_rejects_directory_via_listing(self):
        # Directory detected by having children in the backend listing.
        m = _make_middleware()
        _bind_backend(
            m,
            _make_backend_stub(
                children=[{"path": "/documents/foo/x.md", "is_dir": False}]
            ),
        )
        runtime = _runtime()
        tool = m._create_rm_tool()
        result = await tool.coroutine("/documents/foo", runtime=runtime)
        assert isinstance(result, str)
        assert "directory" in result.lower()

    @pytest.mark.asyncio
    async def test_rejects_anonymous_doc(self):
        # The anonymous upload doc is read-only and must not be deletable.
        m = _make_middleware()
        runtime = _runtime(
            {
                "kb_anon_doc": {
                    "path": "/documents/uploaded.xml",
                    "title": "uploaded",
                    "content": "",
                    "chunks": [],
                }
            }
        )
        tool = m._create_rm_tool()
        result = await tool.coroutine("/documents/uploaded.xml", runtime=runtime)
        assert isinstance(result, str)
        assert "read-only" in result

    @pytest.mark.asyncio
    async def test_drops_path_from_dirty_paths(self):
        m = _make_middleware()
        _bind_backend(m, _make_backend_stub(children=[], file_data={"content": ["x"]}))
        runtime = _runtime(
            {
                "files": {"/documents/notes.md": {"content": ["x"]}},
                "doc_id_by_path": {"/documents/notes.md": 17},
                "dirty_paths": ["/documents/notes.md"],
            }
        )
        tool = m._create_rm_tool()
        result = await tool.coroutine("/documents/notes.md", runtime=runtime)
        update = result.update
        # First element is _CLEAR sentinel; the rest must NOT contain the
        # rm'd path.
        dirty = update.get("dirty_paths") or []
        assert "/documents/notes.md" not in dirty[1:]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# rmdir
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRmdirStaging:
    """Cloud-mode ``rmdir``: staging, un-staging, and refusal paths."""

    @pytest.mark.asyncio
    async def test_stages_dir_delete_when_empty_and_db_backed(self):
        m = _make_middleware()
        backend = _bind_backend(m, _make_backend_stub(children=[]))
        # Override _load_file_data to return None (folder, not a file) and
        # parent listing to claim the folder exists.
        backend._load_file_data = AsyncMock(return_value=None)
        # side_effect list: the tool calls als_info twice, in this order.
        backend.als_info = AsyncMock(
            side_effect=[
                [],  # children of /documents/proj
                [
                    {"path": "/documents/proj", "is_dir": True},
                ],  # parent listing
            ]
        )
        runtime = _runtime(
            {
                "cwd": "/documents",
            },
            tool_call_id="tc-rd",
        )

        tool = m._create_rmdir_tool()
        result = await tool.coroutine("/documents/proj", runtime=runtime)

        assert hasattr(result, "update")
        update = result.update
        assert update["pending_dir_deletes"] == [
            {"path": "/documents/proj", "tool_call_id": "tc-rd"}
        ]

    @pytest.mark.asyncio
    async def test_rejects_non_empty(self):
        m = _make_middleware()
        _bind_backend(
            m,
            _make_backend_stub(
                children=[{"path": "/documents/proj/x.md", "is_dir": False}]
            ),
        )
        runtime = _runtime()
        tool = m._create_rmdir_tool()
        result = await tool.coroutine("/documents/proj", runtime=runtime)
        assert isinstance(result, str)
        assert "not empty" in result

    @pytest.mark.asyncio
    async def test_unstages_same_turn_mkdir(self):
        # A dir created by mkdir earlier in the SAME turn is only staged;
        # rmdir should un-stage it rather than queue a real delete.
        m = _make_middleware()
        _bind_backend(m, _make_backend_stub(children=[]))
        runtime = _runtime(
            {
                "cwd": "/documents",
                "staged_dirs": ["/documents/scratch"],
            },
            tool_call_id="tc-rd",
        )
        tool = m._create_rmdir_tool()
        result = await tool.coroutine("/documents/scratch", runtime=runtime)

        assert hasattr(result, "update")
        update = result.update
        assert "pending_dir_deletes" not in update
        # _CLEAR sentinel + remaining items (in this case, none).
        staged_after = update["staged_dirs"]
        assert staged_after[0] == "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
        assert "/documents/scratch" not in staged_after[1:]

    @pytest.mark.asyncio
    async def test_rejects_root(self):
        m = _make_middleware()
        runtime = _runtime()
        tool = m._create_rmdir_tool()
        for victim in ("/", "/documents"):
            result = await tool.coroutine(victim, runtime=runtime)
            assert isinstance(result, str)
            assert "refusing to rmdir" in result

    @pytest.mark.asyncio
    async def test_rejects_cwd(self):
        m = _make_middleware()
        runtime = _runtime({"cwd": "/documents/proj"})
        tool = m._create_rmdir_tool()
        result = await tool.coroutine("/documents/proj", runtime=runtime)
        assert isinstance(result, str)
        assert "cwd" in result.lower()

    @pytest.mark.asyncio
    async def test_rejects_ancestor_of_cwd(self):
        # Deleting an ancestor would orphan the cwd; must refuse.
        m = _make_middleware()
        runtime = _runtime({"cwd": "/documents/proj/sub"})
        tool = m._create_rmdir_tool()
        result = await tool.coroutine("/documents/proj", runtime=runtime)
        assert isinstance(result, str)
        assert "cwd" in result.lower()

    @pytest.mark.asyncio
    async def test_rejects_files(self):
        # file_data set => _load_file_data reports a file at the path.
        m = _make_middleware()
        _bind_backend(m, _make_backend_stub(children=[], file_data={"content": ["x"]}))
        runtime = _runtime()
        tool = m._create_rmdir_tool()
        result = await tool.coroutine("/documents/notes.md", runtime=runtime)
        assert isinstance(result, str)
        assert "is a file" in result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# KBPostgresBackend view filter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestKBPostgresBackendDeleteFilter:
    """als_info / glob / grep should suppress paths queued for delete."""

    def _make_backend(self, state: dict[str, Any]) -> KBPostgresBackend:
        # Real backend; only ``runtime.state`` is needed for the view helpers.
        runtime = SimpleNamespace(state=state)
        backend = KBPostgresBackend(search_space_id=1, runtime=runtime)
        return backend

    def test_pending_filesystem_view_returns_deleted_paths(self):
        backend = self._make_backend(
            {
                "pending_deletes": [
                    {"path": "/documents/x.md", "tool_call_id": "t1"},
                ],
                "pending_dir_deletes": [
                    {"path": "/documents/d1", "tool_call_id": "t2"},
                ],
            }
        )
        # Returns (removed file paths, alias map, deleted dir paths).
        removed, alias, deleted_dirs = backend._pending_filesystem_view({})
        assert "/documents/x.md" in removed
        assert "/documents/d1" in deleted_dirs
        assert alias == {}

    def test_dir_suppressed_covers_descendants(self):
        backend = self._make_backend({})
        deleted_dirs = {"/documents/d"}
        # The dir itself and everything under it (any depth) is suppressed.
        assert backend._is_dir_suppressed("/documents/d", deleted_dirs)
        assert backend._is_dir_suppressed("/documents/d/x.md", deleted_dirs)
        assert backend._is_dir_suppressed("/documents/d/sub/y.md", deleted_dirs)
        assert not backend._is_dir_suppressed("/documents/other.md", deleted_dirs)
|
||||
|
|
@ -98,10 +98,54 @@ class TestInitialFilesystemState:
|
|||
state = _initial_filesystem_state()
|
||||
assert state["cwd"] == "/documents"
|
||||
assert state["staged_dirs"] == []
|
||||
assert state["staged_dir_tool_calls"] == {}
|
||||
assert state["pending_moves"] == []
|
||||
assert state["pending_deletes"] == []
|
||||
assert state["pending_dir_deletes"] == []
|
||||
assert state["doc_id_by_path"] == {}
|
||||
assert state["dirty_paths"] == []
|
||||
assert state["dirty_path_tool_calls"] == {}
|
||||
assert state["kb_priority"] == []
|
||||
assert state["kb_matched_chunk_ids"] == {}
|
||||
assert state["kb_anon_doc"] is None
|
||||
assert state["tree_version"] == 0
|
||||
|
||||
|
||||
class TestMultiEditSamePathCoalescing:
    """Multi-edit-same-path turns must coalesce into ONE binding record.

    The persistence body uses ``dirty_path_tool_calls[path]`` to find the
    tool_call_id that produced the current state on disk. Because
    ``dirty_paths`` dedupes via :func:`_add_unique_reducer` the second
    edit doesn't append a new path entry — and because
    ``_dict_merge_with_tombstones_reducer`` lets the right-hand side
    overwrite, the LATEST tool_call_id wins. That's the correct behavior
    for snapshotting: revert restores to the pre-mutation state, and
    multiple back-to-back edits in one turn coalesce into a single
    revisible op (the user sees ONE Revert button per turn-per-path,
    not N).
    """

    def test_dirty_paths_dedupes_repeated_writes(self):
        # Writing the same path twice through the unique reducer must
        # yield a single entry.
        after_first_write = _add_unique_reducer([], ["/documents/a.md"])
        after_second_write = _add_unique_reducer(after_first_write, ["/documents/a.md"])
        assert after_second_write == ["/documents/a.md"]

    def test_dirty_path_tool_calls_keeps_latest_tool_call_id(self):
        # Two consecutive merges for the same key: the right-hand side of
        # the later merge wins, so the newest tool_call_id is kept.
        bindings = _dict_merge_with_tombstones_reducer(
            {}, {"/documents/a.md": "tcid-1"}
        )
        bindings = _dict_merge_with_tombstones_reducer(
            bindings, {"/documents/a.md": "tcid-2"}
        )
        assert bindings == {"/documents/a.md": "tcid-2"}

    def test_rm_tombstones_dirty_path_tool_call(self):
        # A ``None`` value acts as a tombstone and removes the binding,
        # so a stale entry cannot outlive the delete.
        bindings = _dict_merge_with_tombstones_reducer(
            {"/documents/a.md": "tcid-1"}, {"/documents/a.md": None}
        )
        assert bindings == {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue