test(agents): cover live filesystem middleware, retire dead twin

The single-agent-era filesystem middleware (app/agents/shared/middleware/ filesystem.py, ~2000 lines) was never instantiated in production, yet three unit suites validated it — an illusory guardrail while the live decomposed middleware (multi_agent_chat/middleware/shared/filesystem) was unguarded. Close the gap before reorganizing the agents module: - Add 14 integration tests driving live B's tools in desktop mode (real on-disk effects) and cloud mode (in-state staging, namespace policy). - Port all high-value dead-twin assertions onto the live path: cloud rm/rmdir staging + guard rails, KBPostgresBackend delete-view filter, mode-scoped system prompt, cwd/relative/namespace resolution, multi-root mount normalization. - Delete dead twin filesystem.py, drop its __init__ re-export, and retire its 3 dead-twin tests. Verified: test_import_all + middleware unit + FS integration all green.
2026-07-22 23:31:12 +02:00 · 2026-06-04 17:46:49 +02:00 · 2026-06-04 17:46:49 +02:00 · 1acde6a470
commit 1acde6a470
parent f3484f5a24
9 changed files with 960 additions and 2492 deletions
--- a/surfsense_backend/tests/integration/agents/multi_agent_chat/test_kb_filesystem_cloud.py
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/test_kb_filesystem_cloud.py
@ -0,0 +1,198 @@
+"""Real-behavior tests for the LIVE knowledge-base filesystem middleware (B) in
+cloud mode.
+
+Cloud mode is the default production filesystem for web chat. Unlike desktop,
+cloud writes/edits/moves/deletes are *staged* into LangGraph state during the
+turn and committed to Postgres at end-of-turn by the persistence middleware.
+These tests drive the production ``build_filesystem_mw`` cloud tools through a
+real ``create_agent`` graph and assert the staging contract (namespace policy,
+read-from-stage, mkdir staging, duplicate rejection) — all deterministic and
+DB-free because cloud ``awrite`` is pure in-state staging.
+
+The end-of-turn DB commit (``commit_staged_filesystem_state``) is covered
+separately; here we lock the per-tool behavior that the reorg could break.
+"""
+
+from __future__ import annotations
+
+import pytest
+from langchain.agents import create_agent
+from langchain_core.messages import HumanMessage, ToolMessage
+from langgraph.checkpoint.memory import InMemorySaver
+
+from app.agents.multi_agent_chat.middleware.shared.filesystem import (
+    build_filesystem_mw,
+)
+from app.agents.shared.filesystem_backends import build_backend_resolver
+from app.agents.shared.filesystem_selection import FilesystemMode, FilesystemSelection
+from tests.integration.harness import ScriptedTurn, build_scripted_harness
+
+pytestmark = [pytest.mark.integration, pytest.mark.asyncio]
+
+_SEARCH_SPACE_ID = 1
+
+
+def _build_cloud_fs_mw():
+    """Build the production filesystem middleware in cloud mode.
+
+    A non-None ``search_space_id`` makes the resolver hand out a
+    ``KBPostgresBackend``, exactly as production does. Staging operations never
+    touch the DB, so a dummy id is sufficient for these tests.
+    """
+    selection = FilesystemSelection(mode=FilesystemMode.CLOUD)
+    resolver = build_backend_resolver(selection, search_space_id=_SEARCH_SPACE_ID)
+    return build_filesystem_mw(
+        backend_resolver=resolver,
+        filesystem_mode=FilesystemMode.CLOUD,
+        search_space_id=_SEARCH_SPACE_ID,
+        user_id="00000000-0000-0000-0000-000000000001",
+        thread_id=_SEARCH_SPACE_ID,
+        read_only=False,
+    )
+
+
+async def _run(turns: list[ScriptedTurn], thread: str):
+    harness = build_scripted_harness(turns=turns)
+    agent = create_agent(
+        harness.model,
+        tools=[],
+        middleware=[_build_cloud_fs_mw()],
+        checkpointer=InMemorySaver(),
+    )
+    return await agent.ainvoke(
+        {"messages": [HumanMessage(content="do kb work")]},
+        config={"configurable": {"thread_id": thread}},
+    )
+
+
+def _tool_text(result, name: str) -> str:
+    for m in result["messages"]:
+        if isinstance(m, ToolMessage) and m.name == name:
+            return str(m.content)
+    raise AssertionError(f"no ToolMessage from {name!r}")
+
+
+def _write(path: str, content: str, call_id: str) -> ScriptedTurn:
+    return ScriptedTurn(
+        tool_calls=[
+            {
+                "name": "write_file",
+                "args": {"file_path": path, "content": content},
+                "id": call_id,
+            }
+        ]
+    )
+
+
+async def test_cloud_write_then_read_returns_staged_content():
+    """A cloud write stages into state and a later read returns that content."""
+    result = await _run(
+        [
+            _write("/documents/note.md", "cloud CANARY-CLD-1", "c1"),
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "read_file",
+                        "args": {"file_path": "/documents/note.md"},
+                        "id": "c2",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-cloud-write-read",
+    )
+
+    assert "Updated file /documents/note.md" in _tool_text(result, "write_file")
+    assert "CANARY-CLD-1" in _tool_text(result, "read_file")
+
+
+async def test_cloud_write_outside_documents_is_rejected():
+    """Cloud namespace policy: writes must target /documents (non-temp paths)."""
+    result = await _run(
+        [
+            _write("/scratch/note.md", "nope", "c1"),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-cloud-namespace",
+    )
+
+    msg = _tool_text(result, "write_file")
+    assert "must target /documents" in msg
+
+
+async def test_cloud_temp_prefixed_write_is_allowed_anywhere():
+    """A ``temp_`` basename escapes the /documents namespace restriction."""
+    result = await _run(
+        [
+            _write("/temp_scratch.md", "ephemeral", "c1"),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-cloud-temp",
+    )
+
+    msg = _tool_text(result, "write_file")
+    assert "must target /documents" not in msg
+    assert "Updated file" in msg
+
+
+async def test_cloud_mkdir_stages_directory():
+    """Cloud mkdir stages the directory for end-of-turn creation (no immediate IO)."""
+    result = await _run(
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "mkdir",
+                        "args": {"path": "/documents/projects"},
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-cloud-mkdir",
+    )
+
+    msg = _tool_text(result, "mkdir")
+    assert "Staged directory" in msg
+    assert "/documents/projects" in msg
+
+
+async def test_cloud_mkdir_outside_documents_is_rejected():
+    """Cloud mkdir is also restricted to the /documents namespace."""
+    result = await _run(
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {"name": "mkdir", "args": {"path": "/elsewhere"}, "id": "c1"}
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-cloud-mkdir-bad",
+    )
+
+    assert "must target a path under /documents" in _tool_text(result, "mkdir")
+
+
+async def test_cloud_duplicate_write_is_rejected():
+    """Writing to a path already staged this turn is rejected (use edit instead)."""
+    result = await _run(
+        [
+            _write("/documents/dup.md", "first", "c1"),
+            _write("/documents/dup.md", "second", "c2"),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-cloud-dup",
+    )
+
+    # Two write ToolMessages: first succeeds, second is rejected.
+    write_msgs = [
+        str(m.content)
+        for m in result["messages"]
+        if isinstance(m, ToolMessage) and m.name == "write_file"
+    ]
+    assert len(write_msgs) == 2
+    assert "Updated file" in write_msgs[0]
+    assert "already exists" in write_msgs[1]
--- a/surfsense_backend/tests/integration/agents/multi_agent_chat/test_kb_filesystem_desktop.py
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/test_kb_filesystem_desktop.py
@ -0,0 +1,349 @@
+"""Real-behavior tests for the LIVE knowledge-base filesystem middleware (B).
+
+These exercise ``app.agents.multi_agent_chat.middleware.shared.filesystem`` —
+the decomposed middleware + tools that production actually mounts on the
+knowledge_base subagent (via ``build_filesystem_mw``). The previous
+``tests/unit/middleware/test_filesystem_*.py`` suite asserts a *dead twin*
+(``app.agents.shared.middleware.filesystem``) that is never instantiated, so the
+live tool path had no real coverage.
+
+Strategy: mount the production ``build_filesystem_mw`` on a minimal
+``create_agent`` graph and drive its tools with the scripted harness. Desktop
+mode binds a ``MultiRootLocalFolderBackend`` to a real ``tmp_path`` directory,
+so every write/edit/move/rm is asserted against the real on-disk filesystem —
+no mocks, only the LLM is scripted.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from langchain.agents import create_agent
+from langchain_core.messages import HumanMessage, ToolMessage
+from langgraph.checkpoint.memory import InMemorySaver
+
+from app.agents.multi_agent_chat.middleware.shared.filesystem import (
+    build_filesystem_mw,
+)
+from app.agents.shared.filesystem_backends import build_backend_resolver
+from app.agents.shared.filesystem_selection import (
+    FilesystemMode,
+    FilesystemSelection,
+    LocalFilesystemMount,
+)
+from tests.integration.harness import ScriptedTurn, build_scripted_harness
+
+pytestmark = [pytest.mark.integration, pytest.mark.asyncio]
+
+_MOUNT_ID = "workspace"
+
+
+def _build_desktop_fs_mw(root: Path):
+    """Build the production filesystem middleware bound to a real local folder."""
+    selection = FilesystemSelection(
+        mode=FilesystemMode.DESKTOP_LOCAL_FOLDER,
+        local_mounts=(
+            LocalFilesystemMount(mount_id=_MOUNT_ID, root_path=str(root)),
+        ),
+    )
+    resolver = build_backend_resolver(selection)
+    return build_filesystem_mw(
+        backend_resolver=resolver,
+        filesystem_mode=FilesystemMode.DESKTOP_LOCAL_FOLDER,
+        search_space_id=1,
+        user_id="00000000-0000-0000-0000-000000000001",
+        thread_id=1,
+        read_only=False,
+    )
+
+
+async def _run(root: Path, turns: list[ScriptedTurn], thread: str):
+    """Assemble a 1-middleware agent and drive the scripted turns to completion."""
+    harness = build_scripted_harness(turns=turns)
+    fs_mw = _build_desktop_fs_mw(root)
+    agent = create_agent(
+        harness.model,
+        tools=[],
+        middleware=[fs_mw],
+        checkpointer=InMemorySaver(),
+    )
+    return await agent.ainvoke(
+        {"messages": [HumanMessage(content="do filesystem work")]},
+        config={"configurable": {"thread_id": thread}},
+    )
+
+
+def _tool_messages(result) -> list[ToolMessage]:
+    return [m for m in result["messages"] if isinstance(m, ToolMessage)]
+
+
+def _tool_text(result, name: str) -> str:
+    for m in _tool_messages(result):
+        if m.name == name:
+            return str(m.content)
+    raise AssertionError(f"no ToolMessage from {name!r} in {_tool_messages(result)}")
+
+
+async def test_write_then_read_round_trip(tmp_path: Path):
+    """write_file persists to the real folder and read_file returns the content."""
+    result = await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/notes.md",
+                            "content": "hello FS-CANARY-001",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "read_file",
+                        "args": {"file_path": f"/{_MOUNT_ID}/notes.md"},
+                        "id": "c2",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-write-read",
+    )
+
+    # Real on-disk effect, not a mock.
+    assert (tmp_path / "notes.md").read_text() == "hello FS-CANARY-001"
+    # The tool actually returned the file content.
+    assert "FS-CANARY-001" in _tool_text(result, "read_file")
+
+
+async def test_write_then_ls_lists_file(tmp_path: Path):
+    """ls reflects a freshly written file in the real folder."""
+    result = await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/report.md",
+                            "content": "x",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(
+                tool_calls=[
+                    {"name": "ls", "args": {"path": f"/{_MOUNT_ID}"}, "id": "c2"}
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-ls",
+    )
+
+    assert (tmp_path / "report.md").exists()
+    assert "report.md" in _tool_text(result, "ls")
+
+
+async def test_edit_file_rewrites_on_disk(tmp_path: Path):
+    """edit_file applies a real string replacement to the on-disk file."""
+    result = await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/doc.md",
+                            "content": "the quick brown fox",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "edit_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/doc.md",
+                            "old_string": "brown",
+                            "new_string": "red",
+                        },
+                        "id": "c2",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-edit",
+    )
+
+    assert (tmp_path / "doc.md").read_text() == "the quick red fox"
+
+
+async def test_write_into_existing_subdir(tmp_path: Path):
+    """A write into an EXISTING subdirectory lands on disk under that folder."""
+    (tmp_path / "sub").mkdir()
+    result = await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/sub/inner.md",
+                            "content": "nested",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-subdir",
+    )
+
+    assert "Error" not in _tool_text(result, "write_file")
+    assert (tmp_path / "sub" / "inner.md").read_text() == "nested"
+
+
+async def test_write_to_missing_parent_dir_is_rejected(tmp_path: Path):
+    """Desktop write refuses to create a file under a non-existent directory.
+
+    Real current behavior: the local-folder backend requires the parent to
+    exist (and ``mkdir`` is a no-op for this backend), so the agent cannot
+    fabricate new nested folders via ``write_file``. Locking this guards against
+    a silent behavior change during the agents-module reorg.
+    """
+    result = await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/missing/inner.md",
+                            "content": "nested",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-missing-parent",
+    )
+
+    write_msg = _tool_text(result, "write_file")
+    assert "parent directory" in write_msg.lower()
+    assert not (tmp_path / "missing").exists()
+
+
+async def test_move_file_relocates_on_disk(tmp_path: Path):
+    """move_file relocates the real file from source to destination."""
+    await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/src.md",
+                            "content": "movable",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "move_file",
+                        "args": {
+                            "source_path": f"/{_MOUNT_ID}/src.md",
+                            "destination_path": f"/{_MOUNT_ID}/dst.md",
+                        },
+                        "id": "c2",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-move",
+    )
+
+    assert not (tmp_path / "src.md").exists()
+    assert (tmp_path / "dst.md").read_text() == "movable"
+
+
+async def test_rm_deletes_file_on_disk(tmp_path: Path):
+    """rm removes the real file (desktop deletes are immediate)."""
+    await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "write_file",
+                        "args": {
+                            "file_path": f"/{_MOUNT_ID}/trash.md",
+                            "content": "bye",
+                        },
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "rm",
+                        "args": {"path": f"/{_MOUNT_ID}/trash.md"},
+                        "id": "c2",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-rm",
+    )
+
+    assert not (tmp_path / "trash.md").exists()
+
+
+async def test_rmdir_removes_empty_dir_on_disk(tmp_path: Path):
+    """rmdir removes a real empty directory."""
+    (tmp_path / "gone").mkdir()
+    assert (tmp_path / "gone").is_dir()
+
+    result = await _run(
+        tmp_path,
+        [
+            ScriptedTurn(
+                tool_calls=[
+                    {
+                        "name": "rmdir",
+                        "args": {"path": f"/{_MOUNT_ID}/gone"},
+                        "id": "c1",
+                    }
+                ]
+            ),
+            ScriptedTurn(text="done"),
+        ],
+        "fs-desktop-rmdir",
+    )
+
+    assert "Error" not in _tool_text(result, "rmdir")
+    assert not (tmp_path / "gone").exists()