test(agents): cover live filesystem middleware, retire dead twin

The single-agent-era filesystem middleware (app/agents/shared/middleware/
filesystem.py, ~2000 lines) was never instantiated in production, yet three
unit suites validated it — an illusory guardrail while the live decomposed
middleware (multi_agent_chat/middleware/shared/filesystem) was unguarded.

Close the gap before reorganizing the agents module:
- Add 14 integration tests driving live B's tools in desktop mode (real
  on-disk effects) and cloud mode (in-state staging, namespace policy).
- Port all high-value dead-twin assertions onto the live path: cloud rm/rmdir
  staging + guard rails, KBPostgresBackend delete-view filter, mode-scoped
  system prompt, cwd/relative/namespace resolution, multi-root mount
  normalization.
- Delete dead twin filesystem.py, drop its __init__ re-export, and retire its
  3 dead-twin tests.

Verified: test_import_all + middleware unit + FS integration all green.
This commit is contained in:
CREDO23 2026-06-04 17:46:49 +02:00
parent f3484f5a24
commit 1acde6a470
9 changed files with 960 additions and 2492 deletions

View file

@ -0,0 +1,198 @@
"""Real-behavior tests for the LIVE knowledge-base filesystem middleware (B) in
cloud mode.
Cloud mode is the default production filesystem for web chat. Unlike desktop,
cloud writes/edits/moves/deletes are *staged* into LangGraph state during the
turn and committed to Postgres at end-of-turn by the persistence middleware.
These tests drive the production ``build_filesystem_mw`` cloud tools through a
real ``create_agent`` graph and assert the staging contract (namespace policy,
read-from-stage, mkdir staging, duplicate rejection) all deterministic and
DB-free because cloud ``awrite`` is pure in-state staging.
The end-of-turn DB commit (``commit_staged_filesystem_state``) is covered
separately; here we lock the per-tool behavior that the reorg could break.
"""
from __future__ import annotations
import pytest
from langchain.agents import create_agent
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.checkpoint.memory import InMemorySaver
from app.agents.multi_agent_chat.middleware.shared.filesystem import (
build_filesystem_mw,
)
from app.agents.shared.filesystem_backends import build_backend_resolver
from app.agents.shared.filesystem_selection import FilesystemMode, FilesystemSelection
from tests.integration.harness import ScriptedTurn, build_scripted_harness
pytestmark = [pytest.mark.integration, pytest.mark.asyncio]
_SEARCH_SPACE_ID = 1
def _build_cloud_fs_mw():
"""Build the production filesystem middleware in cloud mode.
A non-None ``search_space_id`` makes the resolver hand out a
``KBPostgresBackend``, exactly as production does. Staging operations never
touch the DB, so a dummy id is sufficient for these tests.
"""
selection = FilesystemSelection(mode=FilesystemMode.CLOUD)
resolver = build_backend_resolver(selection, search_space_id=_SEARCH_SPACE_ID)
return build_filesystem_mw(
backend_resolver=resolver,
filesystem_mode=FilesystemMode.CLOUD,
search_space_id=_SEARCH_SPACE_ID,
user_id="00000000-0000-0000-0000-000000000001",
thread_id=_SEARCH_SPACE_ID,
read_only=False,
)
async def _run(turns: list[ScriptedTurn], thread: str):
harness = build_scripted_harness(turns=turns)
agent = create_agent(
harness.model,
tools=[],
middleware=[_build_cloud_fs_mw()],
checkpointer=InMemorySaver(),
)
return await agent.ainvoke(
{"messages": [HumanMessage(content="do kb work")]},
config={"configurable": {"thread_id": thread}},
)
def _tool_text(result, name: str) -> str:
for m in result["messages"]:
if isinstance(m, ToolMessage) and m.name == name:
return str(m.content)
raise AssertionError(f"no ToolMessage from {name!r}")
def _write(path: str, content: str, call_id: str) -> ScriptedTurn:
return ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {"file_path": path, "content": content},
"id": call_id,
}
]
)
async def test_cloud_write_then_read_returns_staged_content():
"""A cloud write stages into state and a later read returns that content."""
result = await _run(
[
_write("/documents/note.md", "cloud CANARY-CLD-1", "c1"),
ScriptedTurn(
tool_calls=[
{
"name": "read_file",
"args": {"file_path": "/documents/note.md"},
"id": "c2",
}
]
),
ScriptedTurn(text="done"),
],
"fs-cloud-write-read",
)
assert "Updated file /documents/note.md" in _tool_text(result, "write_file")
assert "CANARY-CLD-1" in _tool_text(result, "read_file")
async def test_cloud_write_outside_documents_is_rejected():
"""Cloud namespace policy: writes must target /documents (non-temp paths)."""
result = await _run(
[
_write("/scratch/note.md", "nope", "c1"),
ScriptedTurn(text="done"),
],
"fs-cloud-namespace",
)
msg = _tool_text(result, "write_file")
assert "must target /documents" in msg
async def test_cloud_temp_prefixed_write_is_allowed_anywhere():
"""A ``temp_`` basename escapes the /documents namespace restriction."""
result = await _run(
[
_write("/temp_scratch.md", "ephemeral", "c1"),
ScriptedTurn(text="done"),
],
"fs-cloud-temp",
)
msg = _tool_text(result, "write_file")
assert "must target /documents" not in msg
assert "Updated file" in msg
async def test_cloud_mkdir_stages_directory():
"""Cloud mkdir stages the directory for end-of-turn creation (no immediate IO)."""
result = await _run(
[
ScriptedTurn(
tool_calls=[
{
"name": "mkdir",
"args": {"path": "/documents/projects"},
"id": "c1",
}
]
),
ScriptedTurn(text="done"),
],
"fs-cloud-mkdir",
)
msg = _tool_text(result, "mkdir")
assert "Staged directory" in msg
assert "/documents/projects" in msg
async def test_cloud_mkdir_outside_documents_is_rejected():
"""Cloud mkdir is also restricted to the /documents namespace."""
result = await _run(
[
ScriptedTurn(
tool_calls=[
{"name": "mkdir", "args": {"path": "/elsewhere"}, "id": "c1"}
]
),
ScriptedTurn(text="done"),
],
"fs-cloud-mkdir-bad",
)
assert "must target a path under /documents" in _tool_text(result, "mkdir")
async def test_cloud_duplicate_write_is_rejected():
"""Writing to a path already staged this turn is rejected (use edit instead)."""
result = await _run(
[
_write("/documents/dup.md", "first", "c1"),
_write("/documents/dup.md", "second", "c2"),
ScriptedTurn(text="done"),
],
"fs-cloud-dup",
)
# Two write ToolMessages: first succeeds, second is rejected.
write_msgs = [
str(m.content)
for m in result["messages"]
if isinstance(m, ToolMessage) and m.name == "write_file"
]
assert len(write_msgs) == 2
assert "Updated file" in write_msgs[0]
assert "already exists" in write_msgs[1]

View file

@ -0,0 +1,349 @@
"""Real-behavior tests for the LIVE knowledge-base filesystem middleware (B).
These exercise ``app.agents.multi_agent_chat.middleware.shared.filesystem``
the decomposed middleware + tools that production actually mounts on the
knowledge_base subagent (via ``build_filesystem_mw``). The previous
``tests/unit/middleware/test_filesystem_*.py`` suite asserts a *dead twin*
(``app.agents.shared.middleware.filesystem``) that is never instantiated, so the
live tool path had no real coverage.
Strategy: mount the production ``build_filesystem_mw`` on a minimal
``create_agent`` graph and drive its tools with the scripted harness. Desktop
mode binds a ``MultiRootLocalFolderBackend`` to a real ``tmp_path`` directory,
so every write/edit/move/rm is asserted against the real on-disk filesystem
no mocks, only the LLM is scripted.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from langchain.agents import create_agent
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.checkpoint.memory import InMemorySaver
from app.agents.multi_agent_chat.middleware.shared.filesystem import (
build_filesystem_mw,
)
from app.agents.shared.filesystem_backends import build_backend_resolver
from app.agents.shared.filesystem_selection import (
FilesystemMode,
FilesystemSelection,
LocalFilesystemMount,
)
from tests.integration.harness import ScriptedTurn, build_scripted_harness
pytestmark = [pytest.mark.integration, pytest.mark.asyncio]
_MOUNT_ID = "workspace"
def _build_desktop_fs_mw(root: Path):
"""Build the production filesystem middleware bound to a real local folder."""
selection = FilesystemSelection(
mode=FilesystemMode.DESKTOP_LOCAL_FOLDER,
local_mounts=(
LocalFilesystemMount(mount_id=_MOUNT_ID, root_path=str(root)),
),
)
resolver = build_backend_resolver(selection)
return build_filesystem_mw(
backend_resolver=resolver,
filesystem_mode=FilesystemMode.DESKTOP_LOCAL_FOLDER,
search_space_id=1,
user_id="00000000-0000-0000-0000-000000000001",
thread_id=1,
read_only=False,
)
async def _run(root: Path, turns: list[ScriptedTurn], thread: str):
"""Assemble a 1-middleware agent and drive the scripted turns to completion."""
harness = build_scripted_harness(turns=turns)
fs_mw = _build_desktop_fs_mw(root)
agent = create_agent(
harness.model,
tools=[],
middleware=[fs_mw],
checkpointer=InMemorySaver(),
)
return await agent.ainvoke(
{"messages": [HumanMessage(content="do filesystem work")]},
config={"configurable": {"thread_id": thread}},
)
def _tool_messages(result) -> list[ToolMessage]:
return [m for m in result["messages"] if isinstance(m, ToolMessage)]
def _tool_text(result, name: str) -> str:
for m in _tool_messages(result):
if m.name == name:
return str(m.content)
raise AssertionError(f"no ToolMessage from {name!r} in {_tool_messages(result)}")
async def test_write_then_read_round_trip(tmp_path: Path):
"""write_file persists to the real folder and read_file returns the content."""
result = await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/notes.md",
"content": "hello FS-CANARY-001",
},
"id": "c1",
}
]
),
ScriptedTurn(
tool_calls=[
{
"name": "read_file",
"args": {"file_path": f"/{_MOUNT_ID}/notes.md"},
"id": "c2",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-write-read",
)
# Real on-disk effect, not a mock.
assert (tmp_path / "notes.md").read_text() == "hello FS-CANARY-001"
# The tool actually returned the file content.
assert "FS-CANARY-001" in _tool_text(result, "read_file")
async def test_write_then_ls_lists_file(tmp_path: Path):
"""ls reflects a freshly written file in the real folder."""
result = await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/report.md",
"content": "x",
},
"id": "c1",
}
]
),
ScriptedTurn(
tool_calls=[
{"name": "ls", "args": {"path": f"/{_MOUNT_ID}"}, "id": "c2"}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-ls",
)
assert (tmp_path / "report.md").exists()
assert "report.md" in _tool_text(result, "ls")
async def test_edit_file_rewrites_on_disk(tmp_path: Path):
"""edit_file applies a real string replacement to the on-disk file."""
result = await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/doc.md",
"content": "the quick brown fox",
},
"id": "c1",
}
]
),
ScriptedTurn(
tool_calls=[
{
"name": "edit_file",
"args": {
"file_path": f"/{_MOUNT_ID}/doc.md",
"old_string": "brown",
"new_string": "red",
},
"id": "c2",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-edit",
)
assert (tmp_path / "doc.md").read_text() == "the quick red fox"
async def test_write_into_existing_subdir(tmp_path: Path):
"""A write into an EXISTING subdirectory lands on disk under that folder."""
(tmp_path / "sub").mkdir()
result = await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/sub/inner.md",
"content": "nested",
},
"id": "c1",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-subdir",
)
assert "Error" not in _tool_text(result, "write_file")
assert (tmp_path / "sub" / "inner.md").read_text() == "nested"
async def test_write_to_missing_parent_dir_is_rejected(tmp_path: Path):
"""Desktop write refuses to create a file under a non-existent directory.
Real current behavior: the local-folder backend requires the parent to
exist (and ``mkdir`` is a no-op for this backend), so the agent cannot
fabricate new nested folders via ``write_file``. Locking this guards against
a silent behavior change during the agents-module reorg.
"""
result = await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/missing/inner.md",
"content": "nested",
},
"id": "c1",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-missing-parent",
)
write_msg = _tool_text(result, "write_file")
assert "parent directory" in write_msg.lower()
assert not (tmp_path / "missing").exists()
async def test_move_file_relocates_on_disk(tmp_path: Path):
"""move_file relocates the real file from source to destination."""
await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/src.md",
"content": "movable",
},
"id": "c1",
}
]
),
ScriptedTurn(
tool_calls=[
{
"name": "move_file",
"args": {
"source_path": f"/{_MOUNT_ID}/src.md",
"destination_path": f"/{_MOUNT_ID}/dst.md",
},
"id": "c2",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-move",
)
assert not (tmp_path / "src.md").exists()
assert (tmp_path / "dst.md").read_text() == "movable"
async def test_rm_deletes_file_on_disk(tmp_path: Path):
"""rm removes the real file (desktop deletes are immediate)."""
await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "write_file",
"args": {
"file_path": f"/{_MOUNT_ID}/trash.md",
"content": "bye",
},
"id": "c1",
}
]
),
ScriptedTurn(
tool_calls=[
{
"name": "rm",
"args": {"path": f"/{_MOUNT_ID}/trash.md"},
"id": "c2",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-rm",
)
assert not (tmp_path / "trash.md").exists()
async def test_rmdir_removes_empty_dir_on_disk(tmp_path: Path):
"""rmdir removes a real empty directory."""
(tmp_path / "gone").mkdir()
assert (tmp_path / "gone").is_dir()
result = await _run(
tmp_path,
[
ScriptedTurn(
tool_calls=[
{
"name": "rmdir",
"args": {"path": f"/{_MOUNT_ID}/gone"},
"id": "c1",
}
]
),
ScriptedTurn(text="done"),
],
"fs-desktop-rmdir",
)
assert "Error" not in _tool_text(result, "rmdir")
assert not (tmp_path / "gone").exists()