feat: antropic model added fix & kb tooling fixes

- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent. - Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state. - Revised system prompts to reflect the new retrieval method and ensure proper citation handling. - Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool. - Enhanced documentation to guide usage of the new retrieval approach and citation practices.
2026-06-26 21:39:43 +02:00 · 2026-06-25 20:19:44 -07:00 · 2026-06-25 20:19:44 -07:00 · 9642d7ced0
commit 9642d7ced0
parent b4af67f77d
36 changed files with 581 additions and 168 deletions
--- a/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
@ -7,8 +7,8 @@ import pytest
 from app.agents.chat.runtime.referenced_chat_context import (
    ReferencedChat,
    render_referenced_chats_block,
+    transcript as transcript_mod,
 )
-from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
 from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn

 pytestmark = pytest.mark.unit
@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
 ) -> None:
    monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)

-    block = render_referenced_chats_block(
-        [_chat(1, "T", [("assistant", "x" * 500)])]
-    )
+    block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])

    assert block is not None
    # The turn is too big to keep whole, so its tail fills the budget with a
--- a/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
@ -3,13 +3,28 @@
 from __future__ import annotations

 import pytest
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, SystemMessage

 from app.agents.chat.runtime.llm_config import _sanitize_messages

 pytestmark = pytest.mark.unit


+def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
+    # Mirrors TodoListMiddleware appending ``{"type":"text","text":"\n\n"}`` to
+    # the system message: Anthropic rejects whitespace-only system blocks.
+    original = SystemMessage(
+        content=[
+            {"type": "text", "text": "real system prompt"},
+            {"type": "text", "text": "\n\n"},
+        ]
+    )
+
+    sanitized = _sanitize_messages([original])
+
+    assert sanitized[0].content == "real system prompt"
+
+
 def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
    original = AIMessage(
        content=[
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
@ -0,0 +1,67 @@
+"""Regression tests for ``build_todos_mw``.
+
+langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
+text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
+that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
+``"system: text content blocks must contain non-whitespace text"``. The main
+agent supplies its own todo guidance and wants the tool only, so an empty
+prompt must NOT mutate the request's system message.
+"""
+
+from __future__ import annotations
+
+import pytest
+from langchain.agents.middleware import TodoListMiddleware
+
+from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
+    _ToolOnlyTodoListMiddleware,
+    build_todos_mw,
+)
+
+pytestmark = pytest.mark.unit
+
+
+class _Request:
+    def __init__(self) -> None:
+        self.override_called = False
+
+    def override(self, **_kwargs: object) -> _Request:
+        self.override_called = True
+        return self
+
+
+@pytest.mark.parametrize("blank", ["", "   ", "\n\n"])
+def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
+    mw = build_todos_mw(system_prompt=blank)
+    assert isinstance(mw, _ToolOnlyTodoListMiddleware)
+    # Still contributes the write_todos tool.
+    assert any(getattr(t, "name", None) == "write_todos" for t in mw.tools)
+
+
+async def test_tool_only_middleware_does_not_touch_system_message() -> None:
+    mw = build_todos_mw(system_prompt="")
+    request = _Request()
+    captured: dict[str, object] = {}
+
+    async def handler(req: _Request) -> str:
+        captured["req"] = req
+        return "ok"
+
+    result = await mw.awrap_model_call(request, handler)
+
+    assert result == "ok"
+    assert captured["req"] is request
+    assert request.override_called is False
+
+
+def test_custom_prompt_uses_upstream_middleware() -> None:
+    mw = build_todos_mw(system_prompt="custom todo guidance")
+    assert isinstance(mw, TodoListMiddleware)
+    assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
+    assert mw.system_prompt == "custom todo guidance"
+
+
+def test_none_prompt_uses_upstream_default() -> None:
+    mw = build_todos_mw()
+    assert isinstance(mw, TodoListMiddleware)
+    assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
    assert block.startswith("<web_results>")
    assert block.endswith("</web_results>")
    assert "cite a result with its [n]" in block
-    assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    assert (
+        '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    )
    assert "[1] the answer is 42" in block


--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:

    assert document.title == "Q3 Launch Notes"
    assert document.source == "Slack"
-    assert [
-        (p.locator["chunk_id"], p.content) for p in document.passages
-    ] == [(880, "a"), (881, "b")]
+    assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
+        (880, "a"),
+        (881, "b"),
+    ]
    assert all(p.locator["document_id"] == 42 for p in document.passages)


--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
        document_type="FILE",
        metadata={},
        score=1.0 / document_id,
-        chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
+        chunks=[
+            ChunkHit(
+                chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
+            )
+        ],
    )