feat: antropic model added fix & kb tooling fixes

- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent. - Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state. - Revised system prompts to reflect the new retrieval method and ensure proper citation handling. - Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool. - Enhanced documentation to guide usage of the new retrieval approach and citation practices.
2026-06-26 21:39:43 +02:00 · 2026-06-25 20:19:44 -07:00 · 2026-06-25 20:19:44 -07:00 · 9642d7ced0
commit 9642d7ced0
parent b4af67f77d
36 changed files with 581 additions and 168 deletions
--- a/surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
@ -1,4 +1,4 @@
-"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
+"""Behavior tests for the ``search_knowledge_base`` knowledge_base-subagent tool.

 These exercise the tool through its public contract: seed a real document,
 invoke the tool, and assert on the ``Command`` it returns — the rendered
@ -6,6 +6,12 @@ invoke the tool, and assert on the ``Command`` it returns — the rendered
 back on state is populated.
 The tool's own DB session is redirected to the test session, and the embedding
 leg is pinned so the search is deterministic without a live model.
+
+``@``-mention scoping is covered along BOTH delivery paths: via ``runtime.state``
+(the real subagent path — the ``task`` tool forwards the mentions into state
+because subagents have no ``context_schema``) and via ``runtime.context`` (the
+fallback for any direct main-graph invocation). State takes precedence when both
+are present.
 """

 from __future__ import annotations
@ -18,11 +24,13 @@ import pytest
 from langchain_core.messages import ToolMessage
 from langgraph.types import Command

-from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
-from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools import (
+    search_knowledge_base,
+)
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools.search_knowledge_base import (
    create_search_knowledge_base_tool,
 )
-from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.config import config
 from app.db import Chunk, Document, DocumentType, Folder

@ -89,9 +97,7 @@ def _pinned_embedding(monkeypatch):


 async def _invoke(tool, query: str, state: dict | None = None, context=None):
-    runtime = SimpleNamespace(
-        state=state or {}, tool_call_id="call-1", context=context
-    )
+    runtime = SimpleNamespace(state=state or {}, tool_call_id="call-1", context=context)
    return await tool.coroutine(query, runtime)


@ -198,9 +204,7 @@ async def test_document_mention_confines_search_to_pinned_doc(
    )
    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)

-    result = await _invoke(
-        tool, "asyncio", context=_mentions(document_ids=[pinned.id])
-    )
+    result = await _invoke(tool, "asyncio", context=_mentions(document_ids=[pinned.id]))

    # Search is confined to the pinned doc: only its content is rendered.
    content = result.update["messages"][0].content
@ -227,11 +231,106 @@ async def test_folder_mention_confines_search_to_folder_documents(
    )
    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)

-    result = await _invoke(
-        tool, "asyncio", context=_mentions(folder_ids=[folder.id])
-    )
+    result = await _invoke(tool, "asyncio", context=_mentions(folder_ids=[folder.id]))

    # Search is confined to the folder's document: only its content is rendered.
    content = result.update["messages"][0].content
    assert "Inside" in content
    assert "Outside" not in content
+
+
+async def test_document_mention_via_state_confines_search(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    """The real subagent path: mentions arrive on ``runtime.state`` (no context).
+
+    The ``task`` tool forwards ``mentioned_document_ids`` into subagent state
+    because subagents are compiled without a ``context_schema``. This asserts
+    the tool honors that state-delivered pin without any ``runtime.context``.
+    """
+    pinned = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Pinned",
+        text="asyncio appears in the pinned doc.",
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Other",
+        text="asyncio appears in the other doc.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool,
+        "asyncio",
+        state={"mentioned_document_ids": [pinned.id]},
+        context=None,
+    )
+
+    content = result.update["messages"][0].content
+    assert "Pinned" in content
+    assert "Other" not in content
+
+
+async def test_folder_mention_via_state_confines_search(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    """Folder pins delivered via state (subagent path) scope to the folder's docs."""
+    folder = await _add_folder(db_session, search_space_id=db_search_space.id)
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Inside",
+        text="asyncio appears inside the folder.",
+        folder_id=folder.id,
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Outside",
+        text="asyncio appears outside the folder.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool,
+        "asyncio",
+        state={"mentioned_folder_ids": [folder.id]},
+        context=None,
+    )
+
+    content = result.update["messages"][0].content
+    assert "Inside" in content
+    assert "Outside" not in content
+
+
+async def test_state_mentions_take_precedence_over_context(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    """When both carry pins, state wins (the forwarded subagent pin is authoritative)."""
+    state_doc = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="StatePinned",
+        text="asyncio appears in the state-pinned doc.",
+    )
+    context_doc = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="ContextPinned",
+        text="asyncio appears in the context-pinned doc.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool,
+        "asyncio",
+        state={"mentioned_document_ids": [state_doc.id]},
+        context=_mentions(document_ids=[context_doc.id]),
+    )
+
+    content = result.update["messages"][0].content
+    assert "StatePinned" in content
+    assert "ContextPinned" not in content
--- a/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
@ -7,8 +7,8 @@ import pytest
 from app.agents.chat.runtime.referenced_chat_context import (
    ReferencedChat,
    render_referenced_chats_block,
+    transcript as transcript_mod,
 )
-from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
 from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn

 pytestmark = pytest.mark.unit
@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
 ) -> None:
    monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)

-    block = render_referenced_chats_block(
-        [_chat(1, "T", [("assistant", "x" * 500)])]
-    )
+    block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])

    assert block is not None
    # The turn is too big to keep whole, so its tail fills the budget with a
--- a/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
@ -3,13 +3,28 @@
 from __future__ import annotations

 import pytest
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, SystemMessage

 from app.agents.chat.runtime.llm_config import _sanitize_messages

 pytestmark = pytest.mark.unit


+def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
+    # Mirrors TodoListMiddleware appending ``{"type":"text","text":"\n\n"}`` to
+    # the system message: Anthropic rejects whitespace-only system blocks.
+    original = SystemMessage(
+        content=[
+            {"type": "text", "text": "real system prompt"},
+            {"type": "text", "text": "\n\n"},
+        ]
+    )
+
+    sanitized = _sanitize_messages([original])
+
+    assert sanitized[0].content == "real system prompt"
+
+
 def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
    original = AIMessage(
        content=[
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
@ -0,0 +1,67 @@
+"""Regression tests for ``build_todos_mw``.
+
+langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
+text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
+that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
+``"system: text content blocks must contain non-whitespace text"``. The main
+agent supplies its own todo guidance and wants the tool only, so an empty
+prompt must NOT mutate the request's system message.
+"""
+
+from __future__ import annotations
+
+import pytest
+from langchain.agents.middleware import TodoListMiddleware
+
+from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
+    _ToolOnlyTodoListMiddleware,
+    build_todos_mw,
+)
+
+pytestmark = pytest.mark.unit
+
+
+class _Request:
+    def __init__(self) -> None:
+        self.override_called = False
+
+    def override(self, **_kwargs: object) -> _Request:
+        self.override_called = True
+        return self
+
+
+@pytest.mark.parametrize("blank", ["", "   ", "\n\n"])
+def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
+    mw = build_todos_mw(system_prompt=blank)
+    assert isinstance(mw, _ToolOnlyTodoListMiddleware)
+    # Still contributes the write_todos tool.
+    assert any(getattr(t, "name", None) == "write_todos" for t in mw.tools)
+
+
+async def test_tool_only_middleware_does_not_touch_system_message() -> None:
+    mw = build_todos_mw(system_prompt="")
+    request = _Request()
+    captured: dict[str, object] = {}
+
+    async def handler(req: _Request) -> str:
+        captured["req"] = req
+        return "ok"
+
+    result = await mw.awrap_model_call(request, handler)
+
+    assert result == "ok"
+    assert captured["req"] is request
+    assert request.override_called is False
+
+
+def test_custom_prompt_uses_upstream_middleware() -> None:
+    mw = build_todos_mw(system_prompt="custom todo guidance")
+    assert isinstance(mw, TodoListMiddleware)
+    assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
+    assert mw.system_prompt == "custom todo guidance"
+
+
+def test_none_prompt_uses_upstream_default() -> None:
+    mw = build_todos_mw()
+    assert isinstance(mw, TodoListMiddleware)
+    assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
    assert block.startswith("<web_results>")
    assert block.endswith("</web_results>")
    assert "cite a result with its [n]" in block
-    assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    assert (
+        '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    )
    assert "[1] the answer is 42" in block


--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:

    assert document.title == "Q3 Launch Notes"
    assert document.source == "Slack"
-    assert [
-        (p.locator["chunk_id"], p.content) for p in document.passages
-    ] == [(880, "a"), (881, "b")]
+    assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
+        (880, "a"),
+        (881, "b"),
+    ]
    assert all(p.locator["document_id"] == 42 for p in document.passages)


--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
        document_type="FILE",
        metadata={},
        score=1.0 / document_id,
-        chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
+        chunks=[
+            ChunkHit(
+                chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
+            )
+        ],
    )


--- a/surfsense_backend/tests/unit/services/test_model_connections.py
+++ b/surfsense_backend/tests/unit/services/test_model_connections.py
@ -1,5 +1,49 @@
 from app.services.global_model_catalog import materialize_global_model_catalog
-from app.services.model_resolver import ensure_v1, to_litellm
+from app.services.model_resolver import ensure_v1, strip_version_suffix, to_litellm
+
+
+def test_anthropic_resolver_strips_trailing_v1_from_api_base() -> None:
+    # LiteLLM's Anthropic handler appends ``/v1/messages``; a base URL ending in
+    # ``/v1`` (the frontend default) would otherwise yield ``/v1/v1/messages``.
+    model, kwargs = to_litellm(
+        {
+            "provider": "anthropic",
+            "base_url": "https://api.anthropic.com/v1",
+            "api_key": "sk-ant-test",
+            "extra": {},
+        },
+        "claude-opus-4-8",
+    )
+
+    assert model == "anthropic/claude-opus-4-8"
+    assert kwargs["api_base"] == "https://api.anthropic.com"
+
+
+def test_anthropic_resolver_keeps_root_api_base() -> None:
+    _model, kwargs = to_litellm(
+        {
+            "provider": "anthropic",
+            "base_url": "https://api.anthropic.com",
+            "api_key": "sk-ant-test",
+            "extra": {},
+        },
+        "claude-opus-4-8",
+    )
+
+    assert kwargs["api_base"] == "https://api.anthropic.com"
+
+
+def test_strip_version_suffix() -> None:
+    assert strip_version_suffix("https://api.anthropic.com/v1") == (
+        "https://api.anthropic.com"
+    )
+    assert strip_version_suffix("https://api.anthropic.com/v1/") == (
+        "https://api.anthropic.com"
+    )
+    assert strip_version_suffix("https://api.anthropic.com") == (
+        "https://api.anthropic.com"
+    )
+    assert strip_version_suffix(None) is None


 def test_openai_compatible_resolver_uses_explicit_api_base() -> None: