mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
feat: antropic model added fix & kb tooling fixes
- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent. - Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state. - Revised system prompts to reflect the new retrieval method and ensure proper citation handling. - Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool. - Enhanced documentation to guide usage of the new retrieval approach and citation practices.
This commit is contained in:
parent
b4af67f77d
commit
9642d7ced0
36 changed files with 581 additions and 168 deletions
|
|
@ -1,4 +1,4 @@
|
|||
"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
|
||||
"""Behavior tests for the ``search_knowledge_base`` knowledge_base-subagent tool.
|
||||
|
||||
These exercise the tool through its public contract: seed a real document,
|
||||
invoke the tool, and assert on the ``Command`` it returns — the rendered
|
||||
|
|
@ -6,6 +6,12 @@ invoke the tool, and assert on the ``Command`` it returns — the rendered
|
|||
back on state is populated.
|
||||
The tool's own DB session is redirected to the test session, and the embedding
|
||||
leg is pinned so the search is deterministic without a live model.
|
||||
|
||||
``@``-mention scoping is covered along BOTH delivery paths: via ``runtime.state``
|
||||
(the real subagent path — the ``task`` tool forwards the mentions into state
|
||||
because subagents have no ``context_schema``) and via ``runtime.context`` (the
|
||||
fallback for any direct main-graph invocation). State takes precedence when both
|
||||
are present.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -18,11 +24,13 @@ import pytest
|
|||
from langchain_core.messages import ToolMessage
|
||||
from langgraph.types import Command
|
||||
|
||||
from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
|
||||
from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools import (
|
||||
search_knowledge_base,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools.search_knowledge_base import (
|
||||
create_search_knowledge_base_tool,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
from app.config import config
|
||||
from app.db import Chunk, Document, DocumentType, Folder
|
||||
|
||||
|
|
@ -89,9 +97,7 @@ def _pinned_embedding(monkeypatch):
|
|||
|
||||
|
||||
async def _invoke(tool, query: str, state: dict | None = None, context=None):
|
||||
runtime = SimpleNamespace(
|
||||
state=state or {}, tool_call_id="call-1", context=context
|
||||
)
|
||||
runtime = SimpleNamespace(state=state or {}, tool_call_id="call-1", context=context)
|
||||
return await tool.coroutine(query, runtime)
|
||||
|
||||
|
||||
|
|
@ -198,9 +204,7 @@ async def test_document_mention_confines_search_to_pinned_doc(
|
|||
)
|
||||
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
|
||||
|
||||
result = await _invoke(
|
||||
tool, "asyncio", context=_mentions(document_ids=[pinned.id])
|
||||
)
|
||||
result = await _invoke(tool, "asyncio", context=_mentions(document_ids=[pinned.id]))
|
||||
|
||||
# Search is confined to the pinned doc: only its content is rendered.
|
||||
content = result.update["messages"][0].content
|
||||
|
|
@ -227,11 +231,106 @@ async def test_folder_mention_confines_search_to_folder_documents(
|
|||
)
|
||||
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
|
||||
|
||||
result = await _invoke(
|
||||
tool, "asyncio", context=_mentions(folder_ids=[folder.id])
|
||||
)
|
||||
result = await _invoke(tool, "asyncio", context=_mentions(folder_ids=[folder.id]))
|
||||
|
||||
# Search is confined to the folder's document: only its content is rendered.
|
||||
content = result.update["messages"][0].content
|
||||
assert "Inside" in content
|
||||
assert "Outside" not in content
|
||||
|
||||
|
||||
async def test_document_mention_via_state_confines_search(
|
||||
db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
|
||||
):
|
||||
"""The real subagent path: mentions arrive on ``runtime.state`` (no context).
|
||||
|
||||
The ``task`` tool forwards ``mentioned_document_ids`` into subagent state
|
||||
because subagents are compiled without a ``context_schema``. This asserts
|
||||
the tool honors that state-delivered pin without any ``runtime.context``.
|
||||
"""
|
||||
pinned = await _add_document(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
title="Pinned",
|
||||
text="asyncio appears in the pinned doc.",
|
||||
)
|
||||
await _add_document(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
title="Other",
|
||||
text="asyncio appears in the other doc.",
|
||||
)
|
||||
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
|
||||
|
||||
result = await _invoke(
|
||||
tool,
|
||||
"asyncio",
|
||||
state={"mentioned_document_ids": [pinned.id]},
|
||||
context=None,
|
||||
)
|
||||
|
||||
content = result.update["messages"][0].content
|
||||
assert "Pinned" in content
|
||||
assert "Other" not in content
|
||||
|
||||
|
||||
async def test_folder_mention_via_state_confines_search(
|
||||
db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
|
||||
):
|
||||
"""Folder pins delivered via state (subagent path) scope to the folder's docs."""
|
||||
folder = await _add_folder(db_session, search_space_id=db_search_space.id)
|
||||
await _add_document(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
title="Inside",
|
||||
text="asyncio appears inside the folder.",
|
||||
folder_id=folder.id,
|
||||
)
|
||||
await _add_document(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
title="Outside",
|
||||
text="asyncio appears outside the folder.",
|
||||
)
|
||||
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
|
||||
|
||||
result = await _invoke(
|
||||
tool,
|
||||
"asyncio",
|
||||
state={"mentioned_folder_ids": [folder.id]},
|
||||
context=None,
|
||||
)
|
||||
|
||||
content = result.update["messages"][0].content
|
||||
assert "Inside" in content
|
||||
assert "Outside" not in content
|
||||
|
||||
|
||||
async def test_state_mentions_take_precedence_over_context(
|
||||
db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
|
||||
):
|
||||
"""When both carry pins, state wins (the forwarded subagent pin is authoritative)."""
|
||||
state_doc = await _add_document(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
title="StatePinned",
|
||||
text="asyncio appears in the state-pinned doc.",
|
||||
)
|
||||
context_doc = await _add_document(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
title="ContextPinned",
|
||||
text="asyncio appears in the context-pinned doc.",
|
||||
)
|
||||
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
|
||||
|
||||
result = await _invoke(
|
||||
tool,
|
||||
"asyncio",
|
||||
state={"mentioned_document_ids": [state_doc.id]},
|
||||
context=_mentions(document_ids=[context_doc.id]),
|
||||
)
|
||||
|
||||
content = result.update["messages"][0].content
|
||||
assert "StatePinned" in content
|
||||
assert "ContextPinned" not in content
|
||||
|
|
@ -7,8 +7,8 @@ import pytest
|
|||
from app.agents.chat.runtime.referenced_chat_context import (
|
||||
ReferencedChat,
|
||||
render_referenced_chats_block,
|
||||
transcript as transcript_mod,
|
||||
)
|
||||
from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
|
||||
from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
|
@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
|
|||
) -> None:
|
||||
monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)
|
||||
|
||||
block = render_referenced_chats_block(
|
||||
[_chat(1, "T", [("assistant", "x" * 500)])]
|
||||
)
|
||||
block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])
|
||||
|
||||
assert block is not None
|
||||
# The turn is too big to keep whole, so its tail fills the budget with a
|
||||
|
|
|
|||
|
|
@ -3,13 +3,28 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages import AIMessage, SystemMessage
|
||||
|
||||
from app.agents.chat.runtime.llm_config import _sanitize_messages
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
|
||||
# Mirrors TodoListMiddleware appending ``{"type":"text","text":"\n\n"}`` to
|
||||
# the system message: Anthropic rejects whitespace-only system blocks.
|
||||
original = SystemMessage(
|
||||
content=[
|
||||
{"type": "text", "text": "real system prompt"},
|
||||
{"type": "text", "text": "\n\n"},
|
||||
]
|
||||
)
|
||||
|
||||
sanitized = _sanitize_messages([original])
|
||||
|
||||
assert sanitized[0].content == "real system prompt"
|
||||
|
||||
|
||||
def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
|
||||
original = AIMessage(
|
||||
content=[
|
||||
|
|
|
|||
|
|
@ -0,0 +1,67 @@
|
|||
"""Regression tests for ``build_todos_mw``.
|
||||
|
||||
langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
|
||||
text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
|
||||
that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
|
||||
``"system: text content blocks must contain non-whitespace text"``. The main
|
||||
agent supplies its own todo guidance and wants the tool only, so an empty
|
||||
prompt must NOT mutate the request's system message.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from langchain.agents.middleware import TodoListMiddleware
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
|
||||
_ToolOnlyTodoListMiddleware,
|
||||
build_todos_mw,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class _Request:
|
||||
def __init__(self) -> None:
|
||||
self.override_called = False
|
||||
|
||||
def override(self, **_kwargs: object) -> _Request:
|
||||
self.override_called = True
|
||||
return self
|
||||
|
||||
|
||||
@pytest.mark.parametrize("blank", ["", " ", "\n\n"])
|
||||
def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
|
||||
mw = build_todos_mw(system_prompt=blank)
|
||||
assert isinstance(mw, _ToolOnlyTodoListMiddleware)
|
||||
# Still contributes the write_todos tool.
|
||||
assert any(getattr(t, "name", None) == "write_todos" for t in mw.tools)
|
||||
|
||||
|
||||
async def test_tool_only_middleware_does_not_touch_system_message() -> None:
|
||||
mw = build_todos_mw(system_prompt="")
|
||||
request = _Request()
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
async def handler(req: _Request) -> str:
|
||||
captured["req"] = req
|
||||
return "ok"
|
||||
|
||||
result = await mw.awrap_model_call(request, handler)
|
||||
|
||||
assert result == "ok"
|
||||
assert captured["req"] is request
|
||||
assert request.override_called is False
|
||||
|
||||
|
||||
def test_custom_prompt_uses_upstream_middleware() -> None:
|
||||
mw = build_todos_mw(system_prompt="custom todo guidance")
|
||||
assert isinstance(mw, TodoListMiddleware)
|
||||
assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
|
||||
assert mw.system_prompt == "custom todo guidance"
|
||||
|
||||
|
||||
def test_none_prompt_uses_upstream_default() -> None:
|
||||
mw = build_todos_mw()
|
||||
assert isinstance(mw, TodoListMiddleware)
|
||||
assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
|
||||
|
|
@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
|
|||
assert block.startswith("<web_results>")
|
||||
assert block.endswith("</web_results>")
|
||||
assert "cite a result with its [n]" in block
|
||||
assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
|
||||
assert (
|
||||
'<document title="Example" source="Web · example.com" view="excerpt">' in block
|
||||
)
|
||||
assert "[1] the answer is 42" in block
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:
|
|||
|
||||
assert document.title == "Q3 Launch Notes"
|
||||
assert document.source == "Slack"
|
||||
assert [
|
||||
(p.locator["chunk_id"], p.content) for p in document.passages
|
||||
] == [(880, "a"), (881, "b")]
|
||||
assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
|
||||
(880, "a"),
|
||||
(881, "b"),
|
||||
]
|
||||
assert all(p.locator["document_id"] == 42 for p in document.passages)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
|
|||
document_type="FILE",
|
||||
metadata={},
|
||||
score=1.0 / document_id,
|
||||
chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
|
||||
chunks=[
|
||||
ChunkHit(
|
||||
chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,49 @@
|
|||
from app.services.global_model_catalog import materialize_global_model_catalog
|
||||
from app.services.model_resolver import ensure_v1, to_litellm
|
||||
from app.services.model_resolver import ensure_v1, strip_version_suffix, to_litellm
|
||||
|
||||
|
||||
def test_anthropic_resolver_strips_trailing_v1_from_api_base() -> None:
|
||||
# LiteLLM's Anthropic handler appends ``/v1/messages``; a base URL ending in
|
||||
# ``/v1`` (the frontend default) would otherwise yield ``/v1/v1/messages``.
|
||||
model, kwargs = to_litellm(
|
||||
{
|
||||
"provider": "anthropic",
|
||||
"base_url": "https://api.anthropic.com/v1",
|
||||
"api_key": "sk-ant-test",
|
||||
"extra": {},
|
||||
},
|
||||
"claude-opus-4-8",
|
||||
)
|
||||
|
||||
assert model == "anthropic/claude-opus-4-8"
|
||||
assert kwargs["api_base"] == "https://api.anthropic.com"
|
||||
|
||||
|
||||
def test_anthropic_resolver_keeps_root_api_base() -> None:
|
||||
_model, kwargs = to_litellm(
|
||||
{
|
||||
"provider": "anthropic",
|
||||
"base_url": "https://api.anthropic.com",
|
||||
"api_key": "sk-ant-test",
|
||||
"extra": {},
|
||||
},
|
||||
"claude-opus-4-8",
|
||||
)
|
||||
|
||||
assert kwargs["api_base"] == "https://api.anthropic.com"
|
||||
|
||||
|
||||
def test_strip_version_suffix() -> None:
|
||||
assert strip_version_suffix("https://api.anthropic.com/v1") == (
|
||||
"https://api.anthropic.com"
|
||||
)
|
||||
assert strip_version_suffix("https://api.anthropic.com/v1/") == (
|
||||
"https://api.anthropic.com"
|
||||
)
|
||||
assert strip_version_suffix("https://api.anthropic.com") == (
|
||||
"https://api.anthropic.com"
|
||||
)
|
||||
assert strip_version_suffix(None) is None
|
||||
|
||||
|
||||
def test_openai_compatible_resolver_uses_explicit_api_base() -> None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue