mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
feat: Anthropic model fix added & KB tooling fixes
- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent.
- Introduced the `_forward_mention_pins` function to carry `@`-mention pins into subagent state.
- Revised system prompts to reflect the new retrieval method and ensure proper citation handling.
- Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool.
- Enhanced documentation to guide usage of the new retrieval approach and citation practices.
This commit is contained in:
parent
b4af67f77d
commit
9642d7ced0
36 changed files with 581 additions and 168 deletions
|
|
@ -7,8 +7,8 @@ import pytest
|
|||
from app.agents.chat.runtime.referenced_chat_context import (
|
||||
ReferencedChat,
|
||||
render_referenced_chats_block,
|
||||
transcript as transcript_mod,
|
||||
)
|
||||
from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
|
||||
from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
|
@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
|
|||
) -> None:
|
||||
monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)
|
||||
|
||||
block = render_referenced_chats_block(
|
||||
[_chat(1, "T", [("assistant", "x" * 500)])]
|
||||
)
|
||||
block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])
|
||||
|
||||
assert block is not None
|
||||
# The turn is too big to keep whole, so its tail fills the budget with a
|
||||
|
|
|
|||
|
|
@ -3,13 +3,28 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages import AIMessage, SystemMessage
|
||||
|
||||
from app.agents.chat.runtime.llm_config import _sanitize_messages
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
|
||||
# Mirrors TodoListMiddleware appending ``{"type":"text","text":"\n\n"}`` to
|
||||
# the system message: Anthropic rejects whitespace-only system blocks.
|
||||
original = SystemMessage(
|
||||
content=[
|
||||
{"type": "text", "text": "real system prompt"},
|
||||
{"type": "text", "text": "\n\n"},
|
||||
]
|
||||
)
|
||||
|
||||
sanitized = _sanitize_messages([original])
|
||||
|
||||
assert sanitized[0].content == "real system prompt"
|
||||
|
||||
|
||||
def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
|
||||
original = AIMessage(
|
||||
content=[
|
||||
|
|
|
|||
|
|
@ -0,0 +1,67 @@
|
|||
"""Regression tests for ``build_todos_mw``.
|
||||
|
||||
langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
|
||||
text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
|
||||
that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
|
||||
``"system: text content blocks must contain non-whitespace text"``. The main
|
||||
agent supplies its own todo guidance and wants the tool only, so an empty
|
||||
prompt must NOT mutate the request's system message.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from langchain.agents.middleware import TodoListMiddleware
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
|
||||
_ToolOnlyTodoListMiddleware,
|
||||
build_todos_mw,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class _Request:
|
||||
def __init__(self) -> None:
|
||||
self.override_called = False
|
||||
|
||||
def override(self, **_kwargs: object) -> _Request:
|
||||
self.override_called = True
|
||||
return self
|
||||
|
||||
|
||||
@pytest.mark.parametrize("blank", ["", " ", "\n\n"])
|
||||
def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
|
||||
mw = build_todos_mw(system_prompt=blank)
|
||||
assert isinstance(mw, _ToolOnlyTodoListMiddleware)
|
||||
# Still contributes the write_todos tool.
|
||||
assert any(getattr(t, "name", None) == "write_todos" for t in mw.tools)
|
||||
|
||||
|
||||
async def test_tool_only_middleware_does_not_touch_system_message() -> None:
|
||||
mw = build_todos_mw(system_prompt="")
|
||||
request = _Request()
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
async def handler(req: _Request) -> str:
|
||||
captured["req"] = req
|
||||
return "ok"
|
||||
|
||||
result = await mw.awrap_model_call(request, handler)
|
||||
|
||||
assert result == "ok"
|
||||
assert captured["req"] is request
|
||||
assert request.override_called is False
|
||||
|
||||
|
||||
def test_custom_prompt_uses_upstream_middleware() -> None:
|
||||
mw = build_todos_mw(system_prompt="custom todo guidance")
|
||||
assert isinstance(mw, TodoListMiddleware)
|
||||
assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
|
||||
assert mw.system_prompt == "custom todo guidance"
|
||||
|
||||
|
||||
def test_none_prompt_uses_upstream_default() -> None:
|
||||
mw = build_todos_mw()
|
||||
assert isinstance(mw, TodoListMiddleware)
|
||||
assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
|
||||
|
|
@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
|
|||
assert block.startswith("<web_results>")
|
||||
assert block.endswith("</web_results>")
|
||||
assert "cite a result with its [n]" in block
|
||||
assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
|
||||
assert (
|
||||
'<document title="Example" source="Web · example.com" view="excerpt">' in block
|
||||
)
|
||||
assert "[1] the answer is 42" in block
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:
|
|||
|
||||
assert document.title == "Q3 Launch Notes"
|
||||
assert document.source == "Slack"
|
||||
assert [
|
||||
(p.locator["chunk_id"], p.content) for p in document.passages
|
||||
] == [(880, "a"), (881, "b")]
|
||||
assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
|
||||
(880, "a"),
|
||||
(881, "b"),
|
||||
]
|
||||
assert all(p.locator["document_id"] == 42 for p in document.passages)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
|
|||
document_type="FILE",
|
||||
metadata={},
|
||||
score=1.0 / document_id,
|
||||
chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
|
||||
chunks=[
|
||||
ChunkHit(
|
||||
chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue