feat: antropic model added fix & kb tooling fixes

- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent.
- Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state.
- Revised system prompts to reflect the new retrieval method and ensure proper citation handling.
- Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool.
- Enhanced documentation to guide usage of the new retrieval approach and citation practices.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-06-25 20:19:44 -07:00
parent b4af67f77d
commit 9642d7ced0
36 changed files with 581 additions and 168 deletions

View file

@ -1,4 +1,4 @@
"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
"""Behavior tests for the ``search_knowledge_base`` knowledge_base-subagent tool.
These exercise the tool through its public contract: seed a real document,
invoke the tool, and assert on the ``Command`` it returns the rendered
@ -6,6 +6,12 @@ invoke the tool, and assert on the ``Command`` it returns — the rendered
back on state is populated.
The tool's own DB session is redirected to the test session, and the embedding
leg is pinned so the search is deterministic without a live model.
``@``-mention scoping is covered along BOTH delivery paths: via ``runtime.state``
(the real subagent path the ``task`` tool forwards the mentions into state
because subagents have no ``context_schema``) and via ``runtime.context`` (the
fallback for any direct main-graph invocation). State takes precedence when both
are present.
"""
from __future__ import annotations
@ -18,11 +24,13 @@ import pytest
from langchain_core.messages import ToolMessage
from langgraph.types import Command
from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools import (
search_knowledge_base,
)
from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools.search_knowledge_base import (
create_search_knowledge_base_tool,
)
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
from app.config import config
from app.db import Chunk, Document, DocumentType, Folder
@ -89,9 +97,7 @@ def _pinned_embedding(monkeypatch):
async def _invoke(tool, query: str, state: dict | None = None, context=None):
runtime = SimpleNamespace(
state=state or {}, tool_call_id="call-1", context=context
)
runtime = SimpleNamespace(state=state or {}, tool_call_id="call-1", context=context)
return await tool.coroutine(query, runtime)
@ -198,9 +204,7 @@ async def test_document_mention_confines_search_to_pinned_doc(
)
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
result = await _invoke(
tool, "asyncio", context=_mentions(document_ids=[pinned.id])
)
result = await _invoke(tool, "asyncio", context=_mentions(document_ids=[pinned.id]))
# Search is confined to the pinned doc: only its content is rendered.
content = result.update["messages"][0].content
@ -227,11 +231,106 @@ async def test_folder_mention_confines_search_to_folder_documents(
)
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
result = await _invoke(
tool, "asyncio", context=_mentions(folder_ids=[folder.id])
)
result = await _invoke(tool, "asyncio", context=_mentions(folder_ids=[folder.id]))
# Search is confined to the folder's document: only its content is rendered.
content = result.update["messages"][0].content
assert "Inside" in content
assert "Outside" not in content
async def test_document_mention_via_state_confines_search(
db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
):
"""The real subagent path: mentions arrive on ``runtime.state`` (no context).
The ``task`` tool forwards ``mentioned_document_ids`` into subagent state
because subagents are compiled without a ``context_schema``. This asserts
the tool honors that state-delivered pin without any ``runtime.context``.
"""
pinned = await _add_document(
db_session,
search_space_id=db_search_space.id,
title="Pinned",
text="asyncio appears in the pinned doc.",
)
await _add_document(
db_session,
search_space_id=db_search_space.id,
title="Other",
text="asyncio appears in the other doc.",
)
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
result = await _invoke(
tool,
"asyncio",
state={"mentioned_document_ids": [pinned.id]},
context=None,
)
content = result.update["messages"][0].content
assert "Pinned" in content
assert "Other" not in content
async def test_folder_mention_via_state_confines_search(
db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
):
"""Folder pins delivered via state (subagent path) scope to the folder's docs."""
folder = await _add_folder(db_session, search_space_id=db_search_space.id)
await _add_document(
db_session,
search_space_id=db_search_space.id,
title="Inside",
text="asyncio appears inside the folder.",
folder_id=folder.id,
)
await _add_document(
db_session,
search_space_id=db_search_space.id,
title="Outside",
text="asyncio appears outside the folder.",
)
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
result = await _invoke(
tool,
"asyncio",
state={"mentioned_folder_ids": [folder.id]},
context=None,
)
content = result.update["messages"][0].content
assert "Inside" in content
assert "Outside" not in content
async def test_state_mentions_take_precedence_over_context(
db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
):
"""When both carry pins, state wins (the forwarded subagent pin is authoritative)."""
state_doc = await _add_document(
db_session,
search_space_id=db_search_space.id,
title="StatePinned",
text="asyncio appears in the state-pinned doc.",
)
context_doc = await _add_document(
db_session,
search_space_id=db_search_space.id,
title="ContextPinned",
text="asyncio appears in the context-pinned doc.",
)
tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
result = await _invoke(
tool,
"asyncio",
state={"mentioned_document_ids": [state_doc.id]},
context=_mentions(document_ids=[context_doc.id]),
)
content = result.update["messages"][0].content
assert "StatePinned" in content
assert "ContextPinned" not in content

View file

@ -7,8 +7,8 @@ import pytest
from app.agents.chat.runtime.referenced_chat_context import (
ReferencedChat,
render_referenced_chats_block,
transcript as transcript_mod,
)
from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn
pytestmark = pytest.mark.unit
@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
) -> None:
monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)
block = render_referenced_chats_block(
[_chat(1, "T", [("assistant", "x" * 500)])]
)
block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])
assert block is not None
# The turn is too big to keep whole, so its tail fills the budget with a

View file

@ -3,13 +3,28 @@
from __future__ import annotations
import pytest
from langchain_core.messages import AIMessage
from langchain_core.messages import AIMessage, SystemMessage
from app.agents.chat.runtime.llm_config import _sanitize_messages
pytestmark = pytest.mark.unit
def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
# Mirrors TodoListMiddleware appending ``{"type":"text","text":"\n\n"}`` to
# the system message: Anthropic rejects whitespace-only system blocks.
original = SystemMessage(
content=[
{"type": "text", "text": "real system prompt"},
{"type": "text", "text": "\n\n"},
]
)
sanitized = _sanitize_messages([original])
assert sanitized[0].content == "real system prompt"
def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
original = AIMessage(
content=[

View file

@ -0,0 +1,67 @@
"""Regression tests for ``build_todos_mw``.
langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
``"system: text content blocks must contain non-whitespace text"``. The main
agent supplies its own todo guidance and wants the tool only, so an empty
prompt must NOT mutate the request's system message.
"""
from __future__ import annotations
import pytest
from langchain.agents.middleware import TodoListMiddleware
from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
_ToolOnlyTodoListMiddleware,
build_todos_mw,
)
pytestmark = pytest.mark.unit
class _Request:
def __init__(self) -> None:
self.override_called = False
def override(self, **_kwargs: object) -> _Request:
self.override_called = True
return self
@pytest.mark.parametrize("blank", ["", " ", "\n\n"])
def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
mw = build_todos_mw(system_prompt=blank)
assert isinstance(mw, _ToolOnlyTodoListMiddleware)
# Still contributes the write_todos tool.
assert any(getattr(t, "name", None) == "write_todos" for t in mw.tools)
async def test_tool_only_middleware_does_not_touch_system_message() -> None:
mw = build_todos_mw(system_prompt="")
request = _Request()
captured: dict[str, object] = {}
async def handler(req: _Request) -> str:
captured["req"] = req
return "ok"
result = await mw.awrap_model_call(request, handler)
assert result == "ok"
assert captured["req"] is request
assert request.override_called is False
def test_custom_prompt_uses_upstream_middleware() -> None:
mw = build_todos_mw(system_prompt="custom todo guidance")
assert isinstance(mw, TodoListMiddleware)
assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
assert mw.system_prompt == "custom todo guidance"
def test_none_prompt_uses_upstream_default() -> None:
mw = build_todos_mw()
assert isinstance(mw, TodoListMiddleware)
assert not isinstance(mw, _ToolOnlyTodoListMiddleware)

View file

@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
assert block.startswith("<web_results>")
assert block.endswith("</web_results>")
assert "cite a result with its [n]" in block
assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
assert (
'<document title="Example" source="Web · example.com" view="excerpt">' in block
)
assert "[1] the answer is 42" in block

View file

@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:
assert document.title == "Q3 Launch Notes"
assert document.source == "Slack"
assert [
(p.locator["chunk_id"], p.content) for p in document.passages
] == [(880, "a"), (881, "b")]
assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
(880, "a"),
(881, "b"),
]
assert all(p.locator["document_id"] == 42 for p in document.passages)

View file

@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
document_type="FILE",
metadata={},
score=1.0 / document_id,
chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
chunks=[
ChunkHit(
chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
)
],
)

View file

@ -1,5 +1,49 @@
from app.services.global_model_catalog import materialize_global_model_catalog
from app.services.model_resolver import ensure_v1, to_litellm
from app.services.model_resolver import ensure_v1, strip_version_suffix, to_litellm
def test_anthropic_resolver_strips_trailing_v1_from_api_base() -> None:
# LiteLLM's Anthropic handler appends ``/v1/messages``; a base URL ending in
# ``/v1`` (the frontend default) would otherwise yield ``/v1/v1/messages``.
model, kwargs = to_litellm(
{
"provider": "anthropic",
"base_url": "https://api.anthropic.com/v1",
"api_key": "sk-ant-test",
"extra": {},
},
"claude-opus-4-8",
)
assert model == "anthropic/claude-opus-4-8"
assert kwargs["api_base"] == "https://api.anthropic.com"
def test_anthropic_resolver_keeps_root_api_base() -> None:
_model, kwargs = to_litellm(
{
"provider": "anthropic",
"base_url": "https://api.anthropic.com",
"api_key": "sk-ant-test",
"extra": {},
},
"claude-opus-4-8",
)
assert kwargs["api_base"] == "https://api.anthropic.com"
def test_strip_version_suffix() -> None:
assert strip_version_suffix("https://api.anthropic.com/v1") == (
"https://api.anthropic.com"
)
assert strip_version_suffix("https://api.anthropic.com/v1/") == (
"https://api.anthropic.com"
)
assert strip_version_suffix("https://api.anthropic.com") == (
"https://api.anthropic.com"
)
assert strip_version_suffix(None) is None
def test_openai_compatible_resolver_uses_explicit_api_base() -> None: