mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 05:12:38 +02:00
feat: updated agent harness
This commit is contained in:
parent
9ec9b64348
commit
31a372bb84
139 changed files with 12583 additions and 1111 deletions
146
surfsense_backend/tests/integration/harness/__init__.py
Normal file
146
surfsense_backend/tests/integration/harness/__init__.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""
|
||||
Integration test harness for the SurfSense agent stack.
|
||||
|
||||
The plan calls for an ``LLMToolEmulator``-backed harness for end-to-end
|
||||
replay of ``stream_new_chat``. The currently-installed langchain version
|
||||
does not expose ``LLMToolEmulator``, so this harness builds the equivalent
|
||||
on top of :class:`langchain_core.language_models.fake_chat_models.FakeMessagesListChatModel`.
|
||||
|
||||
The harness lets a test author script a sequence of model responses
|
||||
(text + optional tool calls) and replay them against the new_chat agent
|
||||
graph. Tools are stubbed via ``StubToolSpec`` -> ``langchain_core.tools.tool``
|
||||
decorator and execute deterministic Python callbacks.
|
||||
|
||||
Used by:
|
||||
- ``tests/integration/agents/new_chat/test_feature_flag_smoke.py`` to
|
||||
confirm the kill-switch path produces identical-shape output regardless
|
||||
of which middleware flags are toggled.
|
||||
- Future per-tier PRs to record golden transcripts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from collections.abc import Callable, Sequence
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.language_models import LanguageModelInput
|
||||
from langchain_core.language_models.fake_chat_models import (
|
||||
FakeMessagesListChatModel,
|
||||
)
|
||||
from langchain_core.messages import AIMessage, BaseMessage
|
||||
from langchain_core.runnables import Runnable
|
||||
from langchain_core.tools import BaseTool, tool
|
||||
|
||||
|
||||
class _ToolBindingFakeChatModel(FakeMessagesListChatModel):
    """Scripted fake chat model that tolerates ``bind_tools``.

    ``langchain.agents.create_agent`` always binds the tool registry to the
    model it is handed, whereas the stock ``FakeMessagesListChatModel``
    raises ``NotImplementedError`` from ``bind_tools``. The replies served by
    this fake are scripted up front (tool calls included), so it has no use
    for the tool schema and treats binding as a no-op.
    """

    def bind_tools(  # type: ignore[override]
        self,
        tools: Sequence[Any],
        *,
        tool_choice: Any = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        """Accept a tool binding request and return this model unchanged.

        ``tools``, ``tool_choice`` and any extra keyword arguments are
        accepted for signature compatibility and then discarded.
        """
        # Nothing to bind: the scripted responses already carry the tool
        # calls the test author wants the agent to see.
        return self
|
||||
|
||||
|
||||
@dataclass
class StubToolSpec:
    """A test-mode tool: a name, a description, and a deterministic body."""

    # Name the tool is registered under; scripted tool calls refer to it.
    name: str
    # Description handed to the ``tool`` decorator.
    description: str
    # Callback invoked with the tool call's keyword arguments; its return
    # value is returned from the tool.
    handler: Callable[..., Any]
    # Optional argument schema forwarded to the ``tool`` decorator.
    # NOTE(review): presumably a JSON-schema style dict — confirm the
    # installed langchain_core accepts a dict for ``args_schema``.
    args_schema: dict[str, Any] | None = None

    def build(self) -> BaseTool:
        """Realize the spec as a ``langchain_core.tools.BaseTool``.

        The returned tool forwards every keyword argument it receives to
        ``handler`` and returns the handler's result unchanged.
        """
        decorator_options: dict[str, Any] = {"description": self.description}
        # Forward the schema only when one was supplied, so specs without a
        # schema are built exactly as before (the decorator infers one).
        if self.args_schema is not None:
            decorator_options["args_schema"] = self.args_schema

        @tool(name_or_callable=self.name, **decorator_options)
        def _stub_tool(**kwargs: Any) -> Any:
            return self.handler(**kwargs)

        return _stub_tool
|
||||
|
||||
|
||||
@dataclass
class ScriptedTurn:
    """A single assistant turn in a replay script.

    ``text`` holds the assistant's text and may stay empty when the turn is
    a pure tool call. ``tool_calls`` holds dicts shaped ``{name, args, id}``;
    when it is non-empty the agent routes to those tools and a follow-up
    turn is appended.
    """

    # Assistant text for this turn.
    text: str = ""
    # Tool calls emitted by this turn; every instance gets its own list.
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
def build_scripted_messages(turns: list[ScriptedTurn]) -> list[BaseMessage]:
    """Convert :class:`ScriptedTurn` records to ``AIMessage`` payloads.

    Each turn becomes one ``AIMessage`` whose content is ``turn.text`` and
    whose tool calls are normalized to ``{"name", "args", "id"}`` dicts.
    ``args`` defaults to ``{}`` when absent.

    A tool call whose ``id`` is missing or falsy receives the positional id
    ``call_t<turn index>_c<call index>``. Ids are derived from position, not
    from random data, so the same script always yields the same messages and
    recorded transcripts stay comparable between runs.

    Raises:
        KeyError: if a tool-call dict has no ``"name"`` key.
    """
    out: list[BaseMessage] = []
    for turn_index, turn in enumerate(turns):
        tool_calls = [
            {
                "name": tc["name"],
                "args": tc.get("args", {}),
                # Explicit ids win; otherwise fall back to a stable,
                # script-position-based id.
                "id": tc.get("id") or f"call_t{turn_index}_c{call_index}",
            }
            for call_index, tc in enumerate(turn.tool_calls)
        ]
        out.append(AIMessage(content=turn.text, tool_calls=tool_calls))
    return out
|
||||
|
||||
|
||||
@dataclass
class ScriptedHarness:
    """Pair of (model, tools) that can be handed to ``create_agent``."""

    # Fake chat model that replays the scripted responses.
    model: _ToolBindingFakeChatModel
    # Realized stub tools the agent may call.
    tools: list[BaseTool]
|
||||
|
||||
|
||||
def build_scripted_harness(
    *,
    turns: list[ScriptedTurn],
    tools: list[StubToolSpec] | None = None,
    sleep: float | None = None,
) -> ScriptedHarness:
    """Assemble a deterministic agent harness from a script.

    ``turns`` is converted to scripted model responses, ``tools`` (if any)
    are realized into tool objects, and ``sleep`` is passed straight through
    to the fake chat model's ``sleep`` option.

    Example::

        harness = build_scripted_harness(
            turns=[
                ScriptedTurn(tool_calls=[{"name": "echo", "args": {"x": 1}}]),
                ScriptedTurn(text="done"),
            ],
            tools=[
                StubToolSpec(name="echo", description="echo args", handler=lambda **kw: kw),
            ],
        )
    """
    scripted_model = _ToolBindingFakeChatModel(
        responses=build_scripted_messages(turns),
        sleep=sleep,
    )
    # ``None`` and an empty list both mean "no tools".
    stub_specs = tools or []
    return ScriptedHarness(
        model=scripted_model,
        tools=[spec.build() for spec in stub_specs],
    )
|
||||
|
||||
|
||||
# Public names of the harness package. The private
# ``_ToolBindingFakeChatModel`` adapter is not listed here.
__all__ = [
    "ScriptedHarness",
    "ScriptedTurn",
    "StubToolSpec",
    "build_scripted_harness",
    "build_scripted_messages",
]
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
"""Smoke test: scripted harness drives create_agent end-to-end and produces a tool-call-then-final-text trace."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from langchain.agents import create_agent
|
||||
|
||||
from tests.integration.harness import (
|
||||
ScriptedTurn,
|
||||
StubToolSpec,
|
||||
build_scripted_harness,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_scripted_harness_drives_basic_agent() -> None:
    """Run a two-turn script through ``create_agent`` and check the trace.

    The script first emits a single ``echo`` tool call and then the final
    text ``"done"``; the resulting message list must contain exactly one
    tool message carrying the stub's output and end with that final text.
    """
    # Imported here so message types are checked with ``isinstance`` rather
    # than by comparing class-name strings.
    from langchain_core.messages import AIMessage, ToolMessage

    harness = build_scripted_harness(
        turns=[
            ScriptedTurn(
                tool_calls=[
                    {"name": "echo", "args": {"x": 1}, "id": "call_1"},
                ]
            ),
            ScriptedTurn(text="done"),
        ],
        tools=[
            StubToolSpec(
                name="echo",
                description="Echo args back.",
                handler=lambda **kwargs: {"echoed": kwargs},
            ),
        ],
    )

    agent = create_agent(
        harness.model,
        system_prompt="You are a test agent.",
        tools=harness.tools,
    )

    result = await agent.ainvoke({"messages": [("user", "do the thing")]})
    messages = result["messages"]

    # The last assistant message is the scripted final answer.
    final_ai = next(
        (m for m in reversed(messages) if isinstance(m, AIMessage)),
        None,
    )
    assert final_ai is not None
    assert final_ai.content == "done"

    # Exactly one tool ran, and its output came from the stub handler.
    tool_messages = [m for m in messages if isinstance(m, ToolMessage)]
    assert len(tool_messages) == 1
    assert "echoed" in str(tool_messages[0].content)
|
||||
Loading…
Add table
Add a link
Reference in a new issue