feat: updated agent harness

2026-05-04 05:12:38 +02:00 · 2026-04-28 09:22:19 -07:00 · 2026-04-28 09:22:19 -07:00 · 31a372bb84
commit 31a372bb84
parent 9ec9b64348
139 changed files with 12583 additions and 1111 deletions
--- a/surfsense_backend/tests/integration/harness/__init__.py
+++ b/surfsense_backend/tests/integration/harness/__init__.py
@ -0,0 +1,146 @@
+"""
+Integration test harness for the SurfSense agent stack.
+
+The plan calls for an ``LLMToolEmulator``-backed harness for end-to-end
+replay of ``stream_new_chat``. The currently-installed langchain version
+does not expose ``LLMToolEmulator``, so this harness builds the equivalent
+on top of :class:`langchain_core.language_models.fake_chat_models.FakeMessagesListChatModel`.
+
+The harness lets a test author script a sequence of model responses
+(text + optional tool calls) and replay them against the new_chat agent
+graph. Tools are stubbed via ``StubToolSpec`` -> ``langchain_core.tools.tool``
+decorator and execute deterministic Python callbacks.
+
+Used by:
+- ``tests/integration/agents/new_chat/test_feature_flag_smoke.py`` to
+  confirm the kill-switch path produces identical-shape output regardless
+  of which middleware flags are toggled.
+- Future per-tier PRs to record golden transcripts.
+"""
+
+from __future__ import annotations
+
+import uuid
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass, field
+from typing import Any
+
+from langchain_core.language_models import LanguageModelInput
+from langchain_core.language_models.fake_chat_models import (
+    FakeMessagesListChatModel,
+)
+from langchain_core.messages import AIMessage, BaseMessage
+from langchain_core.runnables import Runnable
+from langchain_core.tools import BaseTool, tool
+
+
+class _ToolBindingFakeChatModel(FakeMessagesListChatModel):
+    """Adapter so the harness model can pretend it understands ``bind_tools``.
+
+    The base ``FakeMessagesListChatModel`` raises ``NotImplementedError`` from
+    ``bind_tools``, but ``langchain.agents.create_agent`` always calls
+    ``bind_tools`` to attach the tool registry. We don't actually need the
+    fake to honor the tool schema — it's already scripted to emit the right
+    tool calls — so we return self.
+    """
+
+    def bind_tools(  # type: ignore[override]
+        self,
+        tools: Sequence[Any],
+        *,
+        tool_choice: Any = None,
+        **kwargs: Any,
+    ) -> Runnable[LanguageModelInput, AIMessage]:
+        """Accept a tool-binding request and ignore it.
+
+        ``tools``, ``tool_choice`` and any extra keyword arguments are never
+        inspected; the parameters exist only so the call signature is
+        accepted. The return value is this very instance, not a copy.
+        """
+        # Nothing to bind: the scripted responses already carry the tool calls.
+        return self
+
+
+@dataclass
+class StubToolSpec:
+    """A test-mode tool: a name, description, and a deterministic body.
+
+    ``handler`` is called with the tool-call arguments as keyword arguments
+    and whatever it returns is returned by the tool. ``args_schema`` is an
+    optional dict describing those arguments (presumably a JSON-schema dict as
+    accepted by langchain's ``tool`` decorator — confirm against the installed
+    version); leave it as ``None`` to let the decorator derive a schema from
+    the stub's signature.
+    """
+
+    name: str
+    description: str
+    handler: Callable[..., Any]
+    args_schema: dict[str, Any] | None = None
+
+    def build(self) -> BaseTool:
+        """Realize as a `langchain_core.tools.BaseTool`.
+
+        The tool is registered under ``name`` with ``description`` and, when
+        one was declared, ``args_schema``. Invoking it forwards every keyword
+        argument to ``handler`` and returns the handler's result.
+        """
+
+        @tool(
+            name_or_callable=self.name,
+            description=self.description,
+            # Forward the declared schema instead of dropping it. ``None`` is
+            # the decorator's default, so specs without a schema are unchanged.
+            args_schema=self.args_schema,
+        )
+        def _stub_tool(**kwargs: Any) -> Any:
+            return self.handler(**kwargs)
+
+        return _stub_tool
+
+
+@dataclass
+class ScriptedTurn:
+    """One scripted assistant turn.
+
+    `text` is the assistant text (may be empty if pure tool call).
+    `tool_calls` is a list of dicts ``{name, args, id}``; if non-empty, the
+    agent will route to those tools and append a follow-up turn.
+
+    Only ``name`` is mandatory in each tool-call dict:
+    :func:`build_scripted_messages` substitutes ``{}`` for missing ``args``
+    and generates an ``id`` when none is given.
+    """
+
+    # Assistant text for this turn; defaults to the empty string.
+    text: str = ""
+    # Tool-call dicts for this turn; every instance gets its own fresh list.
+    tool_calls: list[dict[str, Any]] = field(default_factory=list)
+
+
+def build_scripted_messages(turns: list[ScriptedTurn]) -> list[BaseMessage]:
+    """Map each :class:`ScriptedTurn` to one ``AIMessage``, keeping script order.
+
+    Every tool call is normalised to a ``{name, args, id}`` dict: ``name`` is
+    required (``KeyError`` if absent), ``args`` defaults to ``{}``, and a
+    missing or falsy ``id`` is replaced by a random ``call_<8 hex chars>``
+    value. Supply explicit ids when the resulting trace has to be
+    reproducible across runs.
+    """
+
+    def _normalise(call: dict[str, Any]) -> dict[str, Any]:
+        # Rebuild the dict so only the three expected keys reach the message.
+        return {
+            "name": call["name"],
+            "args": call.get("args", {}),
+            "id": call.get("id") or f"call_{uuid.uuid4().hex[:8]}",
+        }
+
+    return [
+        AIMessage(
+            content=turn.text,
+            tool_calls=[_normalise(call) for call in turn.tool_calls],
+        )
+        for turn in turns
+    ]
+
+
+@dataclass
+class ScriptedHarness:
+    """Bundle of (model, tools) ready to plug into ``create_agent``.
+
+    Instances are normally produced by :func:`build_scripted_harness`.
+    """
+
+    # Fake chat model that replays the scripted responses.
+    model: _ToolBindingFakeChatModel
+    # Realized stub tools to hand to the agent alongside the model.
+    tools: list[BaseTool]
+
+
+def build_scripted_harness(
+    *,
+    turns: list[ScriptedTurn],
+    tools: list[StubToolSpec] | None = None,
+    sleep: float | None = None,
+) -> ScriptedHarness:
+    """Assemble a scripted fake model and its stub tools from a script.
+
+    ``turns`` is converted with :func:`build_scripted_messages` and loaded into
+    the fake chat model as its responses. ``tools`` (``None`` or empty means
+    no tools) are realized through :meth:`StubToolSpec.build`, in order.
+    ``sleep`` is handed unchanged to the fake model's ``sleep`` argument
+    (presumably a per-response delay — see the langchain fake-model docs).
+
+    Example::
+
+        harness = build_scripted_harness(
+            turns=[
+                ScriptedTurn(tool_calls=[{"name": "echo", "args": {"x": 1}}]),
+                ScriptedTurn(text="done"),
+            ],
+            tools=[
+                StubToolSpec(name="echo", description="echo args", handler=lambda **kw: kw),
+            ],
+        )
+    """
+    specs = tools if tools else []
+    # Arguments evaluate left to right: the model is built before any tool.
+    return ScriptedHarness(
+        model=_ToolBindingFakeChatModel(
+            responses=build_scripted_messages(turns),
+            sleep=sleep,
+        ),
+        tools=[spec.build() for spec in specs],
+    )
+
+
+# Names exported by ``from ... import *``; ``_ToolBindingFakeChatModel`` is
+# deliberately left out and stays an implementation detail.
+__all__ = [
+    "ScriptedHarness",
+    "ScriptedTurn",
+    "StubToolSpec",
+    "build_scripted_harness",
+    "build_scripted_messages",
+]
--- a/surfsense_backend/tests/integration/harness/test_scripted_harness.py
+++ b/surfsense_backend/tests/integration/harness/test_scripted_harness.py
@ -0,0 +1,53 @@
+"""Smoke test: scripted harness drives create_agent end-to-end and produces a tool-call-then-final-text trace."""
+
+from __future__ import annotations
+
+import pytest
+from langchain.agents import create_agent
+
+from tests.integration.harness import (
+    ScriptedTurn,
+    StubToolSpec,
+    build_scripted_harness,
+)
+
+# Apply the ``integration`` marker to every test in this module.
+pytestmark = pytest.mark.integration
+
+
+@pytest.mark.asyncio
+async def test_scripted_harness_drives_basic_agent() -> None:
+    """Replay a two-turn script (tool call, then text) through ``create_agent``."""
+    # Turn 1 requests the ``echo`` tool; turn 2 is the plain-text reply that
+    # the run is expected to end on.
+    script = [
+        ScriptedTurn(tool_calls=[{"name": "echo", "args": {"x": 1}, "id": "call_1"}]),
+        ScriptedTurn(text="done"),
+    ]
+    echo_spec = StubToolSpec(
+        name="echo",
+        description="Echo args back.",
+        handler=lambda **kwargs: {"echoed": kwargs},
+    )
+    harness = build_scripted_harness(turns=script, tools=[echo_spec])
+
+    agent = create_agent(
+        harness.model,
+        system_prompt="You are a test agent.",
+        tools=harness.tools,
+    )
+
+    result = await agent.ainvoke({"messages": [("user", "do the thing")]})
+    transcript = result["messages"]
+
+    # The last assistant message must be the scripted final text.
+    ai_messages = [m for m in transcript if m.__class__.__name__ == "AIMessage"]
+    assert ai_messages
+    assert ai_messages[-1].content == "done"
+
+    # Exactly one tool ran, and its output came from the stub handler.
+    tool_messages = [m for m in transcript if m.__class__.__name__ == "ToolMessage"]
+    assert len(tool_messages) == 1
+    assert "echoed" in str(tool_messages[0].content)