test(hitl): regression net for end-to-end resume bridge dispatch

2026-05-06 22:32:39 +02:00 · 2026-05-05 17:13:13 +02:00 · 2026-05-05 17:13:13 +02:00 · 681895d28d
commit 681895d28d
parent 102f77ab7f
1 changed files with 132 additions and 0 deletions
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py
@ -0,0 +1,132 @@
 """End-to-end resume-bridge tests against a real LangGraph subagent.
 Builds a minimal Pregel subagent that calls ``interrupt(...)`` and drives the
 ``task`` tool directly with a hand-crafted ``ToolRuntime``. Exercises the only
 runtime contract we own: parent stashes a decision in
 ``config["configurable"]["surfsense_resume_value"]`` -> bridge forwards it as
 ``Command(resume={interrupt_id: value})`` -> subagent completes -> return value
 reflects the decision.
 We pause the subagent **outside** the parent task tool (calling
 ``subagent.ainvoke`` directly) to skip the ``_lg_interrupt`` re-raise path,
 which requires a parent runnable context. The bridge logic under test is the
 *resume* dispatch, not the propagation; propagation is exercised separately in
 its own module's tests.
 """
 from __future__ import annotations
 import pytest
 from langchain.tools import ToolRuntime
 from langchain_core.messages import HumanMessage
 from langgraph.checkpoint.memory import InMemorySaver
 from langgraph.graph import END, START, StateGraph
 from langgraph.types import Command, interrupt
 from typing_extensions import TypedDict
 from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subagent_middleware.task_tool import (
    build_task_tool_with_parent_config,
 )
 class _SubagentState(TypedDict, total=False):
    messages: list
    decision_text: str
 def _build_single_interrupt_subagent():
    """Subagent that interrupts once, then echoes the resume decision into state."""
    def approve_node(state):
        from langchain_core.messages import AIMessage
        decision = interrupt(
            {
                "action_requests": [
                    {
                        "name": "do_thing",
                        "args": {"x": 1},
                        "description": "test action",
                    }
                ],
                "review_configs": [{}],
            }
        )
        # Capture the resume payload verbatim so the test can assert the
        # bridge forwarded it intact (no reshape, no scalar broadcast).
        return {
            "messages": [AIMessage(content="done")],
            "decision_text": repr(decision),
        }
    graph = StateGraph(_SubagentState)
    graph.add_node("approve", approve_node)
    graph.add_edge(START, "approve")
    graph.add_edge("approve", END)
    return graph.compile(checkpointer=InMemorySaver())
 def _make_runtime(config: dict) -> ToolRuntime:
    return ToolRuntime(
        state={"messages": [HumanMessage(content="seed")]},
        context=None,
        config=config,
        stream_writer=None,
        tool_call_id="parent-tcid-1",
        store=None,
    )
@pytest.mark.asyncio
 async def test_resume_bridge_dispatches_decision_into_pending_subagent():
    """Side-channel decision -> targeted Command(resume) -> subagent completes."""
    subagent = _build_single_interrupt_subagent()
    task_tool = build_task_tool_with_parent_config(
        [
            {
                "name": "approver",
                "description": "approves things",
                "runnable": subagent,
            }
        ]
    )
    # 1. Pause the subagent directly so we can test only the resume path.
    parent_config: dict = {
        "configurable": {"thread_id": "shared-thread"},
        "recursion_limit": 100,
    }
    await subagent.ainvoke({"messages": [HumanMessage(content="seed")]}, parent_config)
    snap = await subagent.aget_state(parent_config)
    assert snap.tasks and snap.tasks[0].interrupts, (
        "fixture broken: subagent should be paused on its interrupt"
    )
    # 2. Stash the user's decision on the side-channel — this is what
    #    ``stream_resume_chat`` does in production.
    parent_config["configurable"]["surfsense_resume_value"] = {
        "decisions": ["APPROVED"]
    }
    runtime = _make_runtime(parent_config)
    # 3. Drive the bridge. Subagent has no remaining interrupt after resume,
    #    so propagation will not call ``_lg_interrupt`` (no parent ctx needed).
    result = await task_tool.coroutine(
        description="please approve",
        subagent_type="approver",
        runtime=runtime,
    )
    assert isinstance(result, Command)
    update = result.update
    # Bridge forwards the side-channel payload **verbatim** to the
    # subagent's ``interrupt()``. A scalar broadcast or accidental
    # unwrap would change this shape and we want to catch that.
    assert update["decision_text"] == repr({"decisions": ["APPROVED"]})
    # 4. Side-channel was consumed; a stale replay would re-prompt the user.
    assert "surfsense_resume_value" not in parent_config["configurable"]
    # 5. Subagent moved past the interrupt (no pending tasks remain).
    final = await subagent.aget_state(parent_config)
    assert not final.tasks or all(not t.interrupts for t in final.tasks)