test(automations/actions): lock agent_task helpers (auto_decide + finalize)

auto_decide.build_auto_decisions (3): produces one decision per action_request entry, defaults to one decision for legacy scalar interrupts, and skips malformed interrupts silently so a misbehaving tool can't take down the whole agent_task step. finalize.extract_final_assistant_message (4): string-content AIMessage returned verbatim, list-of-parts content concatenated (skipping non-text parts like tool_use), walks back past trailing ToolMessages to find the last AIMessage, and returns None when no extractable text is present (so callers can branch on silence vs. empty). 7 tests, pure unit.
2026-05-29 19:35:20 +02:00 · 2026-05-28 19:03:29 +02:00 · 2026-05-28 19:03:29 +02:00 · acbeb60a43
commit acbeb60a43
parent db4eef651f
4 changed files with 153 additions and 0 deletions
--- a/surfsense_backend/tests/unit/automations/actions/__init__.py
+++ b/surfsense_backend/tests/unit/automations/actions/__init__.py
--- a/surfsense_backend/tests/unit/automations/actions/agent_task/__init__.py
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/__init__.py
--- a/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py
@@ -0,0 +1,73 @@
 """Lock ``build_auto_decisions`` — the HITL auto-approve/reject wire mapper.
 ``build_auto_decisions`` walks ``state.interrupts`` (duck-typed) and produces
 two parallel resume maps: one keyed by LangGraph ``Interrupt.id`` and one
 keyed by ``tool_call_id`` for the subagent middleware bridge. Both carry
 the same decision payload.
 """
 from __future__ import annotations
 from types import SimpleNamespace
 from typing import Any
 import pytest
 from app.automations.actions.agent_task.auto_decide import build_auto_decisions
 # Module-level marker: tags every test in this file with ``unit``.
 pytestmark = pytest.mark.unit
 def _state(interrupts: list[Any]) -> SimpleNamespace:
    """Return a minimal stand-in for a LangGraph state.

    The stub exposes a single ``interrupts`` attribute holding the list
    that was passed in.
    """
    stub = SimpleNamespace()
    stub.interrupts = interrupts
    return stub
 def _interrupt(*, id_: str, value: Any) -> SimpleNamespace:
    """Return an interrupt stand-in exposing ``id`` and ``value`` attributes.

    ``id_`` is stored under the attribute name ``id``; ``value`` is stored
    unchanged.
    """
    attrs = {"id": id_, "value": value}
    return SimpleNamespace(**attrs)
 def test_build_auto_decisions_produces_one_decision_per_action_request() -> None:
    """A batched interrupt yields one decision per ``action_requests`` entry.

    Two action requests go in; both returned maps must carry two
    ``approve`` decisions — one map keyed by the interrupt id, the other
    by the ``tool_call_id``. A dropped decision would leave a pending
    action unresolved.
    """
    payload = {
        "tool_call_id": "tc-1",
        "action_requests": [{"id": "a"}, {"id": "b"}],
    }
    state = _state([_interrupt(id_="lg-1", value=payload)])
    by_interrupt_id, by_tool_call_id = build_auto_decisions(state, "approve")
    expected = {"decisions": [{"type": "approve"}, {"type": "approve"}]}
    assert by_interrupt_id == {"lg-1": expected}
    assert by_tool_call_id == {"tc-1": expected}
 def test_build_auto_decisions_defaults_to_one_decision_for_scalar_interrupt() -> None:
    """An interrupt without ``action_requests`` still gets one decision.

    The interrupt value carries only a ``tool_call_id``; both returned
    maps must hold exactly one ``reject`` decision. Guards the older
    single-action interrupt shape.
    """
    legacy = _interrupt(id_="lg-2", value={"tool_call_id": "tc-2"})
    state = _state([legacy])
    by_interrupt_id, by_tool_call_id = build_auto_decisions(state, "reject")
    expected = {"decisions": [{"type": "reject"}]}
    assert by_interrupt_id == {"lg-2": expected}
    assert by_tool_call_id == {"tc-2": expected}
 def test_build_auto_decisions_skips_interrupts_with_invalid_shape() -> None:
    """Malformed interrupts are dropped; well-formed ones still resolve.

    Of the three interrupts supplied, one has a non-dict ``value`` and
    one has a ``None`` id. Only the well-formed interrupt may appear in
    either map, and the call must not raise — one bad interrupt should
    not abort the whole agent_task step.
    """
    interrupts = [
        _interrupt(id_="lg-good", value={"tool_call_id": "tc-good"}),
        _interrupt(id_="lg-bad-value", value="not a dict"),
        _interrupt(id_=None, value={"tool_call_id": "tc-bad-id"}),  # type: ignore[arg-type]
    ]
    by_interrupt_id, by_tool_call_id = build_auto_decisions(
        _state(interrupts), "approve"
    )
    expected = {"decisions": [{"type": "approve"}]}
    assert by_interrupt_id == {"lg-good": expected}
    assert by_tool_call_id == {"tc-good": expected}
--- a/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py
@@ -0,0 +1,80 @@
 """Lock ``extract_final_assistant_message`` — what surfaces in run output.
 Each scenario is one shape the agent runtime is observed to produce.
 Locking these means we can refactor the extractor without losing
 backwards compatibility with already-stored ``run.output`` payloads.
 """
 from __future__ import annotations
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
 from app.automations.actions.agent_task.finalize import extract_final_assistant_message
 # Module-level marker: tags every test in this file with ``unit``.
 pytestmark = pytest.mark.unit
 def test_extract_returns_last_ai_message_string_content() -> None:
    """A final ``AIMessage`` with plain string content is returned as-is.

    This is the baseline transcript shape: a human turn followed by one
    assistant turn. The content here has no surrounding whitespace, so
    this case does not exercise any trimming.
    """
    transcript = [
        HumanMessage(content="ask"),
        AIMessage(content="the answer"),
    ]
    extracted = extract_final_assistant_message({"messages": transcript})
    assert extracted == "the answer"
 def test_extract_concatenates_text_parts_and_skips_non_text_parts() -> None:
    """List-shaped content is reduced to its ``text`` parts, joined in order.

    The message mixes two ``text`` parts with a ``tool_use`` part between
    them; the expected output is the two texts concatenated with the
    ``tool_use`` part contributing nothing.
    """
    parts = [
        {"type": "text", "text": "Hello "},
        {"type": "tool_use", "name": "search", "input": {}},
        {"type": "text", "text": "world"},
    ]
    transcript = {"messages": [AIMessage(content=parts)]}
    extracted = extract_final_assistant_message(transcript)
    assert extracted == "Hello world"
 def test_extract_returns_last_ai_message_skipping_tool_messages() -> None:
    """A trailing ``ToolMessage`` does not hide the last ``AIMessage``.

    The transcript ends with a tool result, and an earlier ``AIMessage``
    precedes the final one. The extractor must return the text of the
    **last** ``AIMessage``, not the earlier one and not the tool output.
    """
    transcript = [
        HumanMessage(content="ask"),
        AIMessage(content="thinking..."),
        ToolMessage(content="tool output", tool_call_id="tc-1"),
        AIMessage(content="final answer"),
        ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
    ]
    extracted = extract_final_assistant_message({"messages": transcript})
    assert extracted == "final answer"
 def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
    """Transcripts with no usable assistant text yield ``None``, not ``""``.

    Three shapes are checked in turn: no ``AIMessage`` at all, an
    ``AIMessage`` whose only part is ``tool_use``, and an ``AIMessage``
    whose string content is whitespace only. Returning ``None`` lets
    callers distinguish "the agent said nothing" from an empty output.
    """
    tool_only_parts = [{"type": "tool_use", "name": "x", "input": {}}]
    silent_transcripts = (
        {"messages": [HumanMessage(content="just a question")]},
        {"messages": [AIMessage(content=tool_only_parts)]},
        {"messages": [AIMessage(content="   ")]},
    )
    for transcript in silent_transcripts:
        assert extract_final_assistant_message(transcript) is None