test(automations/actions): lock agent_task helpers (auto_decide + finalize)

auto_decide.build_auto_decisions (3): produces one decision per action_request entry, defaults to one decision for legacy scalar interrupts, and skips malformed interrupts silently so a misbehaving tool can't take down the whole agent_task step. finalize.extract_final_assistant_message (4): string-content AIMessage returned verbatim, list-of-parts content concatenated (skipping non-text parts like tool_use), walks back past trailing ToolMessages to find the last AIMessage, and returns None when no extractable text is present (so callers can branch on silence vs. empty). 7 tests, pure unit.
2026-07-14 22:52:15 +02:00 · 2026-05-28 19:03:29 +02:00 · 2026-05-28 19:03:29 +02:00 · acbeb60a43
commit acbeb60a43
parent db4eef651f
4 changed files with 153 additions and 0 deletions
--- a/surfsense_backend/tests/unit/automations/actions/__init__.py
+++ b/surfsense_backend/tests/unit/automations/actions/__init__.py
--- a/surfsense_backend/tests/unit/automations/actions/agent_task/__init__.py
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/__init__.py
--- a/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py
@@ -0,0 +1,73 @@
+"""Lock ``build_auto_decisions`` — the HITL auto-approve/reject wire mapper.
+
+``build_auto_decisions`` walks ``state.interrupts`` (duck-typed) and produces
+two parallel resume maps: one keyed by LangGraph ``Interrupt.id`` and one
+keyed by ``tool_call_id`` for the subagent middleware bridge. Both carry
+the same decision payload.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from app.automations.actions.agent_task.auto_decide import build_auto_decisions
+
+pytestmark = pytest.mark.unit
+
+
+def _state(interrupts: list[Any]) -> SimpleNamespace:
+    """Build a duck-typed LangGraph state stub carrying ``interrupts``."""
+    return SimpleNamespace(interrupts=interrupts)
+
+
+def _interrupt(*, id_: str, value: Any) -> SimpleNamespace:
+    """Build a duck-typed interrupt with the canonical ``(id, value)`` shape."""
+    return SimpleNamespace(id=id_, value=value)
+
+
+def test_build_auto_decisions_produces_one_decision_per_action_request() -> None:
+    """An interrupt carrying N ``action_requests`` produces N decisions of
+    the requested type in both maps. This is the canonical batched-HITL
+    wire shape — losing a decision would leave a pending action stuck."""
+    interrupt = _interrupt(
+        id_="lg-1",
+        value={
+            "tool_call_id": "tc-1",
+            "action_requests": [{"id": "a"}, {"id": "b"}],
+        },
+    )
+
+    lg_map, routed = build_auto_decisions(_state([interrupt]), "approve")
+
+    assert lg_map == {"lg-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
+    assert routed == {"tc-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
+
+
+def test_build_auto_decisions_defaults_to_one_decision_for_scalar_interrupt() -> None:
+    """When an interrupt's value has no ``action_requests`` list, the
+    function defaults to a single decision. Locks compatibility with
+    older single-action interrupt shapes still emitted by some tools."""
+    interrupt = _interrupt(id_="lg-2", value={"tool_call_id": "tc-2"})
+
+    lg_map, routed = build_auto_decisions(_state([interrupt]), "reject")
+
+    assert lg_map == {"lg-2": {"decisions": [{"type": "reject"}]}}
+    assert routed == {"tc-2": {"decisions": [{"type": "reject"}]}}
+
+
+def test_build_auto_decisions_skips_interrupts_with_invalid_shape() -> None:
+    """Interrupts missing the canonical ``(str id, dict value)`` shape are
+    skipped silently rather than crashing the resume loop. Locks the
+    resilience contract — a malformed interrupt from a misbehaving tool
+    shouldn't take down the whole agent_task step."""
+    good = _interrupt(id_="lg-good", value={"tool_call_id": "tc-good"})
+    bad_value = _interrupt(id_="lg-bad-value", value="not a dict")
+    bad_id = _interrupt(id_=None, value={"tool_call_id": "tc-bad-id"})  # type: ignore[arg-type]
+
+    lg_map, routed = build_auto_decisions(_state([good, bad_value, bad_id]), "approve")
+
+    assert lg_map == {"lg-good": {"decisions": [{"type": "approve"}]}}
+    assert routed == {"tc-good": {"decisions": [{"type": "approve"}]}}
--- a/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py
@@ -0,0 +1,80 @@
+"""Lock ``extract_final_assistant_message`` — what surfaces in run output.
+
+Each scenario is one shape the agent runtime is observed to produce.
+Locking these means we can refactor the extractor without losing
+backwards compatibility with already-stored ``run.output`` payloads.
+"""
+
+from __future__ import annotations
+
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+
+from app.automations.actions.agent_task.finalize import extract_final_assistant_message
+
+pytestmark = pytest.mark.unit
+
+
+def test_extract_returns_last_ai_message_string_content() -> None:
+    """The canonical shape: the agent's final ``AIMessage`` carries a
+    plain string. That string is returned with surrounding whitespace trimmed."""
+    result = {
+        "messages": [
+            HumanMessage(content="ask"),
+            AIMessage(content="the answer"),
+        ]
+    }
+
+    assert extract_final_assistant_message(result) == "the answer"
+
+
+def test_extract_concatenates_text_parts_and_skips_non_text_parts() -> None:
+    """Multi-part AIMessage content (Anthropic / OpenAI list shape) joins
+    its ``text`` parts in order; non-text parts (tool_use, images, ...)
+    are skipped. Locks the wire shape used when the model emits tool
+    calls alongside narrative text in the same turn."""
+    result = {
+        "messages": [
+            AIMessage(
+                content=[
+                    {"type": "text", "text": "Hello "},
+                    {"type": "tool_use", "name": "search", "input": {}},
+                    {"type": "text", "text": "world"},
+                ]
+            )
+        ]
+    }
+
+    assert extract_final_assistant_message(result) == "Hello world"
+
+
+def test_extract_returns_last_ai_message_skipping_tool_messages() -> None:
+    """When the transcript ends with tool calls and tool results, the
+    extractor still walks back to the **last** ``AIMessage`` (the agent's
+    final narrative answer). Locks resilience against trailing
+    ``ToolMessage`` payloads in the transcript."""
+    result = {
+        "messages": [
+            HumanMessage(content="ask"),
+            AIMessage(content="thinking..."),
+            ToolMessage(content="tool output", tool_call_id="tc-1"),
+            AIMessage(content="final answer"),
+            ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
+        ]
+    }
+
+    assert extract_final_assistant_message(result) == "final answer"
+
+
+def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
+    """No ``AIMessage`` with extractable text → ``None`` rather than the
+    empty string. Lets callers branch on "did the agent actually say
+    anything?" rather than guess whether ``""`` means silence or empty
+    output. Empty or whitespace-only contents are normalized to ``None`` too."""
+    no_ai = {"messages": [HumanMessage(content="just a question")]}
+    only_tools = {"messages": [AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])]}
+    empty_string = {"messages": [AIMessage(content="   ")]}
+
+    assert extract_final_assistant_message(no_ai) is None
+    assert extract_final_assistant_message(only_tools) is None
+    assert extract_final_assistant_message(empty_string) is None