diff --git a/surfsense_backend/tests/unit/automations/actions/__init__.py b/surfsense_backend/tests/unit/automations/actions/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/surfsense_backend/tests/unit/automations/actions/agent_task/__init__.py b/surfsense_backend/tests/unit/automations/actions/agent_task/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py b/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py
new file mode 100644
index 000000000..439f32e41
--- /dev/null
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_auto_decide.py
@@ -0,0 +1,73 @@
+"""Lock ``build_auto_decisions`` — the HITL auto-approve/reject wire mapper.
+
+``build_auto_decisions`` walks ``state.interrupts`` (duck-typed) and produces
+two parallel resume maps: one keyed by LangGraph ``Interrupt.id`` and one
+keyed by ``tool_call_id`` for the subagent middleware bridge. Both carry
+the same decision payload.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from app.automations.actions.agent_task.auto_decide import build_auto_decisions
+
+pytestmark = pytest.mark.unit
+
+
+def _state(interrupts: list[Any]) -> SimpleNamespace:
+    """Build a duck-typed LangGraph state stub carrying ``interrupts``."""
+    return SimpleNamespace(interrupts=interrupts)
+
+
+def _interrupt(*, id_: str, value: Any) -> SimpleNamespace:
+    """Build a duck-typed interrupt with the canonical ``(id, value)`` shape."""
+    return SimpleNamespace(id=id_, value=value)
+
+
+def test_build_auto_decisions_produces_one_decision_per_action_request() -> None:
+    """An interrupt carrying N ``action_requests`` produces N decisions of
+    the requested type in both maps. This is the canonical batched-HITL
+    wire shape — losing a decision would leave a pending action stuck."""
+    interrupt = _interrupt(
+        id_="lg-1",
+        value={
+            "tool_call_id": "tc-1",
+            "action_requests": [{"id": "a"}, {"id": "b"}],
+        },
+    )
+
+    lg_map, routed = build_auto_decisions(_state([interrupt]), "approve")
+
+    assert lg_map == {"lg-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
+    assert routed == {"tc-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
+
+
+def test_build_auto_decisions_defaults_to_one_decision_for_scalar_interrupt() -> None:
+    """When an interrupt's value has no ``action_requests`` list, the
+    function defaults to a single decision. Locks compatibility with
+    older single-action interrupt shapes still emitted by some tools."""
+    interrupt = _interrupt(id_="lg-2", value={"tool_call_id": "tc-2"})
+
+    lg_map, routed = build_auto_decisions(_state([interrupt]), "reject")
+
+    assert lg_map == {"lg-2": {"decisions": [{"type": "reject"}]}}
+    assert routed == {"tc-2": {"decisions": [{"type": "reject"}]}}
+
+
+def test_build_auto_decisions_skips_interrupts_with_invalid_shape() -> None:
+    """Interrupts missing the canonical ``(str id, dict value)`` shape are
+    skipped silently rather than crashing the resume loop. Locks the
+    resilience contract — a malformed interrupt from a misbehaving tool
+    shouldn't take down the whole agent_task step."""
+    good = _interrupt(id_="lg-good", value={"tool_call_id": "tc-good"})
+    bad_value = _interrupt(id_="lg-bad-value", value="not a dict")
+    bad_id = _interrupt(id_=None, value={"tool_call_id": "tc-bad-id"})  # type: ignore[arg-type]
+
+    lg_map, routed = build_auto_decisions(_state([good, bad_value, bad_id]), "approve")
+
+    assert lg_map == {"lg-good": {"decisions": [{"type": "approve"}]}}
+    assert routed == {"tc-good": {"decisions": [{"type": "approve"}]}}
diff --git a/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py b/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py
new file mode 100644
index 000000000..bd49d764c
--- /dev/null
+++ b/surfsense_backend/tests/unit/automations/actions/agent_task/test_finalize.py
@@ -0,0 +1,80 @@
+"""Lock ``extract_final_assistant_message`` — what surfaces in run output.
+
+Each scenario is one shape the agent runtime is observed to produce.
+Locking these means we can refactor the extractor without losing
+backwards compatibility with already-stored ``run.output`` payloads.
+"""
+
+from __future__ import annotations
+
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+
+from app.automations.actions.agent_task.finalize import extract_final_assistant_message
+
+pytestmark = pytest.mark.unit
+
+
+def test_extract_returns_last_ai_message_string_content() -> None:
+    """The canonical shape: the agent's final ``AIMessage`` carries a
+    plain string. It has no padding to trim, so it comes back unchanged."""
+    result = {
+        "messages": [
+            HumanMessage(content="ask"),
+            AIMessage(content="the answer"),
+        ]
+    }
+
+    assert extract_final_assistant_message(result) == "the answer"
+
+
+def test_extract_concatenates_text_parts_and_skips_non_text_parts() -> None:
+    """Multi-part AIMessage content (Anthropic / OpenAI list shape) joins
+    its ``text`` parts in order; non-text parts (tool_use, images, ...)
+    are skipped. Locks the wire shape used when the model emits tool
+    calls alongside narrative text in the same turn."""
+    result = {
+        "messages": [
+            AIMessage(
+                content=[
+                    {"type": "text", "text": "Hello "},
+                    {"type": "tool_use", "name": "search", "input": {}},
+                    {"type": "text", "text": "world"},
+                ]
+            )
+        ]
+    }
+
+    assert extract_final_assistant_message(result) == "Hello world"
+
+
+def test_extract_returns_last_ai_message_skipping_tool_messages() -> None:
+    """When the transcript ends with tool calls and tool results, the
+    extractor still walks back to the **last** ``AIMessage`` (the agent's
+    final narrative answer). Locks resilience against trailing
+    ``ToolMessage`` payloads in the transcript."""
+    result = {
+        "messages": [
+            HumanMessage(content="ask"),
+            AIMessage(content="thinking..."),
+            ToolMessage(content="tool output", tool_call_id="tc-1"),
+            AIMessage(content="final answer"),
+            ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
+        ]
+    }
+
+    assert extract_final_assistant_message(result) == "final answer"
+
+
+def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
+    """No ``AIMessage`` with extractable text → ``None`` rather than the
+    empty string. Lets callers branch on "did the agent actually say
+    anything?" rather than guess whether ``""`` means silence or empty
+    output. Whitespace-only contents are normalized to ``None`` too."""
+    no_ai = {"messages": [HumanMessage(content="just a question")]}
+    only_tools = {"messages": [AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])]}
+    empty_string = {"messages": [AIMessage(content=" ")]}  # whitespace-only, not literally ""
+
+    assert extract_final_assistant_message(no_ai) is None
+    assert extract_final_assistant_message(only_tools) is None
+    assert extract_final_assistant_message(empty_string) is None