test(automations/actions): lock agent_task helpers (auto_decide + finalize)

auto_decide.build_auto_decisions (3): produces one decision per
action_request entry, defaults to one decision for legacy scalar
interrupts, and skips malformed interrupts silently so a misbehaving
tool can't take down the whole agent_task step.

finalize.extract_final_assistant_message (4): string-content AIMessage
returned verbatim, list-of-parts content concatenated (skipping
non-text parts like tool_use), walks back past trailing ToolMessages
to find the last AIMessage, and returns None when no extractable text
is present (so callers can branch on silence vs. empty).

7 tests, pure unit.
This commit is contained in:
CREDO23 2026-05-28 19:03:29 +02:00
parent db4eef651f
commit acbeb60a43
4 changed files with 153 additions and 0 deletions

View file

@ -0,0 +1,73 @@
"""Lock ``build_auto_decisions`` — the HITL auto-approve/reject wire mapper.
``build_auto_decisions`` walks ``state.interrupts`` (duck-typed) and produces
two parallel resume maps: one keyed by LangGraph ``Interrupt.id`` and one
keyed by ``tool_call_id`` for the subagent middleware bridge. Both carry
the same decision payload.
"""
from __future__ import annotations
from types import SimpleNamespace
from typing import Any
import pytest
from app.automations.actions.agent_task.auto_decide import build_auto_decisions
# Module-level pytest mark: tags every test in this file as ``unit``.
pytestmark = pytest.mark.unit
def _state(interrupts: list[Any]) -> SimpleNamespace:
"""Build a duck-typed LangGraph state stub carrying ``interrupts``."""
return SimpleNamespace(interrupts=interrupts)
def _interrupt(*, id_: str, value: Any) -> SimpleNamespace:
"""Build a duck-typed interrupt with the canonical ``(id, value)`` shape."""
return SimpleNamespace(id=id_, value=value)
def test_build_auto_decisions_produces_one_decision_per_action_request() -> None:
    """An interrupt carrying N ``action_requests`` produces N decisions of
    the requested type in both maps.

    This is the canonical batched-HITL wire shape — losing a decision
    would leave a pending action stuck.
    """
    interrupt = _interrupt(
        id_="lg-1",
        value={
            "tool_call_id": "tc-1",
            "action_requests": [{"id": "a"}, {"id": "b"}],
        },
    )

    lg_map, routed = build_auto_decisions(_state([interrupt]), "approve")

    # Two action requests in, two "approve" decisions out — under both keys.
    assert lg_map == {"lg-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
    assert routed == {"tc-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
def test_build_auto_decisions_defaults_to_one_decision_for_scalar_interrupt() -> None:
    """An interrupt whose value has no ``action_requests`` list still
    yields exactly one decision in each map.

    Pins the fallback for the single-action interrupt shape.
    """
    scalar_interrupt = _interrupt(id_="lg-2", value={"tool_call_id": "tc-2"})
    single_reject = {"decisions": [{"type": "reject"}]}

    by_interrupt_id, by_tool_call_id = build_auto_decisions(
        _state([scalar_interrupt]), "reject"
    )

    assert by_interrupt_id == {"lg-2": single_reject}
    assert by_tool_call_id == {"tc-2": single_reject}
def test_build_auto_decisions_skips_interrupts_with_invalid_shape() -> None:
    """Interrupts missing the canonical ``(str id, dict value)`` shape are
    skipped silently rather than crashing the resume loop.

    Locks the resilience contract: a malformed interrupt from a
    misbehaving tool shouldn't take down the whole agent_task step.
    """
    good = _interrupt(id_="lg-good", value={"tool_call_id": "tc-good"})
    # Malformed: ``value`` is a string instead of a dict.
    bad_value = _interrupt(id_="lg-bad-value", value="not a dict")
    # Malformed: ``id`` is ``None`` instead of a string.
    bad_id = _interrupt(id_=None, value={"tool_call_id": "tc-bad-id"})  # type: ignore[arg-type]

    lg_map, routed = build_auto_decisions(_state([good, bad_value, bad_id]), "approve")

    # Only the well-formed interrupt may appear in either map.
    assert lg_map == {"lg-good": {"decisions": [{"type": "approve"}]}}
    assert routed == {"tc-good": {"decisions": [{"type": "approve"}]}}

View file

@ -0,0 +1,80 @@
"""Lock ``extract_final_assistant_message`` — what surfaces in run output.
Each scenario is one shape the agent runtime is observed to produce.
Locking these means we can refactor the extractor without losing
backwards compatibility with already-stored ``run.output`` payloads.
"""
from __future__ import annotations
import pytest
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from app.automations.actions.agent_task.finalize import extract_final_assistant_message
# Module-level pytest mark: tags every test in this file as ``unit``.
pytestmark = pytest.mark.unit
def test_extract_returns_last_ai_message_string_content() -> None:
    """The canonical shape: the agent's final ``AIMessage`` carries a
    plain string, and that string is what the extractor returns.

    The fixture has no surrounding whitespace, so whether the extractor
    trims its result is not exercised by this test.
    """
    result = {
        "messages": [
            HumanMessage(content="ask"),
            AIMessage(content="the answer"),
        ]
    }

    assert extract_final_assistant_message(result) == "the answer"
def test_extract_concatenates_text_parts_and_skips_non_text_parts() -> None:
    """List-of-parts ``AIMessage`` content is flattened by joining its
    ``text`` parts in order, ignoring every non-text part.

    Covers the case where a ``tool_use`` part sits between two text
    parts in the same assistant turn.
    """
    content_parts = [
        {"type": "text", "text": "Hello "},
        {"type": "tool_use", "name": "search", "input": {}},
        {"type": "text", "text": "world"},
    ]
    transcript = {"messages": [AIMessage(content=content_parts)]}

    extracted = extract_final_assistant_message(transcript)

    assert extracted == "Hello world"
def test_extract_returns_last_ai_message_skipping_tool_messages() -> None:
    """A trailing ``ToolMessage`` must not hide the answer: the extractor
    walks back to the **last** ``AIMessage`` in the transcript.

    The earlier ``AIMessage`` ("thinking...") must not be picked either.
    """
    messages = [
        HumanMessage(content="ask"),
        AIMessage(content="thinking..."),
        ToolMessage(content="tool output", tool_call_id="tc-1"),
        AIMessage(content="final answer"),
        ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
    ]
    transcript = {"messages": messages}

    extracted = extract_final_assistant_message(transcript)

    assert extracted == "final answer"
def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
    """No ``AIMessage`` with extractable text → ``None`` rather than the
    empty string.

    Lets callers branch on "did the agent actually say anything?" rather
    than guess whether ``""`` means silence or empty output.
    Whitespace-only string content is normalized to ``None`` too.
    """
    # Transcript with no assistant turn at all.
    no_ai = {"messages": [HumanMessage(content="just a question")]}
    # Assistant turn whose only part is a non-text ``tool_use`` block.
    only_tools = {
        "messages": [
            AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}]),
        ]
    }
    # Assistant turn whose string content is a single space.
    whitespace_only = {"messages": [AIMessage(content=" ")]}

    assert extract_final_assistant_message(no_ai) is None
    assert extract_final_assistant_message(only_tools) is None
    assert extract_final_assistant_message(whitespace_only) is None