mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
test(automations/actions): lock agent_task helpers (auto_decide + finalize)
auto_decide.build_auto_decisions (3): produces one decision per action_request entry, defaults to one decision for legacy scalar interrupts, and skips malformed interrupts silently so a misbehaving tool can't take down the whole agent_task step. finalize.extract_final_assistant_message (4): string-content AIMessage returned verbatim, list-of-parts content concatenated (skipping non-text parts like tool_use), walks back past trailing ToolMessages to find the last AIMessage, and returns None when no extractable text is present (so callers can branch on silence vs. empty). 7 tests, pure unit.
This commit is contained in:
parent
db4eef651f
commit
acbeb60a43
4 changed files with 153 additions and 0 deletions
|
|
@ -0,0 +1,73 @@
|
|||
"""Lock ``build_auto_decisions`` — the HITL auto-approve/reject wire mapper.
|
||||
|
||||
``build_auto_decisions`` walks ``state.interrupts`` (duck-typed) and produces
|
||||
two parallel resume maps: one keyed by LangGraph ``Interrupt.id`` and one
|
||||
keyed by ``tool_call_id`` for the subagent middleware bridge. Both carry
|
||||
the same decision payload.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from app.automations.actions.agent_task.auto_decide import build_auto_decisions
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def _state(interrupts: list[Any]) -> SimpleNamespace:
    """Build a duck-typed LangGraph state stub carrying ``interrupts``.

    Args:
        interrupts: Interrupt stubs to expose on the ``interrupts`` attribute.

    Returns:
        A ``SimpleNamespace`` whose only attribute is ``interrupts``.
    """
    return SimpleNamespace(interrupts=interrupts)
|
||||
|
||||
|
||||
def _interrupt(*, id_: str, value: Any) -> SimpleNamespace:
    """Build a duck-typed interrupt with the canonical ``(id, value)`` shape.

    Args:
        id_: Stored on the stub as its ``id`` attribute (the trailing
            underscore only avoids shadowing the ``id`` builtin).
        value: Stored on the stub as its ``value`` attribute, unmodified.

    Returns:
        A ``SimpleNamespace`` exposing exactly ``id`` and ``value``.
    """
    return SimpleNamespace(id=id_, value=value)
|
||||
|
||||
|
||||
def test_build_auto_decisions_produces_one_decision_per_action_request() -> None:
    """An interrupt carrying N ``action_requests`` yields N decisions.

    Both returned maps must hold one decision of the requested type per
    action request. This is the canonical batched-HITL wire shape — losing
    a decision would leave a pending action stuck.
    """
    interrupt = _interrupt(
        id_="lg-1",
        value={
            "tool_call_id": "tc-1",
            "action_requests": [{"id": "a"}, {"id": "b"}],
        },
    )

    lg_map, routed = build_auto_decisions(_state([interrupt]), "approve")

    # Two action requests in, two "approve" decisions out, under both keys.
    assert lg_map == {"lg-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
    assert routed == {"tc-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
|
||||
|
||||
|
||||
def test_build_auto_decisions_defaults_to_one_decision_for_scalar_interrupt() -> None:
    """An interrupt without ``action_requests`` yields a single decision.

    Locks compatibility with older single-action interrupt shapes still
    emitted by some tools.
    """
    # The value dict deliberately omits the "action_requests" key.
    interrupt = _interrupt(id_="lg-2", value={"tool_call_id": "tc-2"})

    lg_map, routed = build_auto_decisions(_state([interrupt]), "reject")

    assert lg_map == {"lg-2": {"decisions": [{"type": "reject"}]}}
    assert routed == {"tc-2": {"decisions": [{"type": "reject"}]}}
|
||||
|
||||
|
||||
def test_build_auto_decisions_skips_interrupts_with_invalid_shape() -> None:
    """Interrupts lacking the ``(str id, dict value)`` shape are skipped.

    They must be dropped silently rather than crashing the resume loop.
    Locks the resilience contract — a malformed interrupt from a
    misbehaving tool shouldn't take down the whole agent_task step.
    """
    good = _interrupt(id_="lg-good", value={"tool_call_id": "tc-good"})
    # Malformed: ``value`` is a plain string instead of a dict.
    bad_value = _interrupt(id_="lg-bad-value", value="not a dict")
    # Malformed: ``id`` is None instead of a string.
    bad_id = _interrupt(id_=None, value={"tool_call_id": "tc-bad-id"})  # type: ignore[arg-type]

    lg_map, routed = build_auto_decisions(_state([good, bad_value, bad_id]), "approve")

    # Only the well-formed interrupt may appear in either map.
    assert lg_map == {"lg-good": {"decisions": [{"type": "approve"}]}}
    assert routed == {"tc-good": {"decisions": [{"type": "approve"}]}}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
"""Lock ``extract_final_assistant_message`` — what surfaces in run output.
|
||||
|
||||
Each scenario is one shape the agent runtime is observed to produce.
|
||||
Locking these means we can refactor the extractor without losing
|
||||
backwards compatibility with already-stored ``run.output`` payloads.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
|
||||
|
||||
from app.automations.actions.agent_task.finalize import extract_final_assistant_message
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_extract_returns_last_ai_message_string_content() -> None:
    """A final ``AIMessage`` with plain string content is returned as-is.

    This is the canonical shape. The fixture has no surrounding
    whitespace, so this test pins only the pass-through of the text; any
    trimming the extractor performs is not exercised here.
    """
    result = {
        "messages": [
            HumanMessage(content="ask"),
            AIMessage(content="the answer"),
        ]
    }

    assert extract_final_assistant_message(result) == "the answer"
|
||||
|
||||
|
||||
def test_extract_concatenates_text_parts_and_skips_non_text_parts() -> None:
    """List-shaped content joins its ``text`` parts and skips the rest.

    Multi-part AIMessage content (Anthropic / OpenAI list shape) joins its
    ``text`` parts in order; non-text parts (tool_use, images, ...) are
    skipped. Locks the wire shape used when the model emits tool calls
    alongside narrative text in the same turn.
    """
    result = {
        "messages": [
            AIMessage(
                content=[
                    {"type": "text", "text": "Hello "},
                    # Non-text part sitting between two text parts.
                    {"type": "tool_use", "name": "search", "input": {}},
                    {"type": "text", "text": "world"},
                ]
            )
        ]
    }

    assert extract_final_assistant_message(result) == "Hello world"
|
||||
|
||||
|
||||
def test_extract_returns_last_ai_message_skipping_tool_messages() -> None:
    """Trailing ``ToolMessage`` entries do not hide the last ``AIMessage``.

    When the transcript ends with tool calls and tool results, the
    extractor still walks back to the **last** ``AIMessage`` (the agent's
    final narrative answer). Locks resilience against trailing
    ``ToolMessage`` payloads in the transcript.
    """
    result = {
        "messages": [
            HumanMessage(content="ask"),
            # Earlier AIMessage: must not be picked over the later one.
            AIMessage(content="thinking..."),
            ToolMessage(content="tool output", tool_call_id="tc-1"),
            AIMessage(content="final answer"),
            # Transcript ends on a tool result, not on an AIMessage.
            ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
        ]
    }

    assert extract_final_assistant_message(result) == "final answer"
|
||||
|
||||
|
||||
def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
    """No ``AIMessage`` with extractable text yields ``None``, not ``""``.

    Lets callers branch on "did the agent actually say anything?" rather
    than guess whether ``""`` means silence or empty output. Three shapes
    are covered: no ``AIMessage`` at all, an ``AIMessage`` whose only part
    is a ``tool_use``, and an ``AIMessage`` whose content is
    whitespace-only.
    """
    no_ai = {"messages": [HumanMessage(content="just a question")]}
    only_tools = {"messages": [AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])]}
    # Whitespace-only content, not a zero-length string.
    blank_string = {"messages": [AIMessage(content=" ")]}

    assert extract_final_assistant_message(no_ai) is None
    assert extract_final_assistant_message(only_tools) is None
    assert extract_final_assistant_message(blank_string) is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue