mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
test(automations/actions): lock agent_task helpers (auto_decide + finalize)
auto_decide.build_auto_decisions (3 tests): produces one decision per action_request entry; defaults to a single decision for legacy scalar interrupts; and silently skips malformed interrupts so a misbehaving tool can't take down the whole agent_task step.
finalize.extract_final_assistant_message (4 tests): returns a string-content AIMessage verbatim; concatenates list-of-parts content (skipping non-text parts such as tool_use); walks back past trailing ToolMessages to find the last AIMessage; and returns None when no extractable text is present (so callers can branch on silence vs. empty).
7 tests, pure unit.
This commit is contained in:
parent
db4eef651f
commit
acbeb60a43
4 changed files with 153 additions and 0 deletions
|
|
@ -0,0 +1,73 @@
|
||||||
|
"""Lock ``build_auto_decisions`` — the HITL auto-approve/reject wire mapper.
|
||||||
|
|
||||||
|
``build_auto_decisions`` walks ``state.interrupts`` (duck-typed) and produces
|
||||||
|
two parallel resume maps: one keyed by LangGraph ``Interrupt.id`` and one
|
||||||
|
keyed by ``tool_call_id`` for the subagent middleware bridge. Both carry
|
||||||
|
the same decision payload.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.automations.actions.agent_task.auto_decide import build_auto_decisions
|
||||||
|
|
||||||
|
# Module-level pytest mark: tags every test in this module as ``unit``.
# NOTE(review): presumably ``unit`` is registered in the project's pytest
# configuration — confirm, otherwise pytest warns about an unknown marker.
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
def _state(interrupts: list[Any]) -> SimpleNamespace:
    """Build a duck-typed LangGraph state stub carrying ``interrupts``.

    Args:
        interrupts: The interrupt objects to expose on the stub. The list is
            stored as-is (not copied).

    Returns:
        A ``SimpleNamespace`` whose only attribute is ``interrupts``.
    """
    return SimpleNamespace(interrupts=interrupts)
|
||||||
|
|
||||||
|
|
||||||
|
def _interrupt(*, id_: str, value: Any) -> SimpleNamespace:
    """Build a duck-typed interrupt with the canonical ``(id, value)`` shape.

    Args:
        id_: Stored on the stub as ``id`` (the trailing underscore only
            avoids shadowing the ``id`` builtin in the signature).
        value: Stored on the stub as ``value``; passed through untouched so
            tests can also supply deliberately malformed payloads.

    Returns:
        A ``SimpleNamespace`` with ``id`` and ``value`` attributes.
    """
    return SimpleNamespace(id=id_, value=value)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_auto_decisions_produces_one_decision_per_action_request() -> None:
    """An interrupt carrying N ``action_requests`` produces N decisions of
    the requested type in both maps. This is the canonical batched-HITL
    wire shape — losing a decision would leave a pending action stuck."""
    # Two action requests under one interrupt -> two decisions expected.
    interrupt = _interrupt(
        id_="lg-1",
        value={
            "tool_call_id": "tc-1",
            "action_requests": [{"id": "a"}, {"id": "b"}],
        },
    )

    lg_map, routed = build_auto_decisions(_state([interrupt]), "approve")

    # Same payload in both maps; only the key differs (interrupt id vs.
    # tool_call_id).
    assert lg_map == {"lg-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
    assert routed == {"tc-1": {"decisions": [{"type": "approve"}, {"type": "approve"}]}}
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_auto_decisions_defaults_to_one_decision_for_scalar_interrupt() -> None:
    """When an interrupt's value has no ``action_requests`` list, the
    function defaults to a single decision. Locks compatibility with
    older single-action interrupt shapes still emitted by some tools."""
    # No "action_requests" key at all in the interrupt value.
    interrupt = _interrupt(id_="lg-2", value={"tool_call_id": "tc-2"})

    lg_map, routed = build_auto_decisions(_state([interrupt]), "reject")

    # Exactly one decision, carrying the requested decision type.
    assert lg_map == {"lg-2": {"decisions": [{"type": "reject"}]}}
    assert routed == {"tc-2": {"decisions": [{"type": "reject"}]}}
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_auto_decisions_skips_interrupts_with_invalid_shape() -> None:
    """Interrupts missing the canonical ``(str id, dict value)`` shape are
    skipped silently rather than crashing the resume loop. Locks the
    resilience contract — a malformed interrupt from a misbehaving tool
    shouldn't take down the whole agent_task step."""
    good = _interrupt(id_="lg-good", value={"tool_call_id": "tc-good"})
    # Malformed case 1: value is a string instead of a dict.
    bad_value = _interrupt(id_="lg-bad-value", value="not a dict")
    # Malformed case 2: id is None instead of a string.
    bad_id = _interrupt(id_=None, value={"tool_call_id": "tc-bad-id"})  # type: ignore[arg-type]

    lg_map, routed = build_auto_decisions(_state([good, bad_value, bad_id]), "approve")

    # Only the well-formed interrupt appears in either map.
    assert lg_map == {"lg-good": {"decisions": [{"type": "approve"}]}}
    assert routed == {"tc-good": {"decisions": [{"type": "approve"}]}}
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
"""Lock ``extract_final_assistant_message`` — what surfaces in run output.
|
||||||
|
|
||||||
|
Each scenario is one shape the agent runtime is observed to produce.
|
||||||
|
Locking these means we can refactor the extractor without losing
|
||||||
|
backwards compatibility with already-stored ``run.output`` payloads.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
|
||||||
|
|
||||||
|
from app.automations.actions.agent_task.finalize import extract_final_assistant_message
|
||||||
|
|
||||||
|
# Module-level pytest mark: tags every test in this module as ``unit``.
# NOTE(review): presumably ``unit`` is registered in the project's pytest
# configuration — confirm, otherwise pytest warns about an unknown marker.
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_returns_last_ai_message_string_content() -> None:
    """The canonical shape: the agent's final ``AIMessage`` carries a
    plain string, and that string is what the extractor returns.

    NOTE(review): the earlier wording also promised the result is
    "trimmed", but this input has no surrounding whitespace, so trimming
    is not exercised here — confirm against the extractor before relying
    on it.
    """
    result = {
        "messages": [
            HumanMessage(content="ask"),
            AIMessage(content="the answer"),
        ]
    }

    assert extract_final_assistant_message(result) == "the answer"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_concatenates_text_parts_and_skips_non_text_parts() -> None:
    """Multi-part AIMessage content (Anthropic / OpenAI list shape) joins
    its ``text`` parts in order; non-text parts (tool_use, images, ...)
    are skipped. Locks the wire shape used when the model emits tool
    calls alongside narrative text in the same turn."""
    result = {
        "messages": [
            AIMessage(
                content=[
                    {"type": "text", "text": "Hello "},
                    # Non-text part sandwiched between two text parts: it
                    # must not contribute to the extracted string.
                    {"type": "tool_use", "name": "search", "input": {}},
                    {"type": "text", "text": "world"},
                ]
            )
        ]
    }

    # Text parts are joined with no separator added between them.
    assert extract_final_assistant_message(result) == "Hello world"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_returns_last_ai_message_skipping_tool_messages() -> None:
    """When the transcript ends with tool calls and tool results, the
    extractor still walks back to the **last** ``AIMessage`` (the agent's
    final narrative answer). Locks resilience against trailing
    ``ToolMessage`` payloads in the transcript."""
    result = {
        "messages": [
            HumanMessage(content="ask"),
            # Earlier AIMessage: must NOT be picked over the later one.
            AIMessage(content="thinking..."),
            ToolMessage(content="tool output", tool_call_id="tc-1"),
            AIMessage(content="final answer"),
            # Trailing ToolMessage after the final AIMessage: must be ignored.
            ToolMessage(content="trailing tool noise", tool_call_id="tc-2"),
        ]
    }

    assert extract_final_assistant_message(result) == "final answer"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_returns_none_when_no_assistant_text_is_present() -> None:
    """No ``AIMessage`` with extractable text → ``None`` rather than the
    empty string. Lets callers branch on "did the agent actually say
    anything?" rather than guess whether ``""`` means silence or empty
    output. Whitespace-only string contents are normalized to ``None``
    too."""
    # Transcript without any AIMessage at all.
    no_ai = {"messages": [HumanMessage(content="just a question")]}
    # AIMessage whose list content holds only a non-text (tool_use) part.
    only_tools = {"messages": [AIMessage(content=[{"type": "tool_use", "name": "x", "input": {}}])]}
    # AIMessage whose string content is a single space (whitespace only).
    whitespace_only = {"messages": [AIMessage(content=" ")]}

    assert extract_final_assistant_message(no_ai) is None
    assert extract_final_assistant_message(only_tools) is None
    assert extract_final_assistant_message(whitespace_only) is None
|
||||||
Loading…
Add table
Add a link
Reference in a new issue