mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-12 09:12:40 +02:00
chat: fix mixed-decision HITL crash and fold resumed assistant messages into the interrupted bubble.
This commit is contained in:
parent
2e132513be
commit
932bf22a34
6 changed files with 208 additions and 40 deletions
|
|
@ -6,12 +6,19 @@ exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langchain.tools import ToolRuntime
|
||||
|
||||
from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# langgraph stores the parent task's scratchpad under this configurable key;
|
||||
# subagents inherit the chain via ``parent_scratchpad`` fallback.
|
||||
_LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"
|
||||
|
||||
|
||||
def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
|
||||
"""RunnableConfig for the nested invoke; raises ``recursion_limit`` to the parent's budget."""
|
||||
|
|
@ -42,3 +49,42 @@ def has_surfsense_resume(runtime: ToolRuntime) -> bool:
|
|||
if not isinstance(configurable, dict):
|
||||
return False
|
||||
return "surfsense_resume_value" in configurable
|
||||
|
||||
|
||||
def drain_parent_null_resume(runtime: ToolRuntime) -> None:
|
||||
"""Consume the parent's lingering ``NULL_TASK_ID/RESUME`` write before delegating.
|
||||
|
||||
``stream_resume_chat`` wakes the main agent with
|
||||
``Command(resume={"decisions": [...]})`` so the propagated
|
||||
``_lg_interrupt(...)`` can return. langgraph stores that payload as the
|
||||
parent task's ``null_resume`` pending write, which only gets consumed
|
||||
*after* ``subagent.[a]invoke`` returns (when the post-call propagation
|
||||
re-fires). While the subagent is mid-execution, any *new* ``interrupt()``
|
||||
inside it (e.g. a follow-up tool call after a mixed approve/reject) walks
|
||||
``subagent_scratchpad → parent_scratchpad.get_null_resume`` and picks up
|
||||
the parent's still-live decisions — mismatching against a different number
|
||||
of hanging tool calls and crashing ``HumanInTheLoopMiddleware``.
|
||||
|
||||
Draining the write here closes that cross-graph leak so subagent
|
||||
interrupts pause cleanly and re-propagate as a fresh approval card.
|
||||
"""
|
||||
cfg = runtime.config or {}
|
||||
configurable = cfg.get("configurable") if isinstance(cfg, dict) else None
|
||||
if not isinstance(configurable, dict):
|
||||
return
|
||||
scratchpad = configurable.get(_LANGGRAPH_SCRATCHPAD_KEY)
|
||||
if scratchpad is None:
|
||||
return
|
||||
consume = getattr(scratchpad, "get_null_resume", None)
|
||||
if not callable(consume):
|
||||
return
|
||||
try:
|
||||
consume(True)
|
||||
except Exception:
|
||||
# Defensive: if langgraph's internal scratchpad shape changes we don't
|
||||
# want to break the resume path. Worst case the original ValueError
|
||||
# still surfaces — same behavior as before this fix.
|
||||
logger.debug(
|
||||
"drain_parent_null_resume: scratchpad.get_null_resume raised",
|
||||
exc_info=True,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from langgraph.types import Command
|
|||
|
||||
from .config import (
|
||||
consume_surfsense_resume,
|
||||
drain_parent_null_resume,
|
||||
has_surfsense_resume,
|
||||
subagent_invoke_config,
|
||||
)
|
||||
|
|
@ -157,6 +158,9 @@ def build_task_tool_with_parent_config(
|
|||
)
|
||||
expected = hitlrequest_action_count(pending_value)
|
||||
resume_value = fan_out_decisions_to_match(resume_value, expected)
|
||||
# Prevent the parent's resume payload from leaking into subagent
|
||||
# interrupts via langgraph's parent_scratchpad fallback.
|
||||
drain_parent_null_resume(runtime)
|
||||
result = subagent.invoke(
|
||||
build_resume_command(resume_value, pending_id),
|
||||
config=sub_config,
|
||||
|
|
@ -221,6 +225,9 @@ def build_task_tool_with_parent_config(
|
|||
)
|
||||
expected = hitlrequest_action_count(pending_value)
|
||||
resume_value = fan_out_decisions_to_match(resume_value, expected)
|
||||
# Prevent the parent's resume payload from leaking into subagent
|
||||
# interrupts via langgraph's parent_scratchpad fallback.
|
||||
drain_parent_null_resume(runtime)
|
||||
result = await subagent.ainvoke(
|
||||
build_resume_command(resume_value, pending_id),
|
||||
config=sub_config,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue