chat: fix mixed-decision HITL crash and fold resumed assistant messages into the interrupted bubble.

2026-05-12 09:12:40 +02:00 · 2026-05-09 22:54:07 +02:00 · 2026-05-09 22:54:07 +02:00 · 932bf22a34
commit 932bf22a34
parent 2e132513be
6 changed files with 208 additions and 40 deletions
--- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/config.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/config.py
@ -6,12 +6,19 @@ exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
 from __future__ import annotations
 import logging
 from typing import Any
 from langchain.tools import ToolRuntime
 from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
 logger = logging.getLogger(__name__)
 # langgraph stores the parent task's scratchpad under this configurable key;
 # subagents inherit the chain via ``parent_scratchpad`` fallback.
 _LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"
 def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
    """RunnableConfig for the nested invoke; raises ``recursion_limit`` to the parent's budget."""
@ -42,3 +49,42 @@ def has_surfsense_resume(runtime: ToolRuntime) -> bool:
    if not isinstance(configurable, dict):
        return False
    return "surfsense_resume_value" in configurable
 def drain_parent_null_resume(runtime: ToolRuntime) -> None:
    """Consume the parent's lingering ``NULL_TASK_ID/RESUME`` write before delegating.
    ``stream_resume_chat`` wakes the main agent with
    ``Command(resume={"decisions": [...]})`` so the propagated
    ``_lg_interrupt(...)`` can return. langgraph stores that payload as the
    parent task's ``null_resume`` pending write, which only gets consumed
    *after* ``subagent.[a]invoke`` returns (when the post-call propagation
    re-fires). While the subagent is mid-execution, any *new* ``interrupt()``
    inside it (e.g. a follow-up tool call after a mixed approve/reject) walks
    ``subagent_scratchpad → parent_scratchpad.get_null_resume`` and picks up
    the parent's still-live decisions — mismatching against a different number
    of hanging tool calls and crashing ``HumanInTheLoopMiddleware``.
    Draining the write here closes that cross-graph leak so subagent
    interrupts pause cleanly and re-propagate as a fresh approval card.
    """
    cfg = runtime.config or {}
    configurable = cfg.get("configurable") if isinstance(cfg, dict) else None
    if not isinstance(configurable, dict):
        return
    scratchpad = configurable.get(_LANGGRAPH_SCRATCHPAD_KEY)
    if scratchpad is None:
        return
    consume = getattr(scratchpad, "get_null_resume", None)
    if not callable(consume):
        return
    try:
        consume(True)
    except Exception:
        # Defensive: if langgraph's internal scratchpad shape changes we don't
        # want to break the resume path. Worst case the original ValueError
        # still surfaces — same behavior as before this fix.
        logger.debug(
            "drain_parent_null_resume: scratchpad.get_null_resume raised",
            exc_info=True,
        )
--- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py
@ -20,6 +20,7 @@ from langgraph.types import Command
 from .config import (
    consume_surfsense_resume,
    drain_parent_null_resume,
    has_surfsense_resume,
    subagent_invoke_config,
 )
@ -157,6 +158,9 @@ def build_task_tool_with_parent_config(
                )
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
            # Prevent the parent's resume payload from leaking into subagent
            # interrupts via langgraph's parent_scratchpad fallback.
            drain_parent_null_resume(runtime)
            result = subagent.invoke(
                build_resume_command(resume_value, pending_id),
                config=sub_config,
@ -221,6 +225,9 @@ def build_task_tool_with_parent_config(
                )
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
            # Prevent the parent's resume payload from leaking into subagent
            # interrupts via langgraph's parent_scratchpad fallback.
            drain_parent_null_resume(runtime)
            result = await subagent.ainvoke(
                build_resume_command(resume_value, pending_id),
                config=sub_config,
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -64,7 +64,10 @@ import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { getBearerToken } from "@/lib/auth-utils";
 import { type ChatFlow, classifyChatError } from "@/lib/chat/chat-error-classifier";
 import { tagPreAcceptSendFailure, toHttpResponseError } from "@/lib/chat/chat-request-errors";
-import { convertToThreadMessage, filterSupersededAbortedMessages } from "@/lib/chat/message-utils";
+import {
 	convertToThreadMessage,
 	reconcileInterruptedAssistantMessages,
 } from "@/lib/chat/message-utils";
 import {
 	isPodcastGenerating,
 	looksLikePodcastRequest,
@ -395,7 +398,7 @@ export default function NewChatPage() {
 				const memberById = new Map(membersData?.map((m) => [m.user_id, m]) ?? []);
 				const prevById = new Map(prev.map((m) => [m.id, m]));
-				return filterSupersededAbortedMessages(syncedMessages).map((msg) => {
+				return reconcileInterruptedAssistantMessages(syncedMessages).map((msg) => {
 					const member = msg.author_id ? (memberById.get(msg.author_id) ?? null) : null;
 					// Preserve existing author info if member lookup fails (e.g., cloned chats)
@ -622,9 +625,9 @@ export default function NewChatPage() {
 				setCurrentThread(threadData);
 				if (messagesResponse.messages && messagesResponse.messages.length > 0) {
-					const loadedMessages = filterSupersededAbortedMessages(messagesResponse.messages).map(
+					const loadedMessages = reconcileInterruptedAssistantMessages(
-						convertToThreadMessage
+						messagesResponse.messages
-					);
+					).map(convertToThreadMessage);
 					setMessages(loadedMessages);
 					for (const msg of messagesResponse.messages) {
--- a/surfsense_web/features/chat-messages/timeline/build-timeline.ts
+++ b/surfsense_web/features/chat-messages/timeline/build-timeline.ts
@ -81,8 +81,8 @@ interface ToolCallSlim {
 * During the live-resume window the in-memory message holds BOTH the
 * OLD interrupt-frame parts AND the freshly-streamed resume parts in
 * a new ``task`` scope. Without this filter we'd render both until
- * the next reload (where ``filterSupersededAbortedMessages`` drops
+ * the next reload (where ``reconcileInterruptedAssistantMessages``
- * the OLD row upstream).
+ * folds the OLD row into the resume row upstream).
 *
 * A tool-call is "interrupt-affected" when it either carries
 * ``__interrupt__`` directly or sits in a span that contains one. An
--- a/surfsense_web/lib/chat/message-utils.ts
+++ b/surfsense_web/lib/chat/message-utils.ts
@ -1,7 +1,7 @@
 import type { ThreadMessageLike } from "@assistant-ui/react";
 import type { MessageRecord } from "./thread-persistence";
-/** Minimal shape used by ``filterSupersededAbortedMessages``. */
+/** Minimal shape used by the interrupt/resume reconciler. */
 interface AbortableMessage {
 	id: number;
 	role: string;
@ -9,14 +9,28 @@ interface AbortableMessage {
 	turn_id?: string | null;
 }
 function isAssistant(msg: AbortableMessage): boolean {
 	return msg.role.toLowerCase() === "assistant";
 }
 /** True when the row carries at least one tool-call with ``state: "aborted"``. */
 function hasAbortedToolCall(msg: AbortableMessage): boolean {
 	if (!isAssistant(msg) || !Array.isArray(msg.content)) return false;
 	for (const part of msg.content) {
 		if (typeof part !== "object" || part === null) continue;
 		if ((part as { type?: string }).type !== "tool-call") continue;
 		if ((part as { state?: unknown }).state === "aborted") return true;
 	}
 	return false;
 }
 /**
- * True when the row is a frozen interrupt frame: an assistant message
+ * True when EVERY tool-call on the row is aborted. The row is then a
- * whose tool-calls all carry ``state: "aborted"``. A single non-aborted
+ * frozen interrupt frame with no salvageable activity — safe to drop
- * tool-call disqualifies (defensive against future mixed states).
+ * outright.
 */
-function isAbortedAssistantMessage(msg: AbortableMessage): boolean {
+function isFullyAbortedAssistantMessage(msg: AbortableMessage): boolean {
-	if (msg.role.toLowerCase() !== "assistant") return false;
+	if (!isAssistant(msg) || !Array.isArray(msg.content)) return false;
 	if (!Array.isArray(msg.content)) return false;
 	let hasToolCalls = false;
 	for (const part of msg.content) {
 		if (typeof part !== "object" || part === null) continue;
@ -28,42 +42,140 @@ function isAbortedAssistantMessage(msg: AbortableMessage): boolean {
 }
 /**
- * Positional supersede check: an aborted assistant row is superseded
+ * Locate the resume row that supersedes ``messages[idx]``. The
- * iff another assistant row appears later before any user row.
+ * ``stream_resume_chat`` flow allocates a fresh ``turn_id`` so we
- *
+ * can't pair on it; conversational adjacency (assistant → assistant
- * NOT turn-id-based: ``stream_resume_chat`` allocates a fresh
+ * with no user row between) is the unique signature. Skips already-
- * ``turn_id`` for the resumed row, so interrupt+resume rows never
+ * dropped indices so chained interrupt-resumes still pair cleanly.
 * share a turn_id. Conversational adjacency is the reliable signal —
 * an assistant→assistant pair without a user row between them is the
 * unique signature of an interrupt+resume cycle.
 */
-function isSupersededByLaterAssistant<T extends AbortableMessage>(
+function findResumeSuccessorIdx<T extends AbortableMessage>(
 	messages: readonly T[],
-	idx: number
+	idx: number,
-): boolean {
+	dropped: ReadonlySet<number>
 ): number | null {
 	for (let i = idx + 1; i < messages.length; i++) {
 		if (dropped.has(i)) continue;
 		const role = messages[i].role.toLowerCase();
-		if (role === "user") return false;
+		if (role === "user") return null;
-		if (role === "assistant") return true;
+		if (role === "assistant") return i;
 	}
-	return false;
+	return null;
 }
 /** Read ``data.steps`` from either ``data-thinking-steps`` (modern) or ``thinking-steps`` (legacy). */
 function extractStepsFromPart(part: unknown): unknown[] | null {
 	if (typeof part !== "object" || part === null) return null;
 	const p = part as { type?: unknown; data?: unknown; steps?: unknown };
 	if (p.type === "data-thinking-steps") {
 		const data = p.data as { steps?: unknown } | undefined;
 		return Array.isArray(data?.steps) ? data.steps : [];
 	}
 	if (p.type === "thinking-steps") {
 		return Array.isArray(p.steps) ? (p.steps as unknown[]) : [];
 	}
 	return null;
 }
 /** Split a content array into (combined steps, all other parts in order). */
 function partitionContent(content: unknown): { steps: unknown[]; others: unknown[] } {
 	if (!Array.isArray(content)) return { steps: [], others: [] };
 	const steps: unknown[] = [];
 	const others: unknown[] = [];
 	for (const part of content) {
 		const partSteps = extractStepsFromPart(part);
 		if (partSteps !== null) {
 			steps.push(...partSteps);
 			continue;
 		}
 		others.push(part);
 	}
 	return { steps, others };
 }
 /**
- * Drop frozen interrupt-frame rows once they have a resumed
+ * Fold an interrupt-frame row's content into its resume successor so
- * continuation. Pure (returns a new array). Caller passes messages in
+ * the user sees one assistant turn instead of two stacked bubbles.
- * chronological order.
+ * Successor's metadata wins (id, created_at, turn_id, token_usage,
 * author) — that's the row the per-turn revert button keys to.
 *
- * Never-resumed aborts are preserved (user navigated away mid-decision)
+ * Order: combined ``data-thinking-steps`` (older steps then newer) at
- * so the user still sees what happened.
+ * index 0, followed by older's other parts in order, then newer's. The
 * older row's aborted ``task`` wrapper is preserved so the rejected
 * attempt remains visible alongside the successful retry; both spans
 * survive and ``groupItems`` renders them as sibling task branches in
 * one timeline.
 */
-export function filterSupersededAbortedMessages<T extends AbortableMessage>(
+function mergeInterruptedIntoResume<T extends AbortableMessage>(older: T, newer: T): T {
 	const olderParts = partitionContent(older.content);
 	const newerParts = partitionContent(newer.content);
 	const mergedSteps = [...olderParts.steps, ...newerParts.steps];
 	const mergedContent: unknown[] = [];
 	if (mergedSteps.length > 0) {
 		mergedContent.push({ type: "data-thinking-steps", data: { steps: mergedSteps } });
 	}
 	mergedContent.push(...olderParts.others, ...newerParts.others);
 	return { ...newer, content: mergedContent };
 }
 /**
 * Reconcile interrupt-frame and resume rows so the UI shows one
 * assistant turn per user turn even when the backend persists them as
 * separate ``new_chat_messages`` rows.
 *
 * Two cases, both keyed on conversational adjacency (assistant →
 * assistant with no user row between):
 *
 *  - **Fully aborted older row** (every tool-call ``state: "aborted"``,
 *    no salvageable activity) → drop the older row.
 *  - **Partially aborted older row** (mixed completed + aborted, e.g.
 *    inner subagent tools ran before the interrupt) → fold its content
 *    into the successor. Successor metadata wins.
 *
 * Never-resumed aborts (user navigated away mid-decision) survive so
 * the user still sees what happened.
 *
 * Pure: returns a new array with new merged objects when needed.
 * Caller passes messages in chronological order.
 */
 export function reconcileInterruptedAssistantMessages<T extends AbortableMessage>(
 	messages: readonly T[]
 ): T[] {
-	return messages.filter((msg, idx) => {
+	const dropped = new Set<number>();
-		if (!isAbortedAssistantMessage(msg)) return true;
+	const mergeInto = new Map<number, number[]>();
-		return !isSupersededByLaterAssistant(messages, idx);
+
-	});
+	for (let i = 0; i < messages.length; i++) {
 		if (dropped.has(i)) continue;
 		const msg = messages[i];
 		if (!hasAbortedToolCall(msg)) continue;
 		const successorIdx = findResumeSuccessorIdx(messages, i, dropped);
 		if (successorIdx === null) continue;
 		dropped.add(i);
 		if (!isFullyAbortedAssistantMessage(msg)) {
 			const list = mergeInto.get(successorIdx) ?? [];
 			list.push(i);
 			mergeInto.set(successorIdx, list);
 		}
 	}
 	const result: T[] = [];
 	for (let i = 0; i < messages.length; i++) {
 		if (dropped.has(i)) continue;
 		const olderIdxs = mergeInto.get(i);
 		if (olderIdxs && olderIdxs.length > 0) {
 			let merged = messages[i];
 			for (const olderIdx of olderIdxs) {
 				merged = mergeInterruptedIntoResume(messages[olderIdx], merged);
 			}
 			result.push(merged);
 			continue;
 		}
 		result.push(messages[i]);
 	}
 	return result;
 }
 /**
--- a/surfsense_web/lib/chat/streaming-state.ts
+++ b/surfsense_web/lib/chat/streaming-state.ts
@ -79,8 +79,8 @@ export interface ContentPartsState {
 	 * the resume stream's first ``start-step`` fires
 	 * ``addStepSeparator`` while rehydrated OLD content already makes
 	 * ``hasContent`` true → a divider lands between OLD and NEW
-	 * content with no semantic value (OLD content is filtered by
+	 * content with no semantic value (OLD content is folded by
-	 * ``buildTimeline`` + ``filterSupersededAbortedMessages``,
+	 * ``buildTimeline`` + ``reconcileInterruptedAssistantMessages``,
 	 * persisted state carries no separator, so the line vanishes on
 	 * reload).
 	 */