From a9bf7ab7d23d27141da245fbd85d773071203940 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 01/58] Add SSE envelope helpers under app.services.streaming. --- .../services/streaming/envelope/__init__.py | 23 +++++++++++++++++ .../streaming/envelope/identifiers.py | 25 +++++++++++++++++++ .../app/services/streaming/envelope/sse.py | 25 +++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/envelope/__init__.py create mode 100644 surfsense_backend/app/services/streaming/envelope/identifiers.py create mode 100644 surfsense_backend/app/services/streaming/envelope/sse.py diff --git a/surfsense_backend/app/services/streaming/envelope/__init__.py b/surfsense_backend/app/services/streaming/envelope/__init__.py new file mode 100644 index 000000000..862e84c8d --- /dev/null +++ b/surfsense_backend/app/services/streaming/envelope/__init__.py @@ -0,0 +1,23 @@ +"""Wire framing layer.""" + +from __future__ import annotations + +from .identifiers import ( + generate_message_id, + generate_reasoning_id, + generate_subagent_run_id, + generate_text_id, + generate_tool_call_id, +) +from .sse import format_done, format_sse, get_response_headers + +__all__ = [ + "format_done", + "format_sse", + "generate_message_id", + "generate_reasoning_id", + "generate_subagent_run_id", + "generate_text_id", + "generate_tool_call_id", + "get_response_headers", +] diff --git a/surfsense_backend/app/services/streaming/envelope/identifiers.py b/surfsense_backend/app/services/streaming/envelope/identifiers.py new file mode 100644 index 000000000..2fdd6ff09 --- /dev/null +++ b/surfsense_backend/app/services/streaming/envelope/identifiers.py @@ -0,0 +1,25 @@ +"""Prefixed UUID generators for stream parts.""" + +from __future__ import annotations + +import uuid + + +def generate_message_id() -> str: + return f"msg_{uuid.uuid4().hex}" + + +def generate_text_id() -> str: + return f"text_{uuid.uuid4().hex}" + + +def generate_reasoning_id() -> str: + return f"reasoning_{uuid.uuid4().hex}" + + +def generate_tool_call_id() -> str: + return f"call_{uuid.uuid4().hex}" + + +def generate_subagent_run_id() -> str: + return f"subagent_{uuid.uuid4().hex}" diff --git a/surfsense_backend/app/services/streaming/envelope/sse.py b/surfsense_backend/app/services/streaming/envelope/sse.py new file mode 100644 index 000000000..508fc1b1c --- /dev/null +++ b/surfsense_backend/app/services/streaming/envelope/sse.py @@ -0,0 +1,25 @@ +"""Server-Sent-Events wire framing.""" + +from __future__ import annotations + +import json +from typing import Any + + +def format_sse(data: Any) -> str: + if isinstance(data, str): + return f"data: {data}\n\n" + return f"data: {json.dumps(data)}\n\n" + + +def format_done() -> str: + return "data: [DONE]\n\n" + + +def get_response_headers() -> dict[str, str]: + return { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "x-vercel-ai-ui-message-stream": "v1", + } From 5510c6c3147e80a4de1b6a25e58bc6479032482e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 02/58] Add typed event payload modules for the streaming service. 
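Each module owns one event family and returns a ready-to-send SSE frame.
A quick sketch of the resulting wire shape (illustrative values; with no
emitter passed, no ``emitted_by`` field is attached; note these modules
import from ``..emitter``, which lands later in this series, so the sketch
assumes the full series is applied):

    from app.services.streaming.events.data import format_terminal_info

    frame = format_terminal_info("Indexing 12 files")
    # frame == 'data: {"type": "data-terminal-info", "data": {"text": "Indexing 12 files", "type": "info"}}\n\n'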
--- .../app/services/streaming/events/__init__.py | 29 +++++ .../services/streaming/events/action_log.py | 24 ++++ .../app/services/streaming/events/data.py | 118 ++++++++++++++++++ .../app/services/streaming/events/error.py | 23 ++++ .../services/streaming/events/interrupt.py | 56 +++++++++ .../services/streaming/events/lifecycle.py | 29 +++++ .../services/streaming/events/reasoning.py | 36 ++++++ .../app/services/streaming/events/source.py | 59 +++++++++ .../streaming/events/subagent_lifecycle.py | 86 +++++++++++++ .../app/services/streaming/events/text.py | 31 +++++ .../app/services/streaming/events/tool.py | 80 ++++++++++++ 11 files changed, 571 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/events/__init__.py create mode 100644 surfsense_backend/app/services/streaming/events/action_log.py create mode 100644 surfsense_backend/app/services/streaming/events/data.py create mode 100644 surfsense_backend/app/services/streaming/events/error.py create mode 100644 surfsense_backend/app/services/streaming/events/interrupt.py create mode 100644 surfsense_backend/app/services/streaming/events/lifecycle.py create mode 100644 surfsense_backend/app/services/streaming/events/reasoning.py create mode 100644 surfsense_backend/app/services/streaming/events/source.py create mode 100644 surfsense_backend/app/services/streaming/events/subagent_lifecycle.py create mode 100644 surfsense_backend/app/services/streaming/events/text.py create mode 100644 surfsense_backend/app/services/streaming/events/tool.py diff --git a/surfsense_backend/app/services/streaming/events/__init__.py b/surfsense_backend/app/services/streaming/events/__init__.py new file mode 100644 index 000000000..91a8ff854 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/__init__.py @@ -0,0 +1,29 @@ +"""SSE event payload formatters, one module per event family.""" + +from __future__ import annotations + +from . import ( + action_log, + data, + error, + interrupt, + lifecycle, + reasoning, + source, + subagent_lifecycle, + text, + tool, +) + +__all__ = [ + "action_log", + "data", + "error", + "interrupt", + "lifecycle", + "reasoning", + "source", + "subagent_lifecycle", + "text", + "tool", +] diff --git a/surfsense_backend/app/services/streaming/events/action_log.py b/surfsense_backend/app/services/streaming/events/action_log.py new file mode 100644 index 000000000..0a8e46f0a --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/action_log.py @@ -0,0 +1,24 @@ +"""Action-log events relayed from ``ActionLogMiddleware`` custom dispatches.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter +from .data import format_data + + +def format_action_log( + payload: dict[str, Any], + *, + emitter: Emitter | None = None, +) -> str: + return format_data("action-log", payload, emitter=emitter) + + +def format_action_log_updated( + payload: dict[str, Any], + *, + emitter: Emitter | None = None, +) -> str: + return format_data("action-log-updated", payload, emitter=emitter) diff --git a/surfsense_backend/app/services/streaming/events/data.py b/surfsense_backend/app/services/streaming/events/data.py new file mode 100644 index 000000000..f6e190578 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/data.py @@ -0,0 +1,118 @@ +"""Generic ``data-*`` envelopes and SurfSense-specific data parts. + +Inner ``data`` dict fields use snake_case. Legacy ``threadId`` / +``messageId`` keys are preserved where they cross the AI SDK boundary. 
+""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_data( + data_type: str, + data: Any, + *, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = {"type": f"data-{data_type}", "data": data} + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_terminal_info( + text: str, + *, + message_type: str = "info", + emitter: Emitter | None = None, +) -> str: + return format_data( + "terminal-info", + {"text": text, "type": message_type}, + emitter=emitter, + ) + + +def format_further_questions( + questions: list[str], + *, + emitter: Emitter | None = None, +) -> str: + return format_data("further-questions", {"questions": questions}, emitter=emitter) + + +def format_thinking_step( + *, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, + emitter: Emitter | None = None, +) -> str: + return format_data( + "thinking-step", + { + "id": step_id, + "title": title, + "status": status, + "items": items or [], + }, + emitter=emitter, + ) + + +def format_thread_title_update( + *, + thread_id: int, + title: str, + emitter: Emitter | None = None, +) -> str: + return format_data( + "thread-title-update", + {"threadId": thread_id, "title": title}, + emitter=emitter, + ) + + +def format_turn_info( + *, + chat_turn_id: str, + emitter: Emitter | None = None, +) -> str: + return format_data("turn-info", {"chat_turn_id": chat_turn_id}, emitter=emitter) + + +def format_turn_status( + *, + status: str, + emitter: Emitter | None = None, +) -> str: + return format_data("turn-status", {"status": status}, emitter=emitter) + + +def format_user_message_id( + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, +) -> str: + return format_data( + "user-message-id", + {"message_id": message_id, "turn_id": turn_id}, + emitter=emitter, + ) + + +def format_assistant_message_id( + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, +) -> str: + return format_data( + "assistant-message-id", + {"message_id": message_id, "turn_id": turn_id}, + emitter=emitter, + ) diff --git a/surfsense_backend/app/services/streaming/events/error.py b/surfsense_backend/app/services/streaming/events/error.py new file mode 100644 index 000000000..cd190d1f4 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/error.py @@ -0,0 +1,23 @@ +"""Single terminal error path the orchestrator must route through.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_error( + error_text: str, + *, + error_code: str | None = None, + extra: dict[str, Any] | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = {"type": "error", "errorText": error_text} + if error_code: + payload["errorCode"] = error_code + if extra: + payload.update(extra) + return format_sse(attach_emitted_by(payload, emitter)) diff --git a/surfsense_backend/app/services/streaming/events/interrupt.py b/surfsense_backend/app/services/streaming/events/interrupt.py new file mode 100644 index 000000000..0334b10b3 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/interrupt.py @@ -0,0 +1,56 @@ +"""Interrupt-request events with a single canonical payload shape.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter +from .data import format_data + 
+
+def normalize_interrupt_payload(interrupt_value: dict[str, Any]) -> dict[str, Any]:
+    if "action_requests" in interrupt_value and "review_configs" in interrupt_value:
+        return interrupt_value
+
+    interrupt_type = interrupt_value.get("type", "unknown")
+    message = interrupt_value.get("message")
+    action = interrupt_value.get("action", {}) or {}
+    context = interrupt_value.get("context", {}) or {}
+
+    normalized: dict[str, Any] = {
+        "action_requests": [
+            {
+                "name": action.get("tool", "unknown_tool"),
+                "args": action.get("params", {}),
+            }
+        ],
+        "review_configs": [
+            {
+                "action_name": action.get("tool", "unknown_tool"),
+                "allowed_decisions": ["approve", "edit", "reject"],
+            }
+        ],
+        "interrupt_type": interrupt_type,
+        "context": context,
+    }
+    if message:
+        normalized["message"] = message
+    return normalized
+
+
+def format_interrupt_request(
+    interrupt_value: dict[str, Any],
+    *,
+    interrupt_id: str | None = None,
+    pending_interrupt_count: int | None = None,
+    chat_turn_id: str | None = None,
+    emitter: Emitter | None = None,
+) -> str:
+    payload = normalize_interrupt_payload(interrupt_value)
+    if interrupt_id is not None:
+        payload["interrupt_id"] = interrupt_id
+    if pending_interrupt_count is not None:
+        payload["pending_interrupt_count"] = pending_interrupt_count
+    if chat_turn_id is not None:
+        payload["chat_turn_id"] = chat_turn_id
+    return format_data("interrupt-request", payload, emitter=emitter)
diff --git a/surfsense_backend/app/services/streaming/events/lifecycle.py b/surfsense_backend/app/services/streaming/events/lifecycle.py
new file mode 100644
index 000000000..019718b67
--- /dev/null
+++ b/surfsense_backend/app/services/streaming/events/lifecycle.py
@@ -0,0 +1,29 @@
+"""High-level message and step lifecycle events.
+
+Wire verbs are fixed by the AI SDK protocol (``start`` / ``finish`` for
+the whole message, ``start-step`` / ``finish-step`` for each step).
+Python helpers always read ``format_<subject>_<verb>`` so pairs are
+visible at the call site.
+""" + +from __future__ import annotations + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_message_start(message_id: str, *, emitter: Emitter | None = None) -> str: + payload = {"type": "start", "messageId": message_id} + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_message_finish(*, emitter: Emitter | None = None) -> str: + return format_sse(attach_emitted_by({"type": "finish"}, emitter)) + + +def format_step_start(*, emitter: Emitter | None = None) -> str: + return format_sse(attach_emitted_by({"type": "start-step"}, emitter)) + + +def format_step_finish(*, emitter: Emitter | None = None) -> str: + return format_sse(attach_emitted_by({"type": "finish-step"}, emitter)) diff --git a/surfsense_backend/app/services/streaming/events/reasoning.py b/surfsense_backend/app/services/streaming/events/reasoning.py new file mode 100644 index 000000000..5b912d43a --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/reasoning.py @@ -0,0 +1,36 @@ +"""Reasoning-block streaming events.""" + +from __future__ import annotations + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_reasoning_start( + reasoning_id: str, *, emitter: Emitter | None = None +) -> str: + return format_sse( + attach_emitted_by({"type": "reasoning-start", "id": reasoning_id}, emitter) + ) + + +def format_reasoning_delta( + reasoning_id: str, + delta: str, + *, + emitter: Emitter | None = None, +) -> str: + return format_sse( + attach_emitted_by( + {"type": "reasoning-delta", "id": reasoning_id, "delta": delta}, + emitter, + ) + ) + + +def format_reasoning_end( + reasoning_id: str, *, emitter: Emitter | None = None +) -> str: + return format_sse( + attach_emitted_by({"type": "reasoning-end", "id": reasoning_id}, emitter) + ) diff --git a/surfsense_backend/app/services/streaming/events/source.py b/surfsense_backend/app/services/streaming/events/source.py new file mode 100644 index 000000000..54541e8d2 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/source.py @@ -0,0 +1,59 @@ +"""Source and file reference events.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_source_url( + url: str, + *, + source_id: str | None = None, + title: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "source-url", + "sourceId": source_id or url, + "url": url, + } + if title: + payload["title"] = title + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_source_document( + source_id: str, + *, + media_type: str = "file", + title: str | None = None, + description: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "source-document", + "sourceId": source_id, + "mediaType": media_type, + } + if title: + payload["title"] = title + if description: + payload["description"] = description + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_file( + url: str, + media_type: str, + *, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "file", + "url": url, + "mediaType": media_type, + } + return format_sse(attach_emitted_by(payload, emitter)) diff --git a/surfsense_backend/app/services/streaming/events/subagent_lifecycle.py b/surfsense_backend/app/services/streaming/events/subagent_lifecycle.py new file mode 
100644 index 000000000..6dd2d4eab --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/subagent_lifecycle.py @@ -0,0 +1,86 @@ +"""Sub-agent lifecycle events the FE pairs into one timeline lane. + +A sub-agent run is a high-level boundary (a whole agent invocation), +so we use the ``start`` / ``finish`` verb pair, matching how the AI SDK +spells message- and step-level lifecycles. +""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter +from .data import format_data + + +def format_subagent_start( + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + chat_turn_id: str | None = None, + description: str | None = None, + started_at: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "subagent_run_id": subagent_run_id, + "subagent_type": subagent_type, + "parent_tool_call_id": parent_tool_call_id, + } + if chat_turn_id is not None: + payload["chat_turn_id"] = chat_turn_id + if description is not None: + payload["description"] = description + if started_at is not None: + payload["started_at"] = started_at + return format_data("subagent-start", payload, emitter=emitter) + + +def format_subagent_finish( + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + status: str = "completed", + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "subagent_run_id": subagent_run_id, + "subagent_type": subagent_type, + "parent_tool_call_id": parent_tool_call_id, + "status": status, + } + if ended_at is not None: + payload["ended_at"] = ended_at + if duration_ms is not None: + payload["duration_ms"] = duration_ms + return format_data("subagent-finish", payload, emitter=emitter) + + +def format_subagent_error( + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + error_text: str, + error_type: str | None = None, + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "subagent_run_id": subagent_run_id, + "subagent_type": subagent_type, + "parent_tool_call_id": parent_tool_call_id, + "error_text": error_text, + } + if error_type is not None: + payload["error_type"] = error_type + if ended_at is not None: + payload["ended_at"] = ended_at + if duration_ms is not None: + payload["duration_ms"] = duration_ms + return format_data("subagent-error", payload, emitter=emitter) diff --git a/surfsense_backend/app/services/streaming/events/text.py b/surfsense_backend/app/services/streaming/events/text.py new file mode 100644 index 000000000..3baebdebb --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/text.py @@ -0,0 +1,31 @@ +"""Text-block streaming events.""" + +from __future__ import annotations + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_text_start(text_id: str, *, emitter: Emitter | None = None) -> str: + return format_sse( + attach_emitted_by({"type": "text-start", "id": text_id}, emitter) + ) + + +def format_text_delta( + text_id: str, + delta: str, + *, + emitter: Emitter | None = None, +) -> str: + return format_sse( + attach_emitted_by( + {"type": "text-delta", "id": text_id, "delta": delta}, emitter + ) + ) + + +def format_text_end(text_id: str, *, emitter: Emitter | None = None) -> str: + return format_sse( + attach_emitted_by({"type": "text-end", "id": text_id}, emitter) + ) diff 
--git a/surfsense_backend/app/services/streaming/events/tool.py b/surfsense_backend/app/services/streaming/events/tool.py new file mode 100644 index 000000000..c85dc061b --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/tool.py @@ -0,0 +1,80 @@ +"""Tool-call streaming events. + +``toolCallId`` and ``langchainToolCallId`` are AI SDK protocol fields +and stay camelCase. Sub-agent provenance rides on the snake_case +top-level ``emitted_by`` envelope added by :func:`attach_emitted_by`. +""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_tool_input_start( + tool_call_id: str, + tool_name: str, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-input-start", + "toolCallId": tool_call_id, + "toolName": tool_name, + } + if langchain_tool_call_id: + payload["langchainToolCallId"] = langchain_tool_call_id + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_tool_input_delta( + tool_call_id: str, + input_text_delta: str, + *, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-input-delta", + "toolCallId": tool_call_id, + "inputTextDelta": input_text_delta, + } + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_tool_input_available( + tool_call_id: str, + tool_name: str, + input_data: dict[str, Any], + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-input-available", + "toolCallId": tool_call_id, + "toolName": tool_name, + "input": input_data, + } + if langchain_tool_call_id: + payload["langchainToolCallId"] = langchain_tool_call_id + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_tool_output_available( + tool_call_id: str, + output: Any, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-output-available", + "toolCallId": tool_call_id, + "output": output, + } + if langchain_tool_call_id: + payload["langchainToolCallId"] = langchain_tool_call_id + return format_sse(attach_emitted_by(payload, emitter)) From fc429d87024a6a0a36d520f23dd584bcf7bd8262 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 03/58] Add streaming emitter and registry for scoped SSE writes. --- .../services/streaming/emitter/__init__.py | 29 +++++++++ .../app/services/streaming/emitter/emitter.py | 61 +++++++++++++++++++ .../services/streaming/emitter/registry.py | 51 ++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/emitter/__init__.py create mode 100644 surfsense_backend/app/services/streaming/emitter/emitter.py create mode 100644 surfsense_backend/app/services/streaming/emitter/registry.py diff --git a/surfsense_backend/app/services/streaming/emitter/__init__.py b/surfsense_backend/app/services/streaming/emitter/__init__.py new file mode 100644 index 000000000..7814894f3 --- /dev/null +++ b/surfsense_backend/app/services/streaming/emitter/__init__.py @@ -0,0 +1,29 @@ +"""Identity of the agent that emitted a streamed event. + +The wire field is ``emitted_by``; the Python identity is :class:`Emitter`. 
+``EmitterRegistry`` resolves which emitter owns a LangGraph event, with +LangGraph's own namespace metadata as the primary key and a parent_ids +walk as a fallback for cases where context vars don't propagate. +""" + +from __future__ import annotations + +from .emitter import ( + MAIN_EMITTER, + Emitter, + EmitterLevel, + attach_emitted_by, + main_emitter, + subagent_emitter, +) +from .registry import EmitterRegistry + +__all__ = [ + "MAIN_EMITTER", + "Emitter", + "EmitterLevel", + "EmitterRegistry", + "attach_emitted_by", + "main_emitter", + "subagent_emitter", +] diff --git a/surfsense_backend/app/services/streaming/emitter/emitter.py b/surfsense_backend/app/services/streaming/emitter/emitter.py new file mode 100644 index 000000000..08f625a69 --- /dev/null +++ b/surfsense_backend/app/services/streaming/emitter/emitter.py @@ -0,0 +1,61 @@ +"""Identity payload describing which agent produced a stream event.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal + +EmitterLevel = Literal["main", "subagent"] + + +@dataclass(frozen=True) +class Emitter: + level: EmitterLevel + subagent_type: str | None = None + subagent_run_id: str | None = None + parent_tool_call_id: str | None = None + extra: dict[str, Any] = field(default_factory=dict) + + def to_payload(self) -> dict[str, Any]: + payload: dict[str, Any] = {"level": self.level} + if self.subagent_type is not None: + payload["subagent_type"] = self.subagent_type + if self.subagent_run_id is not None: + payload["subagent_run_id"] = self.subagent_run_id + if self.parent_tool_call_id is not None: + payload["parent_tool_call_id"] = self.parent_tool_call_id + if self.extra: + payload.update(self.extra) + return payload + + +MAIN_EMITTER = Emitter(level="main") + + +def main_emitter() -> Emitter: + return MAIN_EMITTER + + +def subagent_emitter( + *, + subagent_type: str, + subagent_run_id: str, + parent_tool_call_id: str | None = None, + extra: dict[str, Any] | None = None, +) -> Emitter: + return Emitter( + level="subagent", + subagent_type=subagent_type, + subagent_run_id=subagent_run_id, + parent_tool_call_id=parent_tool_call_id, + extra=dict(extra or {}), + ) + + +def attach_emitted_by( + payload: dict[str, Any], emitter: Emitter | None +) -> dict[str, Any]: + if emitter is None: + return payload + payload["emitted_by"] = emitter.to_payload() + return payload diff --git a/surfsense_backend/app/services/streaming/emitter/registry.py b/surfsense_backend/app/services/streaming/emitter/registry.py new file mode 100644 index 000000000..cd3e10cdd --- /dev/null +++ b/surfsense_backend/app/services/streaming/emitter/registry.py @@ -0,0 +1,51 @@ +"""Resolve which agent owns a streamed event from its LangGraph run lineage.""" + +from __future__ import annotations + +from collections.abc import Iterable + +from .emitter import Emitter, main_emitter + + +class EmitterRegistry: + def __init__(self) -> None: + self._by_run_id: dict[str, Emitter] = {} + + def register(self, run_id: str, emitter: Emitter) -> None: + if not run_id: + return + self._by_run_id[run_id] = emitter + + def unregister(self, run_id: str) -> Emitter | None: + if not run_id: + return None + return self._by_run_id.pop(run_id, None) + + def get(self, run_id: str | None) -> Emitter | None: + if not run_id: + return None + return self._by_run_id.get(run_id) + + def resolve( + self, + *, + run_id: str | None, + parent_ids: Iterable[str] | None, + ) -> Emitter: + own = self.get(run_id) + if own is not None: + return own + if 
parent_ids: + for ancestor in reversed(list(parent_ids)): + emitter = self.get(ancestor) + if emitter is not None: + return emitter + return main_emitter() + + def has_active_subagents(self) -> bool: + return any( + emitter.level == "subagent" for emitter in self._by_run_id.values() + ) + + def clear(self) -> None: + self._by_run_id.clear() From fef7621d96c63986e723228c1dcff4a918b443e9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 04/58] Add StreamingService and interrupt correlation for chat streams. --- .../app/services/streaming/__init__.py | 20 + .../streaming/interrupt_correlation.py | 84 ++++ .../app/services/streaming/service.py | 414 ++++++++++++++++++ 3 files changed, 518 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/__init__.py create mode 100644 surfsense_backend/app/services/streaming/interrupt_correlation.py create mode 100644 surfsense_backend/app/services/streaming/service.py diff --git a/surfsense_backend/app/services/streaming/__init__.py b/surfsense_backend/app/services/streaming/__init__.py new file mode 100644 index 000000000..287d48a7a --- /dev/null +++ b/surfsense_backend/app/services/streaming/__init__.py @@ -0,0 +1,20 @@ +"""Single-responsibility split of the streaming SSE protocol. + +Layout: +* ``envelope/`` - SSE wire framing + ID generators +* ``emitter/`` - identity of the agent that emitted an event + runtime registry +* ``events/`` - one module per SSE event family +* ``service.py`` - composition root used by the orchestrator +* ``interrupt_correlation.py`` - id-aware lookup over LangGraph state + +Naming on the wire: +* AI SDK protocol fields keep their existing camelCase + (``toolCallId``, ``messageId``, ``inputTextDelta``, ``langchainToolCallId``). +* Every SurfSense-added field uses ``snake_case``, including the + top-level ``emitted_by`` envelope and all inner ``data`` payloads. + +Production keeps using ``app.services.new_streaming_service`` and +``app.tasks.chat.stream_new_chat`` until the cutover phase. 
+""" + +from __future__ import annotations diff --git a/surfsense_backend/app/services/streaming/interrupt_correlation.py b/surfsense_backend/app/services/streaming/interrupt_correlation.py new file mode 100644 index 000000000..3045dfb6a --- /dev/null +++ b/surfsense_backend/app/services/streaming/interrupt_correlation.py @@ -0,0 +1,84 @@ +"""Id-aware lookup of pending LangGraph interrupts (replaces first-wins).""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class PendingInterrupt: + interrupt_id: str | None + value: dict[str, Any] + source_task_id: str | None = None + + +def list_pending_interrupts(state: Any) -> list[PendingInterrupt]: + out: list[PendingInterrupt] = [] + + for task in getattr(state, "tasks", None) or (): + task_id = _safe_str(getattr(task, "id", None)) + for it in getattr(task, "interrupts", None) or (): + value = _coerce_interrupt_value(it) + if value is None: + continue + interrupt_id = _safe_str(getattr(it, "id", None)) + out.append( + PendingInterrupt( + interrupt_id=interrupt_id, + value=value, + source_task_id=task_id, + ) + ) + + for it in getattr(state, "interrupts", None) or (): + value = _coerce_interrupt_value(it) + if value is None: + continue + interrupt_id = _safe_str(getattr(it, "id", None)) + out.append(PendingInterrupt(interrupt_id=interrupt_id, value=value)) + + return out + + +def get_pending_interrupt_by_id( + state: Any, interrupt_id: str +) -> PendingInterrupt | None: + for pending in list_pending_interrupts(state): + if pending.interrupt_id == interrupt_id: + return pending + return None + + +def get_pending_interrupt_for_tool_call( + state: Any, tool_call_id: str +) -> PendingInterrupt | None: + for pending in list_pending_interrupts(state): + actions = pending.value.get("action_requests") + if not isinstance(actions, list): + continue + for action in actions: + if not isinstance(action, dict): + continue + if action.get("tool_call_id") == tool_call_id: + return pending + return None + + +def first_pending_interrupt(state: Any) -> PendingInterrupt | None: + """Explicit opt-in to legacy first-wins; prefer the id-aware helpers above.""" + pending = list_pending_interrupts(state) + return pending[0] if pending else None + + +def _coerce_interrupt_value(item: Any) -> dict[str, Any] | None: + if isinstance(item, dict): + return item if item else None + value = getattr(item, "value", None) + if isinstance(value, dict): + return value if value else None + return None + + +def _safe_str(value: Any) -> str | None: + return value if isinstance(value, str) and value else None diff --git a/surfsense_backend/app/services/streaming/service.py b/surfsense_backend/app/services/streaming/service.py new file mode 100644 index 000000000..5a75a1b2d --- /dev/null +++ b/surfsense_backend/app/services/streaming/service.py @@ -0,0 +1,414 @@ +"""Composition root: bundles every formatter + a per-invocation emitter registry.""" + +from __future__ import annotations + +from collections.abc import Iterable +from typing import Any + +from . 
import envelope +from .emitter import Emitter, EmitterRegistry +from .events import ( + action_log, + data, + error, + interrupt, + lifecycle, + reasoning, + source, + subagent_lifecycle, + text, + tool, +) + + +class StreamingService: + def __init__(self) -> None: + self._message_id: str | None = None + self.emitter_registry = EmitterRegistry() + + @property + def message_id(self) -> str | None: + return self._message_id + + def begin_message(self, message_id: str | None = None) -> str: + self._message_id = message_id or envelope.generate_message_id() + return self._message_id + + @staticmethod + def generate_text_id() -> str: + return envelope.generate_text_id() + + @staticmethod + def generate_reasoning_id() -> str: + return envelope.generate_reasoning_id() + + @staticmethod + def generate_tool_call_id() -> str: + return envelope.generate_tool_call_id() + + @staticmethod + def generate_subagent_run_id() -> str: + return envelope.generate_subagent_run_id() + + @staticmethod + def get_response_headers() -> dict[str, str]: + return envelope.get_response_headers() + + @staticmethod + def format_done() -> str: + return envelope.format_done() + + def resolve_emitter( + self, + *, + run_id: str | None, + parent_ids: Iterable[str] | None, + ) -> Emitter: + return self.emitter_registry.resolve(run_id=run_id, parent_ids=parent_ids) + + def format_message_start( + self, + message_id: str | None = None, + *, + emitter: Emitter | None = None, + ) -> str: + chosen = self.begin_message(message_id) + return lifecycle.format_message_start(chosen, emitter=emitter) + + def format_message_finish(self, *, emitter: Emitter | None = None) -> str: + return lifecycle.format_message_finish(emitter=emitter) + + def format_step_start(self, *, emitter: Emitter | None = None) -> str: + return lifecycle.format_step_start(emitter=emitter) + + def format_step_finish(self, *, emitter: Emitter | None = None) -> str: + return lifecycle.format_step_finish(emitter=emitter) + + def format_text_start( + self, text_id: str, *, emitter: Emitter | None = None + ) -> str: + return text.format_text_start(text_id, emitter=emitter) + + def format_text_delta( + self, text_id: str, delta: str, *, emitter: Emitter | None = None + ) -> str: + return text.format_text_delta(text_id, delta, emitter=emitter) + + def format_text_end( + self, text_id: str, *, emitter: Emitter | None = None + ) -> str: + return text.format_text_end(text_id, emitter=emitter) + + def format_reasoning_start( + self, reasoning_id: str, *, emitter: Emitter | None = None + ) -> str: + return reasoning.format_reasoning_start(reasoning_id, emitter=emitter) + + def format_reasoning_delta( + self, + reasoning_id: str, + delta: str, + *, + emitter: Emitter | None = None, + ) -> str: + return reasoning.format_reasoning_delta(reasoning_id, delta, emitter=emitter) + + def format_reasoning_end( + self, reasoning_id: str, *, emitter: Emitter | None = None + ) -> str: + return reasoning.format_reasoning_end(reasoning_id, emitter=emitter) + + def format_tool_input_start( + self, + tool_call_id: str, + tool_name: str, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_input_start( + tool_call_id, + tool_name, + langchain_tool_call_id=langchain_tool_call_id, + emitter=emitter, + ) + + def format_tool_input_delta( + self, + tool_call_id: str, + input_text_delta: str, + *, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_input_delta( + tool_call_id, input_text_delta, emitter=emitter + ) + 
+ def format_tool_input_available( + self, + tool_call_id: str, + tool_name: str, + input_data: dict[str, Any], + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_input_available( + tool_call_id, + tool_name, + input_data, + langchain_tool_call_id=langchain_tool_call_id, + emitter=emitter, + ) + + def format_tool_output_available( + self, + tool_call_id: str, + output: Any, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_output_available( + tool_call_id, + output, + langchain_tool_call_id=langchain_tool_call_id, + emitter=emitter, + ) + + def format_source_url( + self, + url: str, + *, + source_id: str | None = None, + title: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return source.format_source_url( + url, source_id=source_id, title=title, emitter=emitter + ) + + def format_source_document( + self, + source_id: str, + *, + media_type: str = "file", + title: str | None = None, + description: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return source.format_source_document( + source_id, + media_type=media_type, + title=title, + description=description, + emitter=emitter, + ) + + def format_file( + self, url: str, media_type: str, *, emitter: Emitter | None = None + ) -> str: + return source.format_file(url, media_type, emitter=emitter) + + def format_data( + self, data_type: str, payload: Any, *, emitter: Emitter | None = None + ) -> str: + return data.format_data(data_type, payload, emitter=emitter) + + def format_terminal_info( + self, + text_value: str, + *, + message_type: str = "info", + emitter: Emitter | None = None, + ) -> str: + return data.format_terminal_info( + text_value, message_type=message_type, emitter=emitter + ) + + def format_further_questions( + self, + questions: list[str], + *, + emitter: Emitter | None = None, + ) -> str: + return data.format_further_questions(questions, emitter=emitter) + + def format_thinking_step( + self, + *, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, + emitter: Emitter | None = None, + ) -> str: + return data.format_thinking_step( + step_id=step_id, + title=title, + status=status, + items=items, + emitter=emitter, + ) + + def format_thread_title_update( + self, + *, + thread_id: int, + title: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_thread_title_update( + thread_id=thread_id, title=title, emitter=emitter + ) + + def format_turn_info( + self, + *, + chat_turn_id: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_turn_info(chat_turn_id=chat_turn_id, emitter=emitter) + + def format_turn_status( + self, + *, + status: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_turn_status(status=status, emitter=emitter) + + def format_user_message_id( + self, + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_user_message_id( + message_id=message_id, turn_id=turn_id, emitter=emitter + ) + + def format_assistant_message_id( + self, + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_assistant_message_id( + message_id=message_id, turn_id=turn_id, emitter=emitter + ) + + def format_error( + self, + error_text: str, + *, + error_code: str | None = None, + extra: dict[str, Any] | None = None, + emitter: Emitter | None = None, + ) -> str: + 
return error.format_error( + error_text, + error_code=error_code, + extra=extra, + emitter=emitter, + ) + + def format_interrupt_request( + self, + interrupt_value: dict[str, Any], + *, + interrupt_id: str | None = None, + pending_interrupt_count: int | None = None, + chat_turn_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return interrupt.format_interrupt_request( + interrupt_value, + interrupt_id=interrupt_id, + pending_interrupt_count=pending_interrupt_count, + chat_turn_id=chat_turn_id, + emitter=emitter, + ) + + def format_subagent_start( + self, + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + chat_turn_id: str | None = None, + description: str | None = None, + started_at: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return subagent_lifecycle.format_subagent_start( + subagent_run_id=subagent_run_id, + subagent_type=subagent_type, + parent_tool_call_id=parent_tool_call_id, + chat_turn_id=chat_turn_id, + description=description, + started_at=started_at, + emitter=emitter, + ) + + def format_subagent_finish( + self, + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + status: str = "completed", + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, + ) -> str: + return subagent_lifecycle.format_subagent_finish( + subagent_run_id=subagent_run_id, + subagent_type=subagent_type, + parent_tool_call_id=parent_tool_call_id, + status=status, + ended_at=ended_at, + duration_ms=duration_ms, + emitter=emitter, + ) + + def format_subagent_error( + self, + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + error_text: str, + error_type: str | None = None, + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, + ) -> str: + return subagent_lifecycle.format_subagent_error( + subagent_run_id=subagent_run_id, + subagent_type=subagent_type, + parent_tool_call_id=parent_tool_call_id, + error_text=error_text, + error_type=error_type, + ended_at=ended_at, + duration_ms=duration_ms, + emitter=emitter, + ) + + def format_action_log( + self, + payload: dict[str, Any], + *, + emitter: Emitter | None = None, + ) -> str: + return action_log.format_action_log(payload, emitter=emitter) + + def format_action_log_updated( + self, + payload: dict[str, Any], + *, + emitter: Emitter | None = None, + ) -> str: + return action_log.format_action_log_updated(payload, emitter=emitter) From 3d8c4be369bc887695522e3a18a553cd04954a68 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 05/58] Add unit tests for streaming SSE envelope behavior. 
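The contract under test, from the consumer side: every frame is
``data: <payload>\n\n`` and the literal ``data: [DONE]\n\n`` closes the
stream. A minimal parsing sketch of that contract (hypothetical helper,
not part of this patch):

    import json

    def parse_frame(frame: str) -> dict | None:
        """Return the decoded payload, or None for the [DONE] marker."""
        body = frame.removeprefix("data: ").removesuffix("\n\n")
        if body == "[DONE]":
            return None
        return json.loads(body)

    assert parse_frame("data: [DONE]\n\n") is None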
--- .../services/streaming/test_sse_envelope.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py diff --git a/surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py b/surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py new file mode 100644 index 000000000..511e4575a --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py @@ -0,0 +1,51 @@ +"""Pin the exact SSE wire bytes the FE parser depends on.""" + +from __future__ import annotations + +import json + +import pytest + +from app.services.streaming.envelope import ( + format_done, + format_sse, + get_response_headers, +) + +pytestmark = pytest.mark.unit + + +class TestFormatSse: + def test_dict_payload_is_json_serialised(self) -> None: + frame = format_sse({"type": "start", "messageId": "msg_1"}) + assert frame.startswith("data: ") + assert frame.endswith("\n\n") + body = frame[len("data: ") : -2] + assert json.loads(body) == {"type": "start", "messageId": "msg_1"} + + def test_string_payload_is_emitted_verbatim(self) -> None: + frame = format_sse('{"already":"json"}') + assert frame == 'data: {"already":"json"}\n\n' + + def test_nested_payload_round_trips(self) -> None: + payload = { + "type": "data-action-log", + "data": {"id": 7, "tool_name": "ls", "reversible": False}, + } + frame = format_sse(payload) + body = frame.removeprefix("data: ").removesuffix("\n\n") + assert json.loads(body) == payload + + +class TestFormatDone: + def test_done_marker_is_literal(self) -> None: + assert format_done() == "data: [DONE]\n\n" + + +class TestResponseHeaders: + def test_headers_pin_ai_sdk_v1_protocol(self) -> None: + headers = get_response_headers() + assert headers["Content-Type"] == "text/event-stream" + assert headers["Cache-Control"] == "no-cache" + assert headers["Connection"] == "keep-alive" + assert headers["x-vercel-ai-ui-message-stream"] == "v1" From 619a8362b7b45034601f35cda15ea349b0f7c701 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 06/58] Add unit tests for streaming emitters and registry wiring. 
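The attribution rule these tests pin, sketched end to end (run ids are
illustrative):

    from app.services.streaming.emitter import EmitterRegistry, subagent_emitter

    registry = EmitterRegistry()
    registry.register(
        "run_task_a",
        subagent_emitter(subagent_type="search", subagent_run_id="sub_a"),
    )

    # A model call nested under the task tool inherits its sub-agent
    # emitter via the parent_ids walk (nearest registered ancestor wins)...
    resolved = registry.resolve(
        run_id="run_chat_model", parent_ids=["root", "run_task_a"]
    )
    assert resolved.subagent_run_id == "sub_a"

    # ...and unknown lineage always falls back to the main lane.
    assert registry.resolve(run_id="run_x", parent_ids=["root"]).level == "main"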
--- .../unit/services/streaming/test_emitter.py | 79 +++++++++++++ .../streaming/test_emitter_registry.py | 111 ++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 surfsense_backend/tests/unit/services/streaming/test_emitter.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py diff --git a/surfsense_backend/tests/unit/services/streaming/test_emitter.py b/surfsense_backend/tests/unit/services/streaming/test_emitter.py new file mode 100644 index 000000000..6c4e1ff58 --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_emitter.py @@ -0,0 +1,79 @@ +"""Pin the wire compactness rule and the top-level ``emitted_by`` field name.""" + +from __future__ import annotations + +import pytest + +from app.services.streaming.emitter import ( + Emitter, + attach_emitted_by, + main_emitter, + subagent_emitter, +) + +pytestmark = pytest.mark.unit + + +def test_main_emitter_payload_contains_only_level() -> None: + payload = main_emitter().to_payload() + assert payload == {"level": "main"} + + +def test_subagent_emitter_payload_includes_all_set_fields() -> None: + payload = subagent_emitter( + subagent_type="deliverables", + subagent_run_id="subagent_abc", + parent_tool_call_id="call_xyz", + ).to_payload() + assert payload == { + "level": "subagent", + "subagent_type": "deliverables", + "subagent_run_id": "subagent_abc", + "parent_tool_call_id": "call_xyz", + } + + +def test_subagent_emitter_payload_omits_unset_optional_fields() -> None: + """parent_tool_call_id is None when the run is started outside a tool boundary.""" + payload = Emitter( + level="subagent", + subagent_type="email", + subagent_run_id="subagent_1", + ).to_payload() + assert "parent_tool_call_id" not in payload + assert payload["subagent_type"] == "email" + + +def test_extra_fields_merge_into_payload() -> None: + """Future extension fields (e.g. 
lane colour, label) flow through ``extra``.""" + emitter = subagent_emitter( + subagent_type="search", + subagent_run_id="r1", + extra={"label": "Web Search"}, + ) + assert emitter.to_payload()["label"] == "Web Search" + + +def test_attach_emitted_by_with_none_is_noop() -> None: + payload = {"type": "text-delta", "delta": "hi"} + result = attach_emitted_by(payload, None) + assert "emitted_by" not in result + assert result is payload + + +def test_attach_emitted_by_adds_payload_under_snake_case_top_level_key() -> None: + payload = {"type": "text-delta", "delta": "hi"} + attach_emitted_by( + payload, + subagent_emitter( + subagent_type="x", + subagent_run_id="y", + parent_tool_call_id="z", + ), + ) + assert payload["emitted_by"] == { + "level": "subagent", + "subagent_type": "x", + "subagent_run_id": "y", + "parent_tool_call_id": "z", + } diff --git a/surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py b/surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py new file mode 100644 index 000000000..e459c946a --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py @@ -0,0 +1,111 @@ +"""Pin the parent_ids walk + parallel sub-agent isolation that drives lane attribution.""" + +from __future__ import annotations + +import pytest + +from app.services.streaming.emitter import ( + Emitter, + EmitterRegistry, + main_emitter, + subagent_emitter, +) + +pytestmark = pytest.mark.unit + + +def _sub(run_id: str, kind: str = "deliverables") -> Emitter: + return subagent_emitter( + subagent_type=kind, + subagent_run_id=f"sub_{run_id}", + parent_tool_call_id=f"call_{run_id}", + ) + + +def test_unregistered_event_resolves_to_main_emitter() -> None: + registry = EmitterRegistry() + resolved = registry.resolve(run_id="run_1", parent_ids=["root"]) + assert resolved is main_emitter() + + +def test_event_owned_by_registered_run_id_returns_that_emitter() -> None: + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + assert registry.resolve(run_id="run_task_a", parent_ids=[]) is emitter + + +def test_descendant_resolves_via_parent_ids_chain() -> None: + """A model-call event nested under the task tool inherits its sub-agent emitter.""" + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + descendant = registry.resolve( + run_id="run_chat_model", + parent_ids=["root", "run_agent", "run_task_a"], + ) + assert descendant is emitter + + +def test_nearest_registered_ancestor_wins_over_distant_ones() -> None: + """Inner sub-agents owe their emitter to the nearest task tool, not the outer one.""" + registry = EmitterRegistry() + outer = _sub("outer", kind="planner") + inner = _sub("inner", kind="email") + registry.register("run_outer", outer) + registry.register("run_inner", inner) + resolved = registry.resolve( + run_id="run_inner_tool", + parent_ids=["root", "run_outer", "run_inner"], + ) + assert resolved is inner + + +def test_parallel_subagents_do_not_bleed_into_each_other() -> None: + """Two concurrent task tools each own their own descendant events.""" + registry = EmitterRegistry() + a = _sub("a", kind="search") + b = _sub("b", kind="email") + registry.register("run_task_a", a) + registry.register("run_task_b", b) + + from_a = registry.resolve(run_id="x", parent_ids=["root", "run_task_a"]) + from_b = registry.resolve(run_id="y", parent_ids=["root", "run_task_b"]) + from_main = registry.resolve(run_id="z", parent_ids=["root"]) + + assert from_a is a + assert 
from_b is b + assert from_main is main_emitter() + + +def test_unregister_releases_run_id_so_descendants_fall_back_to_main() -> None: + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + registry.unregister("run_task_a") + assert registry.resolve(run_id="x", parent_ids=["run_task_a"]) is main_emitter() + + +def test_unregister_returns_the_previously_registered_emitter() -> None: + """Lets callers emit ``data-subagent-finish`` carrying the same emitter they opened with.""" + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + assert registry.unregister("run_task_a") is emitter + + +def test_has_active_subagents_tracks_open_lanes() -> None: + registry = EmitterRegistry() + assert not registry.has_active_subagents() + registry.register("run_task_a", _sub("a")) + assert registry.has_active_subagents() + registry.unregister("run_task_a") + assert not registry.has_active_subagents() + + +def test_empty_run_id_and_parent_ids_resolves_to_main() -> None: + """Defensive: events without identifiers always belong to the main lane.""" + registry = EmitterRegistry() + registry.register("run_task_a", _sub("a")) + assert registry.resolve(run_id=None, parent_ids=None) is main_emitter() + assert registry.resolve(run_id="", parent_ids=[]) is main_emitter() From 366122da6e4568289e131e2ac20be63e8bb5bd90 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 07/58] Add unit tests for streaming interrupts and service propagation. --- .../tests/unit/services/streaming/__init__.py | 0 .../streaming/test_interrupt_correlation.py | 164 ++++++++++++++++++ .../streaming/test_interrupt_events.py | 91 ++++++++++ .../test_service_emitter_propagation.py | 142 +++++++++++++++ 4 files changed, 397 insertions(+) create mode 100644 surfsense_backend/tests/unit/services/streaming/__init__.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py diff --git a/surfsense_backend/tests/unit/services/streaming/__init__.py b/surfsense_backend/tests/unit/services/streaming/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py b/surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py new file mode 100644 index 000000000..edf4ecb9a --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py @@ -0,0 +1,164 @@ +"""Pin id-aware pending-interrupt lookup that replaces the buggy first-wins.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest + +from app.services.streaming.interrupt_correlation import ( + PendingInterrupt, + first_pending_interrupt, + get_pending_interrupt_by_id, + get_pending_interrupt_for_tool_call, + list_pending_interrupts, +) + +pytestmark = pytest.mark.unit + + +@dataclass +class _Interrupt: + value: dict[str, Any] + id: str | None = None + + +@dataclass +class _Task: + interrupts: tuple[_Interrupt, ...] = () + id: str | None = None + + +@dataclass +class _State: + tasks: tuple[_Task, ...] = () + interrupts: tuple[_Interrupt, ...] 
= () + + +def _hitl(name: str, tool_call_id: str | None = None) -> dict[str, Any]: + """Minimal LangChain HITLRequest payload for one action.""" + action: dict[str, Any] = {"name": name, "args": {}} + if tool_call_id is not None: + action["tool_call_id"] = tool_call_id + return { + "action_requests": [action], + "review_configs": [{"action_name": name, "allowed_decisions": ["approve"]}], + } + + +def test_empty_state_has_no_pending_interrupts() -> None: + state = _State() + assert list_pending_interrupts(state) == [] + assert first_pending_interrupt(state) is None + + +def test_single_pending_interrupt_in_task_is_returned() -> None: + state = _State( + tasks=( + _Task( + id="task_1", + interrupts=(_Interrupt(value=_hitl("send_email"), id="int_1"),), + ), + ) + ) + pending = list_pending_interrupts(state) + assert len(pending) == 1 + assert pending[0] == PendingInterrupt( + interrupt_id="int_1", + value=_hitl("send_email"), + source_task_id="task_1", + ) + + +def test_pending_interrupts_returned_in_task_then_root_order() -> None: + """Determinism matters: callers iterate in this order to render the UI.""" + state = _State( + tasks=( + _Task( + id="task_a", + interrupts=(_Interrupt(value=_hitl("a"), id="int_a"),), + ), + _Task( + id="task_b", + interrupts=(_Interrupt(value=_hitl("b"), id="int_b"),), + ), + ), + interrupts=(_Interrupt(value=_hitl("c"), id="int_c"),), + ) + pending = list_pending_interrupts(state) + ids = [p.interrupt_id for p in pending] + assert ids == ["int_a", "int_b", "int_c"] + + +def test_get_by_id_finds_the_right_interrupt_under_parallel_load() -> None: + """Replacing first-wins: id-aware lookup MUST pick the requested one.""" + state = _State( + tasks=( + _Task(interrupts=(_Interrupt(value=_hitl("a"), id="int_a"),)), + _Task(interrupts=(_Interrupt(value=_hitl("b"), id="int_b"),)), + _Task(interrupts=(_Interrupt(value=_hitl("c"), id="int_c"),)), + ) + ) + found = get_pending_interrupt_by_id(state, "int_b") + assert found is not None + assert found.value["action_requests"][0]["name"] == "b" + + +def test_get_by_id_returns_none_when_id_is_not_pending() -> None: + state = _State( + tasks=(_Task(interrupts=(_Interrupt(value=_hitl("a"), id="int_a"),)),) + ) + assert get_pending_interrupt_by_id(state, "missing") is None + + +def test_get_by_tool_call_id_matches_action_request_payload() -> None: + """HITLRequest carries ``tool_call_id`` per action; lookup uses that.""" + state = _State( + tasks=( + _Task( + interrupts=( + _Interrupt( + value=_hitl("a", tool_call_id="call_xxx"), id="int_a" + ), + _Interrupt( + value=_hitl("b", tool_call_id="call_yyy"), id="int_b" + ), + ) + ), + ) + ) + found = get_pending_interrupt_for_tool_call(state, "call_yyy") + assert found is not None + assert found.interrupt_id == "int_b" + + +def test_first_pending_interrupt_matches_legacy_first_wins_behaviour() -> None: + """Sequential-turn safety: the explicit shortcut still returns the first.""" + state = _State( + tasks=(_Task(interrupts=(_Interrupt(value=_hitl("first"), id="int_1"),)),), + interrupts=(_Interrupt(value=_hitl("second"), id="int_2"),), + ) + first = first_pending_interrupt(state) + assert first is not None + assert first.interrupt_id == "int_1" + + +def test_interrupt_without_id_falls_back_to_none() -> None: + """Snapshots from older LangGraph versions may omit ``id`` — preserve that.""" + state = _State( + tasks=(_Task(interrupts=(_Interrupt(value=_hitl("a"), id=None),)),) + ) + pending = list_pending_interrupts(state) + assert len(pending) == 1 + assert pending[0].interrupt_id is 
None + + +def test_non_dict_interrupt_values_are_ignored() -> None: + """Defensive: a non-dict value should not crash the iteration.""" + + class _Raw: + value = "not a dict" + + state = _State(tasks=(_Task(interrupts=(_Raw(),)),)) # type: ignore[arg-type] + assert list_pending_interrupts(state) == [] diff --git a/surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py b/surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py new file mode 100644 index 000000000..dbdd607bf --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py @@ -0,0 +1,91 @@ +"""Pin interrupt-payload normalisation and the optional correlation fields on the wire.""" + +from __future__ import annotations + +import json + +import pytest + +from app.services.streaming.events.interrupt import ( + format_interrupt_request, + normalize_interrupt_payload, +) + +pytestmark = pytest.mark.unit + + +def _decode(frame: str) -> dict: + body = frame.removeprefix("data: ").removesuffix("\n\n") + return json.loads(body) + + +def test_hitlrequest_shape_is_passed_through_unchanged() -> None: + raw = { + "action_requests": [{"name": "send_email", "args": {"to": "a@b"}}], + "review_configs": [ + {"action_name": "send_email", "allowed_decisions": ["approve"]} + ], + } + assert normalize_interrupt_payload(raw) == raw + + +def test_custom_interrupt_primitive_is_converted_to_canonical_shape() -> None: + raw = { + "type": "permission", + "message": "Allow send?", + "action": {"tool": "send_email", "params": {"to": "a@b"}}, + "context": {"reason": "destructive"}, + } + out = normalize_interrupt_payload(raw) + assert out["action_requests"] == [ + {"name": "send_email", "args": {"to": "a@b"}} + ] + assert out["review_configs"] == [ + { + "action_name": "send_email", + "allowed_decisions": ["approve", "edit", "reject"], + } + ] + assert out["interrupt_type"] == "permission" + assert out["message"] == "Allow send?" 
+ assert out["context"] == {"reason": "destructive"} + + +def test_custom_interrupt_without_message_omits_message_key() -> None: + """Optional fields stay optional on the wire; FE does not see ``"message": None``.""" + raw = {"action": {"tool": "send_email"}} + out = normalize_interrupt_payload(raw) + assert "message" not in out + + +def test_custom_interrupt_without_tool_falls_back_to_unknown_tool() -> None: + """Defensive: a malformed ``action`` block must not crash the relay.""" + out = normalize_interrupt_payload({"type": "x", "action": {}}) + assert out["action_requests"][0]["name"] == "unknown_tool" + assert out["review_configs"][0]["action_name"] == "unknown_tool" + + +def test_format_interrupt_request_carries_correlation_fields_on_the_wire() -> None: + frame = format_interrupt_request( + {"action_requests": [], "review_configs": []}, + interrupt_id="int_42", + pending_interrupt_count=3, + chat_turn_id="turn_99", + ) + payload = _decode(frame) + assert payload["type"] == "data-interrupt-request" + inner = payload["data"] + assert inner["interrupt_id"] == "int_42" + assert inner["pending_interrupt_count"] == 3 + assert inner["chat_turn_id"] == "turn_99" + + +def test_format_interrupt_request_omits_correlation_fields_when_unset() -> None: + """Backward compat: legacy single-interrupt callers don't have to supply ids.""" + frame = format_interrupt_request( + {"action_requests": [], "review_configs": []}, + ) + inner = _decode(frame)["data"] + assert "interrupt_id" not in inner + assert "pending_interrupt_count" not in inner + assert "chat_turn_id" not in inner diff --git a/surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py b/surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py new file mode 100644 index 000000000..b381f13bc --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py @@ -0,0 +1,142 @@ +"""Pin that sub-agent emitter reaches every wire event the relay emits.""" + +from __future__ import annotations + +import json + +import pytest + +from app.services.streaming.emitter import subagent_emitter +from app.services.streaming.service import StreamingService + +pytestmark = pytest.mark.unit + + +def _decode(frame: str) -> dict: + body = frame.removeprefix("data: ").removesuffix("\n\n") + return json.loads(body) + + +@pytest.fixture +def service() -> StreamingService: + return StreamingService() + + +@pytest.fixture +def sub_emitter(): + return subagent_emitter( + subagent_type="deliverables", + subagent_run_id="sub_xyz", + parent_tool_call_id="call_parent", + ) + + +def test_text_delta_carries_subagent_emitter_on_the_wire(service, sub_emitter) -> None: + payload = _decode(service.format_text_delta("text_1", "hi", emitter=sub_emitter)) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + assert payload["delta"] == "hi" + + +def test_reasoning_delta_carries_subagent_emitter_on_the_wire( + service, sub_emitter +) -> None: + payload = _decode( + service.format_reasoning_delta("r_1", "thinking", emitter=sub_emitter) + ) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + + +def test_tool_input_start_carries_subagent_emitter_and_lc_id( + service, sub_emitter +) -> None: + payload = _decode( + service.format_tool_input_start( + "call_1", + "send_email", + langchain_tool_call_id="lc_1", + emitter=sub_emitter, + ) + ) + assert payload["emitted_by"]["subagent_type"] == "deliverables" + assert payload["langchainToolCallId"] == "lc_1" + assert 
payload["toolName"] == "send_email" + + +def test_tool_output_available_carries_subagent_emitter(service, sub_emitter) -> None: + payload = _decode( + service.format_tool_output_available( + "call_1", {"ok": True}, emitter=sub_emitter + ) + ) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + assert payload["output"] == {"ok": True} + + +def test_thinking_step_carries_subagent_emitter(service, sub_emitter) -> None: + payload = _decode( + service.format_thinking_step( + step_id="s1", + title="Sending email", + status="in_progress", + emitter=sub_emitter, + ) + ) + assert payload["type"] == "data-thinking-step" + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + + +def test_action_log_carries_subagent_emitter(service, sub_emitter) -> None: + payload = _decode( + service.format_action_log( + {"id": 1, "tool_name": "send_email", "reversible": False}, + emitter=sub_emitter, + ) + ) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + assert payload["data"]["tool_name"] == "send_email" + + +def test_subagent_lifecycle_events_share_run_id_for_pairing( + service, sub_emitter +) -> None: + start = _decode( + service.format_subagent_start( + subagent_run_id="sub_xyz", + subagent_type="deliverables", + parent_tool_call_id="call_parent", + emitter=sub_emitter, + ) + ) + finish = _decode( + service.format_subagent_finish( + subagent_run_id="sub_xyz", + subagent_type="deliverables", + parent_tool_call_id="call_parent", + emitter=sub_emitter, + ) + ) + assert start["data"]["subagent_run_id"] == finish["data"]["subagent_run_id"] + assert start["type"] == "data-subagent-start" + assert finish["type"] == "data-subagent-finish" + + +def test_main_emitter_events_omit_emitted_by_field(service) -> None: + payload = _decode(service.format_text_delta("text_1", "hi")) + assert "emitted_by" not in payload + + +def test_resolve_emitter_through_service_uses_registry(service, sub_emitter) -> None: + service.emitter_registry.register("run_task_1", sub_emitter) + resolved = service.resolve_emitter( + run_id="run_chat_model", + parent_ids=["root", "run_task_1"], + ) + assert resolved is sub_emitter + + +def test_message_id_is_assigned_on_message_start_and_reused(service) -> None: + frame = service.format_message_start() + payload = _decode(frame) + assigned = payload["messageId"] + assert assigned.startswith("msg_") + assert service.message_id == assigned From c25b78c30492e53e5f1b69f831f15dda029c9d54 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 08/58] Add chat streaming error classification, helpers, and StreamResult. 
--- .../app/tasks/chat/streaming/__init__.py | 3 + .../tasks/chat/streaming/errors/__init__.py | 3 + .../tasks/chat/streaming/errors/classifier.py | 187 ++++++++++++++++++ .../tasks/chat/streaming/errors/emitter.py | 38 ++++ .../tasks/chat/streaming/helpers/__init__.py | 3 + .../chat/streaming/helpers/chunk_parts.py | 60 ++++++ .../streaming/helpers/interrupt_inspector.py | 47 +++++ .../streaming/helpers/tool_call_matching.py | 32 +++ .../chat/streaming/helpers/tool_output.py | 43 ++++ .../app/tasks/chat/streaming/stream_result.py | 28 +++ 10 files changed, 444 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/errors/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/errors/classifier.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/errors/emitter.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/stream_result.py diff --git a/surfsense_backend/app/tasks/chat/streaming/__init__.py b/surfsense_backend/app/tasks/chat/streaming/__init__.py new file mode 100644 index 000000000..bb06cc021 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/__init__.py @@ -0,0 +1,3 @@ +"""Chat streaming orchestrator and event relay.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/__init__.py b/surfsense_backend/app/tasks/chat/streaming/errors/__init__.py new file mode 100644 index 000000000..02284d4b0 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/errors/__init__.py @@ -0,0 +1,3 @@ +"""Error classification, structured logging, and terminal-error SSE emission.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py new file mode 100644 index 000000000..3af2b9f9f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py @@ -0,0 +1,187 @@ +"""Classify stream exceptions for logging and client error payloads.""" + +from __future__ import annotations + +import json +import logging +import time +from typing import Any, Literal + +from app.agents.new_chat.errors import BusyError +from app.agents.new_chat.middleware.busy_mutex import ( + get_cancel_state, + is_cancel_requested, +) + +TURN_CANCELLING_INITIAL_DELAY_MS = 200 +TURN_CANCELLING_BACKOFF_FACTOR = 2 +TURN_CANCELLING_MAX_DELAY_MS = 1500 + + +def compute_turn_cancelling_retry_delay(attempt: int) -> int: + if attempt < 1: + attempt = 1 + delay = TURN_CANCELLING_INITIAL_DELAY_MS * ( + TURN_CANCELLING_BACKOFF_FACTOR ** (attempt - 1) + ) + return min(delay, TURN_CANCELLING_MAX_DELAY_MS) + + +def log_chat_stream_error( + *, + flow: Literal["new", "resume", "regenerate"], + error_kind: str, + error_code: str | None, + severity: Literal["info", "warn", "error"], + is_expected: bool, + request_id: str | None, + thread_id: int | None, + search_space_id: int | None, + user_id: str | None, + message: str, + extra: dict[str, Any] | None = None, +) -> None: + 
payload: dict[str, Any] = { + "event": "chat_stream_error", + "flow": flow, + "error_kind": error_kind, + "error_code": error_code, + "severity": severity, + "is_expected": is_expected, + "request_id": request_id or "unknown", + "thread_id": thread_id, + "search_space_id": search_space_id, + "user_id": user_id, + "message": message, + } + if extra: + payload.update(extra) + + logger = logging.getLogger(__name__) + rendered = json.dumps(payload, ensure_ascii=False) + if severity == "error": + logger.error("[chat_stream_error] %s", rendered) + elif severity == "warn": + logger.warning("[chat_stream_error] %s", rendered) + else: + logger.info("[chat_stream_error] %s", rendered) + + +def _parse_error_payload(message: str) -> dict[str, Any] | None: + candidates = [message] + first_brace_idx = message.find("{") + if first_brace_idx >= 0: + candidates.append(message[first_brace_idx:]) + + for candidate in candidates: + try: + parsed = json.loads(candidate) + if isinstance(parsed, dict): + return parsed + except Exception: + continue + return None + + +def _extract_provider_error_code(parsed: dict[str, Any] | None) -> int | None: + if not isinstance(parsed, dict): + return None + candidates: list[Any] = [parsed.get("code")] + nested = parsed.get("error") + if isinstance(nested, dict): + candidates.append(nested.get("code")) + for value in candidates: + try: + if value is None: + continue + return int(value) + except Exception: + continue + return None + + +def is_provider_rate_limited(exc: BaseException) -> bool: + """Return True if the exception looks like an upstream HTTP 429 / rate limit.""" + raw = str(exc) + lowered = raw.lower() + if "ratelimit" in type(exc).__name__.lower(): + return True + parsed = _parse_error_payload(raw) + provider_code = _extract_provider_error_code(parsed) + if provider_code == 429: + return True + + provider_error_type = "" + if parsed: + top_type = parsed.get("type") + if isinstance(top_type, str): + provider_error_type = top_type.lower() + nested = parsed.get("error") + if isinstance(nested, dict): + nested_type = nested.get("type") + if isinstance(nested_type, str): + provider_error_type = nested_type.lower() + if provider_error_type == "rate_limit_error": + return True + + return ( + "rate limited" in lowered + or "rate-limited" in lowered + or "temporarily rate-limited upstream" in lowered + ) + + +def classify_stream_exception( + exc: Exception, + *, + flow_label: str, +) -> tuple[ + str, str, Literal["info", "warn", "error"], bool, str, dict[str, Any] | None +]: + """Return kind, code, severity, expected flag, message, and optional extra dict.""" + raw = str(exc) + if isinstance(exc, BusyError) or "Thread is busy with another request" in raw: + busy_thread_id = str(exc.request_id) if isinstance(exc, BusyError) else None + if busy_thread_id and is_cancel_requested(busy_thread_id): + cancel_state = get_cancel_state(busy_thread_id) + attempt = cancel_state[0] if cancel_state else 1 + retry_after_ms = compute_turn_cancelling_retry_delay(attempt) + retry_after_at = int(time.time() * 1000) + retry_after_ms + return ( + "thread_busy", + "TURN_CANCELLING", + "info", + True, + "A previous response is still stopping. Please try again in a moment.", + { + "retry_after_ms": retry_after_ms, + "retry_after_at": retry_after_at, + }, + ) + return ( + "thread_busy", + "THREAD_BUSY", + "warn", + True, + "Another response is still finishing for this thread. 
Please try again in a moment.", + None, + ) + + if is_provider_rate_limited(exc): + return ( + "rate_limited", + "RATE_LIMITED", + "warn", + True, + "This model is temporarily rate-limited. Please try again in a few seconds or switch models.", + None, + ) + + return ( + "server_error", + "SERVER_ERROR", + "error", + False, + f"Error during {flow_label}: {raw}", + None, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/emitter.py b/surfsense_backend/app/tasks/chat/streaming/errors/emitter.py new file mode 100644 index 000000000..95806ab87 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/errors/emitter.py @@ -0,0 +1,38 @@ +"""Emit one terminal error SSE frame and log via the stream error classifier.""" + +from __future__ import annotations + +from typing import Any, Literal + +from .classifier import log_chat_stream_error + + +def emit_stream_terminal_error( + *, + streaming_service: Any, + flow: Literal["new", "resume", "regenerate"], + request_id: str | None, + thread_id: int, + search_space_id: int, + user_id: str | None, + message: str, + error_kind: str = "server_error", + error_code: str = "SERVER_ERROR", + severity: Literal["info", "warn", "error"] = "error", + is_expected: bool = False, + extra: dict[str, Any] | None = None, +) -> str: + log_chat_stream_error( + flow=flow, + error_kind=error_kind, + error_code=error_code, + severity=severity, + is_expected=is_expected, + request_id=request_id, + thread_id=thread_id, + search_space_id=search_space_id, + user_id=user_id, + message=message, + extra=extra, + ) + return streaming_service.format_error(message, error_code=error_code, extra=extra) diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py b/surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py new file mode 100644 index 000000000..151dfdaac --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py @@ -0,0 +1,3 @@ +"""Pure helpers for chat streaming.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py b/surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py new file mode 100644 index 000000000..48b44fc1d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py @@ -0,0 +1,60 @@ +"""Split a model chunk into text, reasoning, and tool-call fragment lists.""" + +from __future__ import annotations + +from typing import Any + + +def extract_chunk_parts(chunk: Any) -> dict[str, Any]: + """Return dict with keys text, reasoning, and tool_call_chunks (merged from chunk fields).""" + out: dict[str, Any] = {"text": "", "reasoning": "", "tool_call_chunks": []} + if chunk is None: + return out + + content = getattr(chunk, "content", None) + if isinstance(content, str): + if content: + out["text"] = content + elif isinstance(content, list): + text_parts: list[str] = [] + reasoning_parts: list[str] = [] + for block in content: + if not isinstance(block, dict): + continue + block_type = block.get("type") + if block_type == "text": + value = block.get("text") or block.get("content") or "" + if isinstance(value, str) and value: + text_parts.append(value) + elif block_type == "reasoning": + value = ( + block.get("reasoning") + or block.get("text") + or block.get("content") + or "" + ) + if isinstance(value, str) and value: + reasoning_parts.append(value) + elif block_type in ("tool_call_chunk", "tool_use"): + out["tool_call_chunks"].append(block) + if text_parts: + out["text"] = "".join(text_parts) + if 
reasoning_parts: + out["reasoning"] = "".join(reasoning_parts) + + additional = getattr(chunk, "additional_kwargs", None) or {} + if isinstance(additional, dict): + extra_reasoning = additional.get("reasoning_content") + if isinstance(extra_reasoning, str) and extra_reasoning: + existing = out["reasoning"] + out["reasoning"] = ( + (existing + extra_reasoning) if existing else extra_reasoning + ) + + extra_tool_chunks = getattr(chunk, "tool_call_chunks", None) + if isinstance(extra_tool_chunks, list): + for tcc in extra_tool_chunks: + if isinstance(tcc, dict): + out["tool_call_chunks"].append(tcc) + + return out diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py b/surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py new file mode 100644 index 000000000..dca099b3f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py @@ -0,0 +1,47 @@ +"""Read the first interrupt payload from a LangGraph state snapshot.""" + +from __future__ import annotations + +from typing import Any + + +def first_interrupt_value(state: Any) -> dict[str, Any] | None: + """Return the first interrupt payload across all snapshot tasks.""" + + def _extract(candidate: Any) -> dict[str, Any] | None: + if isinstance(candidate, dict): + value = candidate.get("value", candidate) + return value if isinstance(value, dict) else None + value = getattr(candidate, "value", None) + if isinstance(value, dict): + return value + if isinstance(candidate, list | tuple): + for item in candidate: + extracted = _extract(item) + if extracted is not None: + return extracted + return None + + for task in getattr(state, "tasks", ()) or (): + try: + interrupts = getattr(task, "interrupts", ()) or () + except (AttributeError, IndexError, TypeError): + interrupts = () + if not interrupts: + extracted = _extract(task) + if extracted is not None: + return extracted + continue + for interrupt_item in interrupts: + extracted = _extract(interrupt_item) + if extracted is not None: + return extracted + + try: + state_interrupts = getattr(state, "interrupts", ()) or () + except (AttributeError, IndexError, TypeError): + state_interrupts = () + extracted = _extract(state_interrupts) + if extracted is not None: + return extracted + return None diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py new file mode 100644 index 000000000..fbe4c94b7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py @@ -0,0 +1,32 @@ +"""Match buffered model tool-call chunks to a tool start when ids were missing.""" + +from __future__ import annotations + +from typing import Any + + +def match_buffered_langchain_tool_call_id( + pending_tool_call_chunks: list[dict[str, Any]], + tool_name: str, + run_id: str, + lc_tool_call_id_by_run: dict[str, str], +) -> str | None: + matched_idx: int | None = None + for idx, tcc in enumerate(pending_tool_call_chunks): + if tcc.get("name") == tool_name and tcc.get("id"): + matched_idx = idx + break + if matched_idx is None: + for idx, tcc in enumerate(pending_tool_call_chunks): + if tcc.get("id"): + matched_idx = idx + break + if matched_idx is None: + return None + matched = pending_tool_call_chunks.pop(matched_idx) + candidate = matched.get("id") + if isinstance(candidate, str) and candidate: + if run_id: + lc_tool_call_id_by_run[run_id] = candidate + return candidate + return None diff --git 
a/surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py new file mode 100644 index 000000000..a7c401dee --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py @@ -0,0 +1,43 @@ +"""Normalize filesystem tool payloads for SSE cards and messages.""" + +from __future__ import annotations + +import json +from typing import Any + + +def tool_output_to_text(tool_output: Any) -> str: + if isinstance(tool_output, dict): + if isinstance(tool_output.get("result"), str): + return tool_output["result"] + if isinstance(tool_output.get("error"), str): + return tool_output["error"] + return json.dumps(tool_output, ensure_ascii=False) + return str(tool_output) + + +def tool_output_has_error(tool_output: Any) -> bool: + if isinstance(tool_output, dict): + if tool_output.get("error"): + return True + result = tool_output.get("result") + return bool( + isinstance(result, str) and result.strip().lower().startswith("error:") + ) + if isinstance(tool_output, str): + return tool_output.strip().lower().startswith("error:") + return False + + +def extract_resolved_file_path( + *, tool_name: str, tool_output: Any, tool_input: Any | None = None +) -> str | None: + if isinstance(tool_output, dict): + path_value = tool_output.get("path") + if isinstance(path_value, str) and path_value.strip(): + return path_value.strip() + if tool_name in ("write_file", "edit_file") and isinstance(tool_input, dict): + file_path = tool_input.get("file_path") + if isinstance(file_path, str) and file_path.strip(): + return file_path.strip() + return None diff --git a/surfsense_backend/app/tasks/chat/streaming/stream_result.py b/surfsense_backend/app/tasks/chat/streaming/stream_result.py new file mode 100644 index 000000000..8ea3bd295 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/stream_result.py @@ -0,0 +1,28 @@ +"""Mutable facts collected while streaming one agent turn.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class StreamResult: + accumulated_text: str = "" + is_interrupted: bool = False + interrupt_value: dict[str, Any] | None = None + sandbox_files: list[str] = field(default_factory=list) + agent_called_update_memory: bool = False + request_id: str | None = None + turn_id: str = "" + filesystem_mode: str = "cloud" + client_platform: str = "web" + intent_detected: str = "chat_only" + intent_confidence: float = 0.0 + write_attempted: bool = False + write_succeeded: bool = False + verification_succeeded: bool = False + commit_gate_passed: bool = True + commit_gate_reason: str = "" + assistant_message_id: int | None = None + content_builder: Any | None = field(default=None, repr=False) From 7581a7c9c3247bc977ae556daddd25cc185ef2eb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 09/58] Add chat streaming relay state and thinking-step SSE helpers. 
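
Thinking-step ids are derived from a per-turn counter so successive frames get
stable, ordered ids. A small usage sketch of the constructor and the id helper
added below in relay/state.py (this assumes the surfsense_backend package is on
the import path; the "step" prefix is an arbitrary example value):

    from app.tasks.chat.streaming.relay.state import AgentEventRelayState

    # The counter starts at 1 when the caller has already emitted an initial
    # thinking step, so the next generated id does not collide with it.
    state = AgentEventRelayState.for_invocation(
        initial_step_id="step-1",
        initial_step_title="Reading the question",
        parity_v2=True,
    )
    assert state.thinking_step_counter == 1
    assert state.next_thinking_step_id("step") == "step-2"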
--- .../tasks/chat/streaming/relay/__init__.py | 3 + .../app/tasks/chat/streaming/relay/state.py | 55 +++++++++++++++++++ .../relay/thinking_step_completion.py | 31 +++++++++++ .../chat/streaming/relay/thinking_step_sse.py | 24 ++++++++ 4 files changed, 113 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/state.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py new file mode 100644 index 000000000..c1a5e7175 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py @@ -0,0 +1,3 @@ +"""Relay state: thinking steps, tool bookkeeping, and stream helpers.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py new file mode 100644 index 000000000..e8e35d0b2 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -0,0 +1,55 @@ +"""Mutable counters and maps for one agent stream.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class AgentEventRelayState: + """Tracks text, thinking steps, tool depth, and pending tool-call metadata.""" + + accumulated_text: str = "" + current_text_id: str | None = None + thinking_step_counter: int = 0 + tool_step_ids: dict[str, str] = field(default_factory=dict) + completed_step_ids: set[str] = field(default_factory=set) + last_active_step_id: str | None = None + last_active_step_title: str = "" + last_active_step_items: list[str] = field(default_factory=list) + just_finished_tool: bool = False + active_tool_depth: int = 0 + called_update_memory: bool = False + current_reasoning_id: str | None = None + parity_v2: bool = False + pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) + file_path_by_run: dict[str, str] = field(default_factory=dict) + index_to_meta: dict[int, dict[str, str]] = field(default_factory=dict) + ui_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) + current_lc_tool_call_id: dict[str, str | None] = field( + default_factory=lambda: {"value": None} + ) + + @classmethod + def for_invocation( + cls, + *, + initial_step_id: str | None = None, + initial_step_title: str = "", + initial_step_items: list[str] | None = None, + parity_v2: bool, + ) -> AgentEventRelayState: + counter = 1 if initial_step_id else 0 + return cls( + thinking_step_counter=counter, + last_active_step_id=initial_step_id, + last_active_step_title=initial_step_title, + last_active_step_items=list(initial_step_items or []), + parity_v2=parity_v2, + ) + + def next_thinking_step_id(self, step_prefix: str) -> str: + self.thinking_step_counter += 1 + return f"{step_prefix}-{self.thinking_step_counter}" diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py new file mode 100644 index 000000000..a0be71281 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py @@ -0,0 +1,31 @@ +"""Close the in-progress thinking 
step with a completed status frame.""" + +from __future__ import annotations + +from typing import Any + +from .thinking_step_sse import emit_thinking_step_frame + + +def complete_active_thinking_step( + *, + streaming_service: Any, + content_builder: Any | None, + last_active_step_id: str | None, + last_active_step_title: str, + last_active_step_items: list[str], + completed_step_ids: set[str], +) -> tuple[str | None, str | None]: + """Emit a completed thinking-step frame once; return (frame or None, next active step id).""" + if last_active_step_id and last_active_step_id not in completed_step_ids: + completed_step_ids.add(last_active_step_id) + event = emit_thinking_step_frame( + streaming_service=streaming_service, + content_builder=content_builder, + step_id=last_active_step_id, + title=last_active_step_title, + status="completed", + items=last_active_step_items if last_active_step_items else None, + ) + return event, None + return None, last_active_step_id diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py new file mode 100644 index 000000000..9e8c08dd5 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py @@ -0,0 +1,24 @@ +"""Thinking-step SSE plus optional content-builder updates.""" + +from __future__ import annotations + +from typing import Any + + +def emit_thinking_step_frame( + *, + streaming_service: Any, + content_builder: Any | None, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, +) -> str: + if content_builder is not None: + content_builder.on_thinking_step(step_id, title, status, items) + return streaming_service.format_thinking_step( + step_id=step_id, + title=title, + status=status, + items=items, + ) From ee16e1d5f96b22d7bb9ed822f1ec983c966b7b91 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 10/58] Add LangGraph handlers for chat model, chain, tool, and custom events. 
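
Each handler consumes one LangGraph astream_events event kind and yields
ready-to-send SSE frames. The relay loop that fans events out to these handlers
is not part of this patch, so the dispatch below is only an illustrative sketch
of the intended call shape (the relay() name and the argument plumbing are
assumptions, not the real orchestrator):

    from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames
    from app.tasks.chat.streaming.handlers.chat_model_stream import (
        iter_chat_model_stream_frames,
    )
    from app.tasks.chat.streaming.handlers.custom_event_dispatch import (
        iter_custom_event_frames,
    )
    from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames
    from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames

    async def relay(events, *, state, svc, builder, result, step_prefix, config):
        # events: the async iterator returned by graph.astream_events(...).
        async for event in events:
            kind = event.get("event")
            if kind == "on_chat_model_stream":
                frames = iter_chat_model_stream_frames(
                    event, state=state, streaming_service=svc,
                    content_builder=builder, step_prefix=step_prefix,
                )
            elif kind == "on_tool_start":
                frames = iter_tool_start_frames(
                    event, state=state, streaming_service=svc,
                    content_builder=builder, result=result, step_prefix=step_prefix,
                )
            elif kind == "on_tool_end":
                frames = iter_tool_end_frames(
                    event, state=state, streaming_service=svc,
                    content_builder=builder, result=result,
                    step_prefix=step_prefix, config=config,
                )
            elif kind == "on_chain_end":
                frames = iter_chain_end_frames(
                    event, state=state, streaming_service=svc, content_builder=builder,
                )
            elif kind == "on_custom_event":
                frames = iter_custom_event_frames(
                    event, state=state, streaming_service=svc, content_builder=builder,
                )
            else:
                continue
            for frame in frames:  # each frame is a complete "data: ...\n\n" string
                yield frame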
--- .../tasks/chat/streaming/handlers/__init__.py | 3 + .../chat/streaming/handlers/chain_end.py | 23 +++ .../streaming/handlers/chat_model_stream.py | 149 ++++++++++++++++++ .../handlers/custom_event_dispatch.py | 56 +++++++ .../chat/streaming/handlers/custom_events.py | 77 +++++++++ .../tasks/chat/streaming/handlers/tool_end.py | 112 +++++++++++++ .../streaming/handlers/tool_output_frame.py | 24 +++ .../chat/streaming/handlers/tool_start.py | 142 +++++++++++++++++ 8 files changed, 586 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py new file mode 100644 index 000000000..3e2165932 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py @@ -0,0 +1,3 @@ +"""LangGraph stream handlers by event kind.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py new file mode 100644 index 000000000..c61058ac7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py @@ -0,0 +1,23 @@ +"""Close open text when a LangGraph chain or agent node finishes.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState + + +def iter_chain_end_frames( + _event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, +) -> Iterator[str]: + """Close the open text stream if one is open.""" + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py new file mode 100644 index 000000000..861342b32 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py @@ -0,0 +1,149 @@ +"""Chat model stream: text, reasoning, and tool-call chunk SSE.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.helpers.chunk_parts import extract_chunk_parts +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + complete_active_thinking_step, +) + + +def iter_chat_model_stream_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, + step_prefix: str, +) -> Iterator[str]: + """SSE frames for one chat-model 
chunk.""" + if state.active_tool_depth > 0: + return + if "surfsense:internal" in event.get("tags", []): + return + chunk = event.get("data", {}).get("chunk") + if not chunk: + return + parts = extract_chunk_parts(chunk) + + reasoning_delta = parts["reasoning"] + text_delta = parts["text"] + + if state.parity_v2 and reasoning_delta: + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + if state.current_reasoning_id is None: + comp, new_active = complete_active_thinking_step( + streaming_service=streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if comp: + yield comp + state.last_active_step_id = new_active + if state.just_finished_tool: + state.last_active_step_id = None + state.last_active_step_title = "" + state.last_active_step_items = [] + state.just_finished_tool = False + state.current_reasoning_id = streaming_service.generate_reasoning_id() + yield streaming_service.format_reasoning_start(state.current_reasoning_id) + if content_builder is not None: + content_builder.on_reasoning_start(state.current_reasoning_id) + yield streaming_service.format_reasoning_delta( + state.current_reasoning_id, reasoning_delta + ) + if content_builder is not None: + content_builder.on_reasoning_delta( + state.current_reasoning_id, reasoning_delta + ) + + if text_delta: + if state.current_reasoning_id is not None: + yield streaming_service.format_reasoning_end(state.current_reasoning_id) + if content_builder is not None: + content_builder.on_reasoning_end(state.current_reasoning_id) + state.current_reasoning_id = None + if state.current_text_id is None: + comp, new_active = complete_active_thinking_step( + streaming_service=streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if comp: + yield comp + state.last_active_step_id = new_active + if state.just_finished_tool: + state.last_active_step_id = None + state.last_active_step_title = "" + state.last_active_step_items = [] + state.just_finished_tool = False + state.current_text_id = streaming_service.generate_text_id() + yield streaming_service.format_text_start(state.current_text_id) + if content_builder is not None: + content_builder.on_text_start(state.current_text_id) + yield streaming_service.format_text_delta(state.current_text_id, text_delta) + state.accumulated_text += text_delta + if content_builder is not None: + content_builder.on_text_delta(state.current_text_id, text_delta) + + if state.parity_v2 and parts["tool_call_chunks"]: + for tcc in parts["tool_call_chunks"]: + idx = tcc.get("index") + + if idx is not None and idx not in state.index_to_meta: + lc_id = tcc.get("id") + name = tcc.get("name") + if lc_id and name: + ui_id = lc_id + + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + if state.current_reasoning_id is not None: + yield streaming_service.format_reasoning_end( + 
state.current_reasoning_id + ) + if content_builder is not None: + content_builder.on_reasoning_end(state.current_reasoning_id) + state.current_reasoning_id = None + + state.index_to_meta[idx] = { + "ui_id": ui_id, + "lc_id": lc_id, + "name": name, + } + yield streaming_service.format_tool_input_start( + ui_id, + name, + langchain_tool_call_id=lc_id, + ) + if content_builder is not None: + content_builder.on_tool_input_start(ui_id, name, lc_id) + + meta = state.index_to_meta.get(idx) if idx is not None else None + if meta: + args_chunk = tcc.get("args") or "" + if args_chunk: + yield streaming_service.format_tool_input_delta( + meta["ui_id"], args_chunk + ) + if content_builder is not None: + content_builder.on_tool_input_delta(meta["ui_id"], args_chunk) + else: + state.pending_tool_call_chunks.append(tcc) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py new file mode 100644 index 000000000..b373919cf --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py @@ -0,0 +1,56 @@ +"""Custom graph events routed to SSE (documents, action logs, report progress).""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.custom_events import ( + handle_action_log, + handle_action_log_updated, + handle_document_created, + handle_report_progress, +) +from app.tasks.chat.streaming.relay.state import AgentEventRelayState + + +def iter_custom_event_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, +) -> Iterator[str]: + """Yield any SSE produced by ad-hoc graph events (documents, action logs, report progress).""" + name = event.get("name") + data = event.get("data", {}) + + if name == "report_progress": + frame, state.last_active_step_items = handle_report_progress( + data, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + streaming_service=streaming_service, + content_builder=content_builder, + ) + if frame: + yield frame + return + + if name == "document_created": + frame = handle_document_created(data, streaming_service=streaming_service) + if frame: + yield frame + return + + if name == "action_log": + frame = handle_action_log(data, streaming_service=streaming_service) + if frame: + yield frame + return + + if name == "action_log_updated": + frame = handle_action_log_updated(data, streaming_service=streaming_service) + if frame: + yield frame diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py new file mode 100644 index 000000000..765f1d790 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py @@ -0,0 +1,77 @@ +"""Custom-event payloads turned into SSE (no model/tool stream handling).""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + + +def handle_report_progress( + data: dict[str, Any], + *, + last_active_step_id: str | None, + last_active_step_title: str, + last_active_step_items: list[str], + streaming_service: Any, + content_builder: Any | None, +) -> tuple[str | None, list[str]]: + """Update report step items; may emit one 
thinking SSE frame. + + Returns (frame or None, items list after update). + """ + message = data.get("message", "") + if not message or not last_active_step_id: + return None, last_active_step_items + + phase = data.get("phase", "") + topic_items = [ + item for item in last_active_step_items if item.startswith("Topic:") + ] + + if phase in ("revising_section", "adding_section"): + plan_items = [ + item + for item in last_active_step_items + if item.startswith("Topic:") + or item.startswith("Modifying ") + or item.startswith("Adding ") + or item.startswith("Removing ") + ] + plan_items = [item for item in plan_items if not item.endswith("...")] + new_items = [*plan_items, message] + else: + new_items = [*topic_items, message] + + frame = emit_thinking_step_frame( + streaming_service=streaming_service, + content_builder=content_builder, + step_id=last_active_step_id, + title=last_active_step_title, + status="in_progress", + items=new_items, + ) + return frame, new_items + + +def handle_document_created(data: dict[str, Any], *, streaming_service: Any) -> str | None: + if not data.get("id"): + return None + return streaming_service.format_data( + "documents-updated", + {"action": "created", "document": data}, + ) + + +def handle_action_log(data: dict[str, Any], *, streaming_service: Any) -> str | None: + if data.get("id") is None: + return None + return streaming_service.format_data("action-log", data) + + +def handle_action_log_updated( + data: dict[str, Any], *, streaming_service: Any +) -> str | None: + if data.get("id") is None: + return None + return streaming_service.format_data("action-log-updated", data) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py new file mode 100644 index 000000000..0bfef25eb --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -0,0 +1,112 @@ +"""Tool end: thinking completion, tool output, and terminal SSE.""" + +from __future__ import annotations + +import json +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.tools import ( + ToolCompletionEmissionContext, + iter_tool_completion_emission_frames, + resolve_tool_completed_thinking_step, +) +from app.tasks.chat.streaming.helpers.tool_output import tool_output_has_error +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + + +def iter_tool_end_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, + result: Any, + step_prefix: str, + config: dict[str, Any], +) -> Iterator[str]: + """SSE frames when one tool run finishes.""" + state.active_tool_depth = max(0, state.active_tool_depth - 1) + run_id = event.get("run_id", "") + tool_name = event.get("name", "unknown_tool") + raw_output = event.get("data", {}).get("output", "") + staged_file_path = ( + state.file_path_by_run.pop(run_id, None) if run_id else None + ) + + if tool_name == "update_memory": + state.called_update_memory = True + + if hasattr(raw_output, "content"): + content = raw_output.content + if isinstance(content, str): + try: + tool_output = json.loads(content) + except (json.JSONDecodeError, TypeError): + tool_output = {"result": content} + elif isinstance(content, dict): + tool_output = content + else: + tool_output = {"result": str(content)} + elif isinstance(raw_output, dict): + tool_output = 
raw_output + else: + tool_output = {"result": str(raw_output) if raw_output else "completed"} + + if tool_name in ("write_file", "edit_file"): + if tool_output_has_error(tool_output): + pass + else: + result.write_succeeded = True + result.verification_succeeded = True + + tool_call_id = state.ui_tool_call_id_by_run.get( + run_id, + f"call_{run_id[:32]}" if run_id else "call_unknown", + ) + original_step_id = state.tool_step_ids.get( + run_id, f"{step_prefix}-unknown-{run_id[:8]}" + ) + state.completed_step_ids.add(original_step_id) + + holder = state.current_lc_tool_call_id + holder["value"] = None + authoritative = getattr(raw_output, "tool_call_id", None) + if isinstance(authoritative, str) and authoritative: + holder["value"] = authoritative + if run_id: + state.lc_tool_call_id_by_run[run_id] = authoritative + elif run_id and run_id in state.lc_tool_call_id_by_run: + holder["value"] = state.lc_tool_call_id_by_run[run_id] + + items = state.last_active_step_items + title, completed_items = resolve_tool_completed_thinking_step( + tool_name, tool_output, items + ) + yield emit_thinking_step_frame( + streaming_service=streaming_service, + content_builder=content_builder, + step_id=original_step_id, + title=title, + status="completed", + items=completed_items, + ) + + state.just_finished_tool = True + state.last_active_step_id = None + state.last_active_step_title = "" + state.last_active_step_items = [] + + emission_ctx = ToolCompletionEmissionContext( + tool_name=tool_name, + tool_call_id=tool_call_id, + tool_output=tool_output, + streaming_service=streaming_service, + content_builder=content_builder, + langchain_tool_call_id_holder=holder, + stream_result=result, + langgraph_config=config, + staged_workspace_file_path=staged_file_path, + ) + yield from iter_tool_completion_emission_frames(emission_ctx) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py new file mode 100644 index 000000000..07244364c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py @@ -0,0 +1,24 @@ +"""Emit tool-output SSE and optional assistant content updates.""" + +from __future__ import annotations + +from typing import Any + + +def emit_tool_output_available_frame( + *, + streaming_service: Any, + content_builder: Any | None, + langchain_id_holder: dict[str, str | None], + call_id: str, + output: Any, +) -> str: + if content_builder is not None: + content_builder.on_tool_output_available( + call_id, output, langchain_id_holder["value"] + ) + return streaming_service.format_tool_output_available( + call_id, + output, + langchain_tool_call_id=langchain_id_holder["value"], + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py new file mode 100644 index 000000000..c316cc74a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -0,0 +1,142 @@ +"""Tool start: thinking-step and tool-input SSE.""" + +from __future__ import annotations + +import json +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.tools import resolve_tool_start_thinking +from app.tasks.chat.streaming.helpers.tool_call_matching import ( + match_buffered_langchain_tool_call_id, +) +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + 
complete_active_thinking_step, +) +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + + +def iter_tool_start_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, + result: Any, + step_prefix: str, +) -> Iterator[str]: + """SSE frames for the start of one tool run.""" + state.active_tool_depth += 1 + tool_name = event.get("name", "unknown_tool") + run_id = event.get("run_id", "") + tool_input = event.get("data", {}).get("input", {}) + if tool_name in ("write_file", "edit_file"): + result.write_attempted = True + if isinstance(tool_input, dict): + file_path = tool_input.get("file_path") + if isinstance(file_path, str) and file_path.strip() and run_id: + state.file_path_by_run[run_id] = file_path.strip() + + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + + if state.last_active_step_title != "Synthesizing response": + comp, new_active = complete_active_thinking_step( + streaming_service=streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if comp: + yield comp + state.last_active_step_id = new_active + + state.just_finished_tool = False + tool_step_id = state.next_thinking_step_id(step_prefix) + state.tool_step_ids[run_id] = tool_step_id + state.last_active_step_id = tool_step_id + + thinking = resolve_tool_start_thinking(tool_name, tool_input) + state.last_active_step_title = thinking.title + state.last_active_step_items = thinking.items + frame_kw: dict[str, Any] = { + "streaming_service": streaming_service, + "content_builder": content_builder, + "step_id": tool_step_id, + "title": thinking.title, + "status": "in_progress", + } + if thinking.include_items_on_frame: + frame_kw["items"] = thinking.items + yield emit_thinking_step_frame(**frame_kw) + + matched_meta: dict[str, str] | None = None + if state.parity_v2: + taken_ui_ids = set(state.ui_tool_call_id_by_run.values()) + for meta in state.index_to_meta.values(): + if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: + matched_meta = meta + break + + tool_call_id: str + langchain_tool_call_id: str | None = None + if matched_meta is not None: + tool_call_id = matched_meta["ui_id"] + langchain_tool_call_id = matched_meta["lc_id"] + if run_id: + state.lc_tool_call_id_by_run[run_id] = matched_meta["lc_id"] + else: + tool_call_id = ( + f"call_{run_id[:32]}" + if run_id + else streaming_service.generate_tool_call_id() + ) + if state.parity_v2: + langchain_tool_call_id = match_buffered_langchain_tool_call_id( + state.pending_tool_call_chunks, + tool_name, + run_id, + state.lc_tool_call_id_by_run, + ) + yield streaming_service.format_tool_input_start( + tool_call_id, + tool_name, + langchain_tool_call_id=langchain_tool_call_id, + ) + if content_builder is not None: + content_builder.on_tool_input_start( + tool_call_id, tool_name, langchain_tool_call_id + ) + + if run_id: + state.ui_tool_call_id_by_run[run_id] = tool_call_id + + if isinstance(tool_input, dict): + _safe_input: dict[str, Any] = {} + for _k, _v in tool_input.items(): + try: + json.dumps(_v) + _safe_input[_k] = _v + except (TypeError, ValueError, OverflowError): + pass + else: + 
_safe_input = {"input": tool_input} + yield streaming_service.format_tool_input_available( + tool_call_id, + tool_name, + _safe_input, + langchain_tool_call_id=langchain_tool_call_id, + ) + if content_builder is not None: + content_builder.on_tool_input_available( + tool_call_id, + tool_name, + _safe_input, + langchain_tool_call_id, + ) From 1392abf5b1d9cbb2309c7780edeacf9c0c72f205 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 11/58] Add chat tool streaming registry with shared, default, and connector tools. --- .../chat/streaming/handlers/tools/__init__.py | 23 +++++ .../handlers/tools/connector/__init__.py | 0 .../tools/connector/shared/__init__.py | 0 .../tools/connector/shared/emission.py | 15 ++++ .../tools/connector/shared/thinking.py | 22 +++++ .../tools/connector/shared/tool_names.py | 31 +++++++ .../handlers/tools/default/__init__.py | 3 + .../handlers/tools/default/emission.py | 24 +++++ .../handlers/tools/default/thinking.py | 23 +++++ .../handlers/tools/emission_context.py | 34 +++++++ .../chat/streaming/handlers/tools/registry.py | 88 +++++++++++++++++++ .../handlers/tools/shared/__init__.py | 0 .../streaming/handlers/tools/shared/model.py | 12 +++ 13 files changed, 275 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py new file mode 100644 index 000000000..4b191c100 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py @@ -0,0 +1,23 @@ +"""Per-tool streaming: thinking-step and completion emission.""" + +from __future__ import annotations + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) +from app.tasks.chat.streaming.handlers.tools.registry import ( + iter_tool_completion_emission_frames, + resolve_tool_completed_thinking_step, + resolve_tool_start_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + +__all__ = [ + "ToolCompletionEmissionContext", + "ToolStartThinking", + "iter_tool_completion_emission_frames", + "resolve_tool_completed_thinking_step", + "resolve_tool_start_thinking", +] diff --git 
a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py new file mode 100644 index 000000000..8e19dc224 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py new file mode 100644 index 000000000..7e9dd8b96 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.default import ( + thinking as default_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return default_thinking.resolve_start_thinking(tool_name, tool_input) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + return default_thinking.resolve_completed_thinking( + tool_name, tool_output, last_items + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py new file mode 100644 index 000000000..ab698b32d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +SHARED_CONNECTOR_TOOLS: frozenset[str] = frozenset( + { + "create_calendar_event", + "create_confluence_page", + "create_dropbox_file", + "create_gmail_draft", + "create_google_drive_file", + "create_jira_issue", + "create_linear_issue", + "create_notion_page", + "create_onedrive_file", + "delete_calendar_event", + "delete_confluence_page", + "delete_dropbox_file", + "delete_google_drive_file", + "delete_jira_issue", + "delete_linear_issue", + "delete_notion_page", + "delete_onedrive_file", + "send_gmail_email", + "trash_gmail_email", + "update_calendar_event", + "update_confluence_page", + "update_gmail_draft", + "update_jira_issue", + "update_linear_issue", + "update_notion_page", + } +) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py new file mode 100644 index 000000000..5e84a37f4 --- 
/dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py @@ -0,0 +1,3 @@ +"""Fallback tool package.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py new file mode 100644 index 000000000..e24c619a7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py @@ -0,0 +1,24 @@ +"""Default tool-output card and a short completion terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + yield ctx.emit_tool_output_card( + { + "status": "completed", + "result_length": len(str(ctx.tool_output)), + }, + ) + yield ctx.streaming_service.format_terminal_info( + f"Tool {ctx.tool_name} completed", + "success", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py new file mode 100644 index 000000000..46d15a4e7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py @@ -0,0 +1,23 @@ +"""Fallback thinking-step copy for unknown tools and connectors without custom UI.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_input + title = tool_name.replace("_", " ").strip().capitalize() or tool_name + return ToolStartThinking(title=title, items=[], include_items_on_frame=False) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str] +) -> tuple[str, list[str]]: + del tool_output + title = tool_name.replace("_", " ").strip().capitalize() or tool_name + return (title, last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py new file mode 100644 index 000000000..d9ff796c0 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py @@ -0,0 +1,34 @@ +"""Context for one tool-completion emission pass.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from app.tasks.chat.streaming.handlers.tool_output_frame import ( + emit_tool_output_available_frame, +) + + +@dataclass +class ToolCompletionEmissionContext: + """Streaming service, tool output, and ids for completion frames.""" + + tool_name: str + tool_call_id: str + tool_output: Any + streaming_service: Any + content_builder: Any | None + langchain_tool_call_id_holder: dict[str, str | None] + stream_result: Any + langgraph_config: dict[str, Any] + staged_workspace_file_path: str | None + + def emit_tool_output_card(self, payload: Any) -> str: + return emit_tool_output_available_frame( + streaming_service=self.streaming_service, + content_builder=self.content_builder, + langchain_id_holder=self.langchain_tool_call_id_holder, + call_id=self.tool_call_id, + output=payload, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py 
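A completed tool call becomes frames by building one context and handing it to the registry resolver added next. A sketch, assuming streaming_service and stream_result are already in scope and that an empty holder dict is sufficient to start (the ids here are placeholders):

    ctx = ToolCompletionEmissionContext(
        tool_name="write_file",
        tool_call_id="call_abc123",
        tool_output={"result": "Updated /workspace/report.md"},
        streaming_service=streaming_service,
        content_builder=None,
        langchain_tool_call_id_holder={},
        stream_result=stream_result,
        langgraph_config={"configurable": {"thread_id": "thread_1"}},
        staged_workspace_file_path=None,
    )
    frames = list(iter_tool_completion_emission_frames(ctx))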
b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py new file mode 100644 index 000000000..c0568f870 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py @@ -0,0 +1,88 @@ +"""Resolve thinking and emission modules by tool name.""" + +from __future__ import annotations + +import importlib +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.connector.shared.tool_names import ( + SHARED_CONNECTOR_TOOLS, +) +from app.tasks.chat.streaming.handlers.tools.deliverables.tool_names import ( + DELIVERABLE_TOOLS, +) +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) +from app.tasks.chat.streaming.handlers.tools.filesystem.tool_names import ( + FILESYSTEM_TOOLS, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + +_BASE = "app.tasks.chat.streaming.handlers.tools" +_CONNECTOR_SHARED = "connector.shared" + +_THINKING_ALIAS: dict[str, str] = { + "execute_code": "filesystem.execute", +} +_EMISSION_ALIAS: dict[str, str] = { + "edit_file": "filesystem.write_file", + "execute_code": "filesystem.execute", +} + + +def _thinking_module(tool_name: str) -> str: + if tool_name in SHARED_CONNECTOR_TOOLS: + return _CONNECTOR_SHARED + if tool_name in FILESYSTEM_TOOLS: + return f"filesystem.{tool_name}" + if tool_name in DELIVERABLE_TOOLS: + return f"deliverables.{tool_name}" + return _THINKING_ALIAS.get(tool_name, tool_name) + + +def _emission_module(tool_name: str) -> str: + if tool_name in _EMISSION_ALIAS: + return _EMISSION_ALIAS[tool_name] + if tool_name in SHARED_CONNECTOR_TOOLS: + return _CONNECTOR_SHARED + if tool_name in DELIVERABLE_TOOLS: + return f"deliverables.{tool_name}" + if tool_name in FILESYSTEM_TOOLS: + return f"filesystem.{tool_name}" + return tool_name + + +def _import_thinking(tool_name: str): + try: + return importlib.import_module(f"{_BASE}.{_thinking_module(tool_name)}.thinking") + except ModuleNotFoundError: + return importlib.import_module(f"{_BASE}.default.thinking") + + +def _import_emission(tool_name: str): + try: + return importlib.import_module(f"{_BASE}.{_emission_module(tool_name)}.emission") + except ModuleNotFoundError: + return importlib.import_module(f"{_BASE}.default.emission") + + +def resolve_tool_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return _import_thinking(tool_name).resolve_start_thinking(tool_name, tool_input) + + +def resolve_tool_completed_thinking_step( + tool_name: str, tool_output: Any, last_items: list[str] +) -> tuple[str, list[str]]: + return _import_thinking(tool_name).resolve_completed_thinking( + tool_name, tool_output, last_items + ) + + +def iter_tool_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + yield from _import_emission(ctx.tool_name).iter_completion_emission_frames(ctx) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py new file mode 100644 index 000000000..047a84374 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py @@ -0,0 +1,12 @@ +"""In-progress thinking-step title and bullet lines.""" + +from 
__future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class ToolStartThinking: + title: str + items: list[str] + include_items_on_frame: bool = True From a322eedaa1a3a0604ffb5f053b9e5381ac539284 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 12/58] Add filesystem tool streaming handlers for chat runs. --- .../handlers/tools/filesystem/__init__.py | 0 .../tools/filesystem/edit_file/__init__.py | 0 .../tools/filesystem/edit_file/thinking.py | 27 +++++++++ .../tools/filesystem/execute/__init__.py | 0 .../tools/filesystem/execute/emission.py | 40 +++++++++++++ .../tools/filesystem/execute/thinking.py | 42 +++++++++++++ .../tools/filesystem/glob/__init__.py | 0 .../tools/filesystem/glob/thinking.py | 27 +++++++++ .../tools/filesystem/grep/__init__.py | 0 .../tools/filesystem/grep/thinking.py | 31 ++++++++++ .../handlers/tools/filesystem/ls/__init__.py | 0 .../handlers/tools/filesystem/ls/thinking.py | 59 +++++++++++++++++++ .../tools/filesystem/mkdir/__init__.py | 0 .../tools/filesystem/mkdir/thinking.py | 27 +++++++++ .../tools/filesystem/move_file/__init__.py | 0 .../tools/filesystem/move_file/thinking.py | 33 +++++++++++ .../tools/filesystem/read_file/__init__.py | 0 .../tools/filesystem/read_file/thinking.py | 27 +++++++++ .../handlers/tools/filesystem/rm/__init__.py | 0 .../handlers/tools/filesystem/rm/thinking.py | 28 +++++++++ .../tools/filesystem/rmdir/__init__.py | 0 .../tools/filesystem/rmdir/thinking.py | 27 +++++++++ .../tools/filesystem/shared/__init__.py | 0 .../tools/filesystem/shared/tool_input.py | 17 ++++++ .../handlers/tools/filesystem/tool_names.py | 18 ++++++ .../tools/filesystem/write_file/__init__.py | 0 .../tools/filesystem/write_file/emission.py | 43 ++++++++++++++ .../tools/filesystem/write_file/thinking.py | 27 +++++++++ .../tools/filesystem/write_todos/__init__.py | 0 .../tools/filesystem/write_todos/thinking.py | 34 +++++++++++ 30 files changed, 507 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py create mode 100644 
surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py new file mode 100644 index 000000000..8669107db --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py @@ -0,0 +1,27 @@ +"""edit_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + fp = d.get("file_path", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking(title="Editing file", items=[truncate_path(fp)]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Editing file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/__init__.py 
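edit_file, read_file, rm, and write_file all render their target path through truncate_path from filesystem/shared/tool_input.py (added later in this patch), which keeps the tail of long paths behind a leading ellipsis:

    from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import (
        truncate_path,
    )

    long_path = "/workspace/" + "nested/" * 20 + "file.md"   # 158 characters
    truncate_path("/short/path.md")   # unchanged, fits within 80 chars
    truncate_path(long_path)          # "…" plus the last 77 characters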
b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py new file mode 100644 index 000000000..0ff87a907 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py @@ -0,0 +1,40 @@ +"""execute: exit code, stdout, sandbox file hints.""" + +from __future__ import annotations + +import re +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + raw_text = out.get("result", "") if isinstance(out, dict) else str(out) + exit_code: int | None = None + output_text = raw_text + m = re.match(r"^Exit code:\s*(\d+)", raw_text) + if m: + exit_code = int(m.group(1)) + om = re.search(r"\nOutput:\n([\s\S]*)", raw_text) + output_text = om.group(1) if om else "" + thread_id_str = ctx.langgraph_config.get("configurable", {}).get("thread_id", "") + + for sf_match in re.finditer( + r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE + ): + fpath = sf_match.group(1).strip() + if fpath and fpath not in ctx.stream_result.sandbox_files: + ctx.stream_result.sandbox_files.append(fpath) + + yield ctx.emit_tool_output_card( + { + "exit_code": exit_code, + "output": output_text, + "thread_id": thread_id_str, + }, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py new file mode 100644 index 000000000..2c8aa296b --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py @@ -0,0 +1,42 @@ +"""execute: sandbox command thinking + completion lines.""" + +from __future__ import annotations + +import re +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + cmd = d.get("command", "") if isinstance(tool_input, dict) else str(tool_input) + display_cmd = cmd[:80] + ("…" if len(cmd) > 80 else "") + return ToolStartThinking(title="Running command", items=[f"$ {display_cmd}"]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + raw_text = ( + tool_output.get("result", "") + if isinstance(tool_output, dict) + else str(tool_output) + ) + m = re.match(r"^Exit code:\s*(\d+)", raw_text) + exit_code_val = int(m.group(1)) if m else None + if exit_code_val is not None and exit_code_val == 0: + completed = [*items, "Completed successfully"] + elif exit_code_val is not None: + completed = [*items, f"Exit code: {exit_code_val}"] + else: + completed = [*items, "Finished"] + return ("Running command", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/__init__.py new 
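Both execute modules assume the sandbox renders results as an "Exit code: N" header, an "Output:" block, and optional SANDBOX_FILE marker lines. A worked example using the same regexes:

    import re

    raw_text = "Exit code: 0\nOutput:\nhello\nSANDBOX_FILE: /tmp/chart.png"
    m = re.match(r"^Exit code:\s*(\d+)", raw_text)
    exit_code = int(m.group(1)) if m else None           # 0
    om = re.search(r"\nOutput:\n([\s\S]*)", raw_text)
    output_text = om.group(1) if om else ""              # "hello\nSANDBOX_FILE: ..."
    files = re.finditer(r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE)
    # [f.group(1).strip() for f in files] == ["/tmp/chart.png"]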
file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py new file mode 100644 index 000000000..f5a57beac --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py @@ -0,0 +1,27 @@ +"""glob: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + pat = d.get("pattern", "") if isinstance(tool_input, dict) else str(tool_input) + base = d.get("path", "/") if isinstance(tool_input, dict) else "/" + return ToolStartThinking(title="Searching files", items=[f"{pat} in {base}"]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Searching files", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py new file mode 100644 index 000000000..da0864177 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py @@ -0,0 +1,31 @@ +"""grep: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + pat = d.get("pattern", "") if isinstance(tool_input, dict) else str(tool_input) + grep_path = d.get("path", "") if isinstance(tool_input, dict) else "" + display_pat = pat[:60] + ("…" if len(pat) > 60 else "") + return ToolStartThinking( + title="Searching content", + items=[f'"{display_pat}"' + (f" in {grep_path}" if grep_path else "")], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Searching content", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py new file mode 100644 index 000000000..80c547b5a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py @@ -0,0 +1,59 @@ +"""ls: thinking-step copy for directory listing.""" + +from __future__ import annotations + +import ast +from typing import 
Any + +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + if isinstance(tool_input, dict): + path = tool_input.get("path", "/") + else: + path = str(tool_input) + return ToolStartThinking(title="Listing files", items=[path]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + if isinstance(tool_output, dict): + ls_output = tool_output.get("result", "") + elif isinstance(tool_output, str): + ls_output = tool_output + else: + ls_output = str(tool_output) if tool_output else "" + file_names: list[str] = [] + if ls_output: + paths: list[str] = [] + try: + parsed = ast.literal_eval(ls_output) + if isinstance(parsed, list): + paths = [str(p) for p in parsed] + except (ValueError, SyntaxError): + paths = [ + line.strip() + for line in ls_output.strip().split("\n") + if line.strip() + ] + for p in paths: + name = p.rstrip("/").split("/")[-1] + if name and len(name) <= 40: + file_names.append(name) + elif name: + file_names.append(name[:37] + "...") + if file_names: + if len(file_names) <= 5: + completed = [f"[{name}]" for name in file_names] + else: + completed = [f"[{name}]" for name in file_names[:4]] + completed.append(f"(+{len(file_names) - 4} more)") + else: + completed = ["No files found"] + return ("Listing files", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py new file mode 100644 index 000000000..3a3707698 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py @@ -0,0 +1,27 @@ +"""mkdir: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + p = d.get("path", "") if isinstance(tool_input, dict) else str(tool_input) + display = p if len(p) <= 80 else "…" + p[-77:] + return ToolStartThinking(title="Creating folder", items=[display] if display else []) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Creating folder", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py new file mode 100644 index 000000000..192a789f4 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py @@ -0,0 +1,33 @@ 
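The ls completion path accepts either a Python list repr (via ast.literal_eval) or plain newline-separated output, then shows at most four bracketed names plus a remainder count. For example:

    from app.tasks.chat.streaming.handlers.tools.filesystem.ls.thinking import (
        resolve_completed_thinking,
    )

    title, items = resolve_completed_thinking(
        "ls", "['a.md', 'b.md', 'c.md', 'd.md', 'e.md', 'f.md']", []
    )
    # title == "Listing files"
    # items == ["[a.md]", "[b.md]", "[c.md]", "[d.md]", "(+2 more)"]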
+"""move_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_middle, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + src = d.get("source_path", "") if isinstance(tool_input, dict) else "" + dst = d.get("destination_path", "") if isinstance(tool_input, dict) else "" + display_src = truncate_middle(src, max_len=60) + display_dst = truncate_middle(dst, max_len=60) + return ToolStartThinking( + title="Moving file", + items=[f"{display_src} → {display_dst}"] if src or dst else [], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Moving file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py new file mode 100644 index 000000000..3f4290ad7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py @@ -0,0 +1,27 @@ +"""read_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + fp = d.get("file_path", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking(title="Reading file", items=[truncate_path(fp)]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Reading file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py new file mode 100644 index 000000000..a82a44e6f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py @@ -0,0 +1,28 @@ +"""rm: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + rm_path = d.get("path", "") if isinstance(tool_input, 
dict) else str(tool_input) + display = truncate_path(rm_path) + return ToolStartThinking(title="Deleting file", items=[display] if display else []) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Deleting file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py new file mode 100644 index 000000000..6c97904b7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py @@ -0,0 +1,27 @@ +"""rmdir: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + p = d.get("path", "") if isinstance(tool_input, dict) else str(tool_input) + display = p if len(p) <= 80 else "…" + p[-77:] + return ToolStartThinking(title="Deleting folder", items=[display] if display else []) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Deleting folder", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py new file mode 100644 index 000000000..507782283 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py @@ -0,0 +1,17 @@ +"""Tool-call args + display truncation for filesystem thinking modules.""" + +from __future__ import annotations + +from typing import Any + + +def as_tool_input_dict(tool_input: Any) -> dict[str, Any]: + return tool_input if isinstance(tool_input, dict) else {} + + +def truncate_path(fp: str, *, max_len: int = 80) -> str: + return fp if len(fp) <= max_len else "…" + fp[-(max_len - 3) :] + + +def truncate_middle(s: str, *, max_len: int = 60) -> str: + return s if len(s) <= max_len else "…" + s[-(max_len - 3) :] diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py new file mode 100644 index 000000000..e2ad33736 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +FILESYSTEM_TOOLS: frozenset[str] = frozenset( + { + "read_file", + "glob", + "grep", + "ls", + "mkdir", + "move_file", + "rm", + "rmdir", + "write_todos", + "write_file", + "edit_file", + "execute", + } +) diff --git 
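With FILESYSTEM_TOOLS in place, the module resolution from patch 11 becomes concrete. Sketched against the registry internals (module paths relative to app.tasks.chat.streaming.handlers.tools):

    _thinking_module("ls")                  # "filesystem.ls"
    _thinking_module("execute_code")        # "filesystem.execute", via _THINKING_ALIAS
    _thinking_module("create_jira_issue")   # "connector.shared"
    _thinking_module("made_up_tool")        # "made_up_tool"; import fails, default wins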
a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py new file mode 100644 index 000000000..820235379 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py @@ -0,0 +1,43 @@ +"""write_file: path + status envelope on the tool-output card.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) +from app.tasks.chat.streaming.helpers.tool_output import ( + extract_resolved_file_path, + tool_output_has_error, + tool_output_to_text, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + resolved_path = extract_resolved_file_path( + tool_name=ctx.tool_name, + tool_output=ctx.tool_output, + tool_input={"file_path": ctx.staged_workspace_file_path} + if ctx.staged_workspace_file_path + else None, + ) + result_text = tool_output_to_text(ctx.tool_output) + if tool_output_has_error(ctx.tool_output): + yield ctx.emit_tool_output_card( + { + "status": "error", + "error": result_text, + "path": resolved_path, + }, + ) + else: + yield ctx.emit_tool_output_card( + { + "status": "completed", + "path": resolved_path, + "result": result_text, + }, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py new file mode 100644 index 000000000..43bc8a65f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py @@ -0,0 +1,27 @@ +"""write_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + fp = d.get("file_path", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking(title="Writing file", items=[truncate_path(fp)]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Writing file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py new file mode 100644 index 000000000..43e533daa --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py @@ -0,0 +1,34 @@ +"""write_todos: thinking-step copy.""" + +from __future__ 
import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + todos = d.get("todos", []) if isinstance(tool_input, dict) else [] + todo_count = len(todos) if isinstance(todos, list) else 0 + return ToolStartThinking( + title="Planning tasks", + items=( + [f"{todo_count} task{'s' if todo_count != 1 else ''}"] + if todo_count + else [] + ), + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Planning tasks", last_items) From c8fb4aa5e5b2df6c3c8cca2081f5f8634bd69d16 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 13/58] Add deliverables and web tool streaming handlers for chat runs. --- .../handlers/tools/deliverables/__init__.py | 0 .../deliverables/generate_image/__init__.py | 0 .../deliverables/generate_image/emission.py | 28 +++++++ .../deliverables/generate_image/thinking.py | 39 +++++++++ .../deliverables/generate_podcast/__init__.py | 0 .../deliverables/generate_podcast/emission.py | 37 +++++++++ .../deliverables/generate_podcast/thinking.py | 80 +++++++++++++++++++ .../deliverables/generate_report/__init__.py | 0 .../deliverables/generate_report/emission.py | 33 ++++++++ .../deliverables/generate_report/thinking.py | 77 ++++++++++++++++++ .../deliverables/generate_resume/__init__.py | 0 .../deliverables/generate_resume/emission.py | 32 ++++++++ .../deliverables/generate_resume/thinking.py | 24 ++++++ .../generate_video_presentation/__init__.py | 0 .../generate_video_presentation/emission.py | 28 +++++++ .../generate_video_presentation/thinking.py | 52 ++++++++++++ .../deliverables/save_document/__init__.py | 0 .../deliverables/save_document/emission.py | 16 ++++ .../deliverables/save_document/thinking.py | 38 +++++++++ .../tools/deliverables/shared/__init__.py | 0 .../tools/deliverables/shared/tool_input.py | 9 +++ .../handlers/tools/deliverables/tool_names.py | 12 +++ .../handlers/tools/scrape_webpage/emission.py | 43 ++++++++++ .../tools/scrape_webpage/shared/__init__.py | 0 .../tools/scrape_webpage/shared/tool_input.py | 9 +++ .../handlers/tools/scrape_webpage/thinking.py | 47 +++++++++++ .../handlers/tools/web_search/emission.py | 41 ++++++++++ 27 files changed, 645 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/__init__.py create mode 
100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py new file mode 100644 index 000000000..762f75cca --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py @@ -0,0 +1,28 @@ +"""generate_image: tool card + terminal summary.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict): + 
if out.get("error"): + yield ctx.streaming_service.format_terminal_info( + f"Image generation failed: {out['error'][:60]}", + "error", + ) + else: + yield ctx.streaming_service.format_terminal_info( + "Image generated successfully", + "success", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py new file mode 100644 index 000000000..9675cb0f2 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py @@ -0,0 +1,39 @@ +"""generate_image: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + prompt = d.get("prompt", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking( + title="Generating image", + items=[f"Prompt: {prompt[:80]}{'...' if len(prompt) > 80 else ''}"], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + if isinstance(tool_output, dict) and not tool_output.get("error"): + completed = [*items, "Image generated successfully"] + else: + error_msg = ( + tool_output.get("error", "Generation failed") + if isinstance(tool_output, dict) + else "Generation failed" + ) + completed = [*items, f"Error: {error_msg}"] + return ("Generating image", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py new file mode 100644 index 000000000..f1a1e9c37 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py @@ -0,0 +1,37 @@ +"""generate_podcast: tool card + queue / success / failure terminal lines.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") in ( + "pending", + "generating", + "processing", + ): + yield ctx.streaming_service.format_terminal_info( + f"Podcast queued: {out.get('title', 'Podcast')}", + "success", + ) + elif isinstance(out, dict) and out.get("status") in ("ready", "success"): + yield ctx.streaming_service.format_terminal_info( + f"Podcast generated successfully: {out.get('title', 'Podcast')}", + "success", + ) + elif isinstance(out, dict) and out.get("status") in ("failed", "error"): + error_msg = out.get("error", "Unknown error") + yield 
ctx.streaming_service.format_terminal_info( + f"Podcast generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py new file mode 100644 index 000000000..b92e0c91f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py @@ -0,0 +1,80 @@ +"""generate_podcast: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + podcast_title = ( + d.get("podcast_title", "SurfSense Podcast") + if isinstance(tool_input, dict) + else "SurfSense Podcast" + ) + content_len = len( + d.get("source_content", "") if isinstance(tool_input, dict) else "" + ) + return ToolStartThinking( + title="Generating podcast", + items=[ + f"Title: {podcast_title}", + f"Content: {content_len:,} characters", + "Preparing audio generation...", + ], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + podcast_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + podcast_title = ( + tool_output.get("title", "Podcast") + if isinstance(tool_output, dict) + else "Podcast" + ) + if podcast_status in ("pending", "generating", "processing"): + completed = [ + f"Title: {podcast_title}", + "Podcast generation started", + "Processing in background...", + ] + elif podcast_status == "already_generating": + completed = [ + f"Title: {podcast_title}", + "Podcast already in progress", + "Please wait for it to complete", + ] + elif podcast_status in ("failed", "error"): + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed = [ + f"Title: {podcast_title}", + f"Error: {error_msg[:50]}", + ] + elif podcast_status in ("ready", "success"): + completed = [ + f"Title: {podcast_title}", + "Podcast ready", + ] + else: + completed = items + return ("Generating podcast", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py new file mode 100644 index 000000000..1c5c71b8b --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py @@ -0,0 +1,33 @@ +"""generate_report: full payload + terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = 
ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") == "ready": + word_count = out.get("word_count", 0) + yield ctx.streaming_service.format_terminal_info( + f"Report generated: {out.get('title', 'Report')} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + out.get("error", "Unknown error") + if isinstance(out, dict) + else "Unknown error" + ) + yield ctx.streaming_service.format_terminal_info( + f"Report generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py new file mode 100644 index 000000000..f912350f8 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py @@ -0,0 +1,77 @@ +"""generate_report: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + report_topic = ( + d.get("topic", "Report") if isinstance(tool_input, dict) else "Report" + ) + is_revision = bool( + isinstance(tool_input, dict) and tool_input.get("parent_report_id") + ) + step_title = "Revising report" if is_revision else "Generating report" + return ToolStartThinking( + title=step_title, + items=[f"Topic: {report_topic}", "Analyzing source content..."], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + report_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + report_title = ( + tool_output.get("title", "Report") + if isinstance(tool_output, dict) + else "Report" + ) + word_count = ( + tool_output.get("word_count", 0) + if isinstance(tool_output, dict) + else 0 + ) + is_revision = ( + tool_output.get("is_revision", False) + if isinstance(tool_output, dict) + else False + ) + step_title = "Revising report" if is_revision else "Generating report" + + if report_status == "ready": + completed = [ + f"Topic: {report_title}", + f"{word_count:,} words", + "Report ready", + ] + elif report_status == "failed": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed = [ + f"Topic: {report_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed = items + + return (step_title, completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py new file mode 100644 index 000000000..dc8d3c7fc --- /dev/null +++ 
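The report completion step is purely status-driven; the in-progress items are replaced rather than appended to. For a ready payload (values illustrative):

    from app.tasks.chat.streaming.handlers.tools.deliverables.generate_report.thinking import (
        resolve_completed_thinking,
    )

    title, items = resolve_completed_thinking(
        "generate_report",
        {"status": "ready", "title": "Q3 market scan", "word_count": 1842},
        ["Topic: Q3 market scan", "Analyzing source content..."],
    )
    # title == "Generating report"   (is_revision defaults to False)
    # items == ["Topic: Q3 market scan", "1,842 words", "Report ready"]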
b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py @@ -0,0 +1,32 @@ +"""generate_resume: full payload + terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") == "ready": + yield ctx.streaming_service.format_terminal_info( + f"Resume generated: {out.get('title', 'Resume')}", + "success", + ) + else: + error_msg = ( + out.get("error", "Unknown error") + if isinstance(out, dict) + else "Unknown error" + ) + yield ctx.streaming_service.format_terminal_info( + f"Resume generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py new file mode 100644 index 000000000..e81a80679 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py @@ -0,0 +1,24 @@ +"""generate_resume: generic thinking titles and items.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.default import ( + thinking as default_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return default_thinking.resolve_start_thinking(tool_name, tool_input) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + return default_thinking.resolve_completed_thinking( + tool_name, tool_output, last_items + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py new file mode 100644 index 000000000..21e27d4c3 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py @@ -0,0 +1,28 @@ +"""generate_video_presentation: tool card + terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") == "pending": + yield ctx.streaming_service.format_terminal_info( + f"Video presentation queued: {out.get('title', 'Presentation')}", + "success", + ) + elif isinstance(out, dict) and out.get("status") == "failed": + 
error_msg = out.get("error", "Unknown error") + yield ctx.streaming_service.format_terminal_info( + f"Presentation generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py new file mode 100644 index 000000000..5c5aa977d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py @@ -0,0 +1,52 @@ +"""generate_video_presentation: generic in-progress thinking; completion is status-driven.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.default import ( + thinking as default_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return default_thinking.resolve_start_thinking(tool_name, tool_input) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + vp_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + vp_title = ( + tool_output.get("title", "Presentation") + if isinstance(tool_output, dict) + else "Presentation" + ) + if vp_status in ("pending", "generating"): + completed = [ + f"Title: {vp_title}", + "Presentation generation started", + "Processing in background...", + ] + elif vp_status == "failed": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed = [ + f"Title: {vp_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed = items + return ("Generating video presentation", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py new file mode 100644 index 000000000..68c93dede --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py @@ -0,0 +1,16 @@ +"""save_document: default completion card and terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.default import emission as _default +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + yield from _default.iter_completion_emission_frames(ctx) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py new file mode 100644 index 000000000..77059a28c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py @@ -0,0 +1,38 @@ +"""save_document: thinking-step copy.""" + +from 
__future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + doc_title = d.get("title", "") if isinstance(tool_input, dict) else str(tool_input) + display_title = doc_title[:60] + ("…" if len(doc_title) > 60 else "") + return ToolStartThinking(title="Saving document", items=[display_title]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + result_str = ( + tool_output.get("result", "") + if isinstance(tool_output, dict) + else str(tool_output) + ) + is_error = "Error" in result_str + completed = [ + *items, + result_str[:80] if is_error else "Saved to knowledge base", + ] + return ("Saving document", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py new file mode 100644 index 000000000..1303cf09f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py @@ -0,0 +1,9 @@ +"""Tool-call args for deliverable thinking modules.""" + +from __future__ import annotations + +from typing import Any + + +def as_tool_input_dict(tool_input: Any) -> dict[str, Any]: + return tool_input if isinstance(tool_input, dict) else {} diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py new file mode 100644 index 000000000..5924af196 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +DELIVERABLE_TOOLS: frozenset[str] = frozenset( + { + "generate_image", + "generate_podcast", + "generate_report", + "generate_resume", + "generate_video_presentation", + "save_document", + } +) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py new file mode 100644 index 000000000..293d2a1e9 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py @@ -0,0 +1,43 @@ +"""scrape_webpage: redacted payload + terminal summary.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + if isinstance(out, dict): + display_output = {k: v for k, v in out.items() if k != "content"} + if "content" in out: + content = out.get("content", "") + display_output["content_preview"] = ( + content[:500] + "..." 
if len(content) > 500 else content + ) + yield ctx.emit_tool_output_card(display_output) + else: + yield ctx.emit_tool_output_card({"result": out}) + + if isinstance(out, dict) and "error" not in out: + title = out.get("title", "Webpage") + word_count = out.get("word_count", 0) + yield ctx.streaming_service.format_terminal_info( + f"Scraped: {title[:40]}{'...' if len(title) > 40 else ''} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + out.get("error", "Failed to scrape") + if isinstance(out, dict) + else "Failed to scrape" + ) + yield ctx.streaming_service.format_terminal_info( + f"Scrape failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py new file mode 100644 index 000000000..581f0e64a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py @@ -0,0 +1,9 @@ +"""Tool-call args for scrape_webpage thinking.""" + +from __future__ import annotations + +from typing import Any + + +def as_tool_input_dict(tool_input: Any) -> dict[str, Any]: + return tool_input if isinstance(tool_input, dict) else {} diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py new file mode 100644 index 000000000..335cc9703 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py @@ -0,0 +1,47 @@ +"""scrape_webpage: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.scrape_webpage.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + url = d.get("url", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking( + title="Scraping webpage", + items=[f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + if isinstance(tool_output, dict): + title = tool_output.get("title", "Webpage") + word_count = tool_output.get("word_count", 0) + has_error = "error" in tool_output + if has_error: + completed = [ + *items, + f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}", + ] + else: + completed = [ + *items, + f"Title: {title[:50]}{'...' 
if len(title) > 50 else ''}",
+                f"Extracted: {word_count:,} words",
+            ]
+    else:
+        completed = [*items, "Content extracted"]
+    return ("Scraping webpage", completed)
diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py
new file mode 100644
index 000000000..eccaed708
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py
@@ -0,0 +1,43 @@
+"""web_search: citations parsed from provider XML."""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Iterator
+
+from app.tasks.chat.streaming.handlers.tools.emission_context import (
+    ToolCompletionEmissionContext,
+)
+
+
+def iter_completion_emission_frames(
+    ctx: ToolCompletionEmissionContext,
+) -> Iterator[str]:
+    out = ctx.tool_output
+    xml = out.get("result", str(out)) if isinstance(out, dict) else str(out)
+    citations: dict[str, dict[str, str]] = {}
+    # Tag shapes below are assumed from the provider's XML-ish results:
+    # <title><![CDATA[...]]></title> <url>...</url>, then <chunk url="...">.
+    for m in re.finditer(
+        r"(?s)<title><!\[CDATA\[(.*?)\]\]></title>\s*<url>(.*?)</url>",
+        xml,
+    ):
+        title, url = m.group(1).strip(), m.group(2).strip()
+        if url.startswith("http") and url not in citations:
+            citations[url] = {"title": title}
+    for m in re.finditer(
+        r'(?s)<chunk url="(.*?)">(.*?)</chunk>',
+        xml,
+    ):
+        chunk_url, content = m.group(1).strip(), m.group(2).strip()
+        if (
+            chunk_url.startswith("http")
+            and chunk_url in citations
+            and content
+        ):
+            citations[chunk_url]["snippet"] = (
+                content[:200] + "…" if len(content) > 200 else content
+            )
+    yield ctx.emit_tool_output_card(
+        {"status": "completed", "citations": citations},
+    )

From ec26ca69a6b4f05471beb8a3c114f689991f5622 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 6 May 2026 20:08:48 +0200
Subject: [PATCH 14/58] Add chat EventRelay and orchestrator stubs for future cutover.
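
The relay owns the astream_events -> SSE translation that the
orchestrator will take over at cutover. A rough consumption sketch
(collaborators elided; the names follow the signatures added below):

    relay = EventRelay(streaming_service=svc)
    state = AgentEventRelayState.for_invocation(parity_v2=False)
    result = StreamResult()
    async for frame in relay.relay(
        agent.astream_events(input_data, config=config, version="v2"),
        state=state,
        result=result,
    ):
        yield frame  # each frame is a ready-to-write SSE string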
--- .../app/tasks/chat/streaming/event_relay.py | 127 ++++++++++++++++++ .../app/tasks/chat/streaming/orchestrator.py | 48 +++++++ 2 files changed, 175 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/event_relay.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestrator.py diff --git a/surfsense_backend/app/tasks/chat/streaming/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/event_relay.py new file mode 100644 index 000000000..f86337ad7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/event_relay.py @@ -0,0 +1,127 @@ +"""Turn LangGraph astream_events into SSE strings via the handler modules.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from dataclasses import dataclass, field +from typing import Any + +from app.services.streaming.emitter import EmitterRegistry +from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames +from app.tasks.chat.streaming.handlers.chat_model_stream import ( + iter_chat_model_stream_frames, +) +from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( + iter_custom_event_frames, +) +from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames +from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + complete_active_thinking_step, +) +from app.tasks.chat.streaming.stream_result import StreamResult + + +@dataclass +class EventRelayConfig: + """Optional relay tuning (sub-agent tools, text suppression).""" + + subagent_entry_tool_names: frozenset[str] = field( + default_factory=lambda: frozenset({"task"}) + ) + suppress_main_text_inside_tools: bool = True + + +class EventRelay: + """Dispatches graph events to streaming handlers and optional emitters.""" + + def __init__( + self, + *, + streaming_service: Any, + config: EventRelayConfig | None = None, + ) -> None: + self.streaming_service = streaming_service + self.config = config or EventRelayConfig() + reg = getattr(streaming_service, "emitter_registry", None) + self.emitter_registry = reg if reg is not None else EmitterRegistry() + + async def relay( + self, + events: AsyncIterator[dict[str, Any]], + *, + state: AgentEventRelayState, + result: StreamResult, + step_prefix: str = "thinking", + content_builder: Any | None = None, + config: dict[str, Any] | None = None, + ) -> AsyncIterator[str]: + """Yield SSE for each event from the async iterator, then finalize text/thinking.""" + graph_config = config or {} + async for event in events: + event_type = event.get("event", "") + if event_type == "on_chat_model_stream": + for frame in iter_chat_model_stream_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + step_prefix=step_prefix, + ): + yield frame + elif event_type == "on_tool_start": + for frame in iter_tool_start_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + result=result, + step_prefix=step_prefix, + ): + yield frame + elif event_type == "on_tool_end": + for frame in iter_tool_end_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + result=result, + step_prefix=step_prefix, + config=graph_config, + ): + yield frame + elif event_type == "on_custom_event": + for frame in iter_custom_event_frames( + event, + 
state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + ): + yield frame + elif event_type in ("on_chain_end", "on_agent_end"): + for frame in iter_chain_end_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + ): + yield frame + + if state.current_text_id is not None: + yield self.streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + + completion_event, new_active = complete_active_thinking_step( + streaming_service=self.streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if completion_event: + yield completion_event + state.last_active_step_id = new_active diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py new file mode 100644 index 000000000..1b8558bc6 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py @@ -0,0 +1,48 @@ +"""Top-level chat streaming entrypoints (stubs until wired).""" + +from __future__ import annotations + +from collections.abc import AsyncGenerator +from typing import Any + + +async def stream_chat( + *, + request: Any, + user: Any, + db_session: Any, +) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress + del request, user, db_session + raise NotImplementedError( + "stream_chat: orchestrator not wired yet" + ) + if False: # pragma: no cover + yield "" + + +async def stream_resume( + *, + request: Any, + user: Any, + db_session: Any, +) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress + del request, user, db_session + raise NotImplementedError( + "stream_resume: orchestrator not wired yet" + ) + if False: # pragma: no cover + yield "" + + +async def stream_regenerate( + *, + request: Any, + user: Any, + db_session: Any, +) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress + del request, user, db_session + raise NotImplementedError( + "stream_regenerate: orchestrator not wired yet" + ) + if False: # pragma: no cover + yield "" From 8b6ffd12b8649bd789a9e780dd90a0a64d04fbac Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 15/58] Add parity unit tests for extracted chat streaming vs legacy. 
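
Every extracted helper is pinned against its legacy counterpart over a
representative input set, so a divergence must surface here before it
ships. The pattern throughout (simplified; the wall-clock retry
timestamp both sides stamp independently is stripped before comparing):

    new = new_classify(exc, flow_label="parity-test")
    old = old_classify(exc, flow_label="parity-test")
    assert new[:5] == old[:5]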
--- .../unit/tasks/chat/streaming/__init__.py | 0 .../chat/streaming/test_stage_1_parity.py | 292 ++++++++++++++++++ .../chat/streaming/test_stage_2_parity.py | 240 ++++++++++++++ 3 files changed, 532 insertions(+) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/__init__.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/__init__.py b/surfsense_backend/tests/unit/tasks/chat/streaming/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py new file mode 100644 index 000000000..9207f37d1 --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py @@ -0,0 +1,292 @@ +"""Pin Stage 1 extractions as faithful copies of the old helpers. + +The new orchestrator under ``app.tasks.chat.streaming`` is built in +parallel with the production module ``app.tasks.chat.stream_new_chat``. +For each Stage 1 extraction we assert the new function returns the same +output as the old one for a representative input set. The moment the +two diverge - intentionally or otherwise - this file fails loudly so +the divergence is reviewed rather than shipped silently. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from typing import Any + +import pytest + +from app.agents.new_chat.errors import BusyError +from app.agents.new_chat.middleware.busy_mutex import request_cancel, reset_cancel +from app.tasks.chat.stream_new_chat import ( + _classify_stream_exception as old_classify, + _emit_stream_terminal_error as old_emit_terminal_error, + _extract_chunk_parts as old_extract_chunk_parts, + _extract_resolved_file_path as old_extract_resolved_file_path, + _first_interrupt_value as old_first_interrupt_value, + _tool_output_has_error as old_tool_output_has_error, + _tool_output_to_text as old_tool_output_to_text, +) +from app.tasks.chat.streaming.errors.classifier import ( + classify_stream_exception as new_classify, +) +from app.tasks.chat.streaming.errors.emitter import ( + emit_stream_terminal_error as new_emit_terminal_error, +) +from app.tasks.chat.streaming.helpers.chunk_parts import ( + extract_chunk_parts as new_extract_chunk_parts, +) +from app.tasks.chat.streaming.helpers.interrupt_inspector import ( + first_interrupt_value as new_first_interrupt_value, +) +from app.tasks.chat.streaming.helpers.tool_output import ( + extract_resolved_file_path as new_extract_resolved_file_path, + tool_output_has_error as new_tool_output_has_error, + tool_output_to_text as new_tool_output_to_text, +) + +pytestmark = pytest.mark.unit + + +# ---------------------------------------------------------------- chunk parts + + +@dataclass +class _Chunk: + content: Any = "" + additional_kwargs: dict[str, Any] = field(default_factory=dict) + tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + + +_CHUNK_CASES: list[Any] = [ + None, + _Chunk(content=""), + _Chunk(content="hello"), + _Chunk(content=42), # invalid type, defensively coerced to empty + _Chunk( + content=[ + {"type": "text", "text": "Hello "}, + {"type": "text", "text": "world"}, + ] + ), + _Chunk( + content=[ + {"type": "reasoning", "reasoning": "hmm "}, + {"type": "reasoning", "text": "still"}, + 
{"type": "text", "text": "answer"}, + ] + ), + _Chunk( + content=[ + {"type": "tool_call_chunk", "id": "c1", "name": "x", "args": "{"}, + {"type": "tool_use", "id": "c2", "name": "y"}, + {"type": "image_url", "url": "ignored"}, + ] + ), + _Chunk( + content="visible", + additional_kwargs={"reasoning_content": "private"}, + ), + _Chunk( + tool_call_chunks=[ + {"id": None, "name": None, "args": '{"a":1}', "index": 0}, + {"id": "c", "name": "n", "args": "}", "index": 0}, + ] + ), + _Chunk( + content=[{"type": "tool_call_chunk", "id": "from-block", "name": "x"}], + tool_call_chunks=[{"id": "from-attr", "name": "y"}], + ), +] + + +@pytest.mark.parametrize("chunk", _CHUNK_CASES) +def test_extract_chunk_parts_matches_old_implementation(chunk: Any) -> None: + assert new_extract_chunk_parts(chunk) == old_extract_chunk_parts(chunk) + + +# ---------------------------------------------------------- interrupt inspector + + +@dataclass +class _Interrupt: + value: dict[str, Any] + + +@dataclass +class _Task: + interrupts: tuple[Any, ...] = () + + +@dataclass +class _State: + tasks: tuple[Any, ...] = () + interrupts: tuple[Any, ...] = () + + +_INTERRUPT_CASES: list[Any] = [ + _State(), + _State(tasks=(_Task(interrupts=(_Interrupt(value={"name": "send"}),)),)), + # Multiple tasks: must return the FIRST one in iteration order. + _State( + tasks=( + _Task(interrupts=(_Interrupt(value={"name": "first"}),)), + _Task(interrupts=(_Interrupt(value={"name": "second"}),)), + ) + ), + # Empty task interrupts -> falls back to root state.interrupts. + _State( + tasks=(_Task(interrupts=()),), + interrupts=(_Interrupt(value={"name": "root"}),), + ), + # Interrupts as plain dicts (not wrapper objects). + _State(interrupts=({"value": {"name": "dict_root"}},)), + # A defective task whose `.interrupts` raises - must be tolerated. + _State(tasks=(object(),)), +] + + +@pytest.mark.parametrize("state", _INTERRUPT_CASES) +def test_first_interrupt_value_matches_old_implementation(state: Any) -> None: + assert new_first_interrupt_value(state) == old_first_interrupt_value(state) + + +# ----------------------------------------------------------- error classifier + + +def _classify_cases() -> list[Exception]: + """Inputs that the FE depends on being mapped to specific error codes.""" + return [ + Exception("totally generic error"), + Exception( + '{"error":{"type":"rate_limit_error","message":"slow down"}}' + ), + Exception( + 'OpenrouterException - {"error":{"message":"Provider returned error",' + '"code":429}}' + ), + BusyError(request_id="thread-busy-parity"), + Exception("Thread is busy with another request"), + ] + + +@pytest.mark.parametrize("exc", _classify_cases()) +def test_classify_stream_exception_matches_old_implementation( + exc: Exception, +) -> None: + new = new_classify(exc, flow_label="parity-test") + old = old_classify(exc, flow_label="parity-test") + # Strip the wall-clock retry timestamp before comparing — both + # implementations call ``time.time()`` independently and the call + # order is enough to differ by 1 ms in practice. Every other field + # in the tuple must match exactly. 
+ new_extra = dict(new[5]) if isinstance(new[5], dict) else new[5] + old_extra = dict(old[5]) if isinstance(old[5], dict) else old[5] + if isinstance(new_extra, dict) and isinstance(old_extra, dict): + new_extra.pop("retry_after_at", None) + old_extra.pop("retry_after_at", None) + assert new[:5] == old[:5] + assert new_extra == old_extra + + +def test_classify_turn_cancelling_branch_parity() -> None: + """The TURN_CANCELLING branch reads cancel state for the busy thread id; + both implementations must agree on retry-window semantics, not just the + plain THREAD_BUSY code.""" + thread_id = "parity-cancelling-thread" + reset_cancel(thread_id) + request_cancel(thread_id) + exc = BusyError(request_id=thread_id) + new = new_classify(exc, flow_label="parity-test") + old = old_classify(exc, flow_label="parity-test") + assert new[0] == old[0] == "thread_busy" + assert new[1] == old[1] == "TURN_CANCELLING" + assert isinstance(new[5], dict) and isinstance(old[5], dict) + assert new[5]["retry_after_ms"] == old[5]["retry_after_ms"] + + +# ------------------------------------------------------------ terminal emitter + + +class _FakeStreamingService: + """Duck-types ``format_error`` for both old and new emitters.""" + + def __init__(self) -> None: + self.calls: list[dict[str, Any]] = [] + + def format_error( + self, message: str, *, error_code: str, extra: dict[str, Any] | None = None + ) -> str: + self.calls.append( + {"message": message, "error_code": error_code, "extra": extra} + ) + return f"data: {{\"type\":\"error\",\"errorText\":\"{message}\"}}\n\n" + + +def test_emit_stream_terminal_error_matches_old_output_and_logs(caplog) -> None: + """The new emitter must produce the same SSE frame and log the same + structured payload as the old one for the same arguments.""" + args: dict[str, Any] = { + "flow": "new", + "request_id": "req-parity", + "thread_id": 7, + "search_space_id": 9, + "user_id": "user-parity", + "message": "boom", + "error_kind": "server_error", + "error_code": "SERVER_ERROR", + "severity": "error", + "is_expected": False, + "extra": {"foo": "bar"}, + } + + new_svc = _FakeStreamingService() + old_svc = _FakeStreamingService() + + with caplog.at_level(logging.ERROR): + new_frame = new_emit_terminal_error(streaming_service=new_svc, **args) + old_frame = old_emit_terminal_error(streaming_service=old_svc, **args) + + assert new_frame == old_frame + assert new_svc.calls == old_svc.calls + chat_error_records = [ + r for r in caplog.records if "[chat_stream_error]" in r.message + ] + # One log line per emit call (two emits -> two records). 
+ assert len(chat_error_records) == 2 + + +# ---------------------------------------------------------------- tool output + + +def test_tool_output_helpers_match_old_implementation() -> None: + samples: list[Any] = [ + {"result": "ok"}, + {"error": "bad"}, + {"result": "Error: x"}, + "Error: plain", + "fine", + {"nested": {"a": 1}}, + ] + for s in samples: + assert new_tool_output_to_text(s) == old_tool_output_to_text(s) + assert new_tool_output_has_error(s) == old_tool_output_has_error(s) + + assert new_extract_resolved_file_path( + tool_name="write_file", + tool_output={"path": " /tmp/x "}, + tool_input=None, + ) == old_extract_resolved_file_path( + tool_name="write_file", + tool_output={"path": " /tmp/x "}, + tool_input=None, + ) + assert new_extract_resolved_file_path( + tool_name="write_file", + tool_output={}, + tool_input={"file_path": " /fallback "}, + ) == old_extract_resolved_file_path( + tool_name="write_file", + tool_output={}, + tool_input={"file_path": " /fallback "}, + ) diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py new file mode 100644 index 000000000..892bb7a6a --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py @@ -0,0 +1,240 @@ +"""Parity tests for Stage 2 extractions (tool matching, thinking step, custom events).""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from app.tasks.chat.stream_new_chat import _legacy_match_lc_id as old_legacy_match +from app.tasks.chat.streaming.handlers.custom_events import ( + handle_action_log, + handle_action_log_updated, + handle_document_created, + handle_report_progress, +) +from app.tasks.chat.streaming.helpers.tool_call_matching import ( + match_buffered_langchain_tool_call_id as new_legacy_match, +) +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + complete_active_thinking_step, +) +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + +pytestmark = pytest.mark.unit + + +def _copy_chunk_buffer(raw: list[dict[str, Any]]) -> list[dict[str, Any]]: + return [dict(x) for x in raw] + + +def test_legacy_tool_call_match_matches_old_implementation() -> None: + cases: list[tuple[list[dict[str, Any]], str, str, dict[str, str]]] = [ + ( + [ + {"name": "write_file", "id": "lc-a"}, + {"name": "other", "id": "lc-b"}, + ], + "write_file", + "run-1", + {}, + ), + ( + [{"name": "x", "id": None}, {"name": "y", "id": "lc-fallback"}], + "write_file", + "run-2", + {}, + ), + ([{"name": "no_id"}], "write_file", "run-3", {}), + ] + for chunks_template, tool_name, run_id, lc_map_seed in cases: + old_chunks = _copy_chunk_buffer(chunks_template) + new_chunks = _copy_chunk_buffer(chunks_template) + old_map = dict(lc_map_seed) + new_map = dict(lc_map_seed) + old_out = old_legacy_match(old_chunks, tool_name, run_id, old_map) + new_out = new_legacy_match(new_chunks, tool_name, run_id, new_map) + assert new_out == old_out + assert new_chunks == old_chunks + assert new_map == old_map + + +def test_emit_thinking_step_frame_invokes_builder_before_service() -> None: + order: list[str] = [] + builder = MagicMock() + + def on_ts(*args: Any, **kwargs: Any) -> None: + order.append("builder") + + builder.on_thinking_step.side_effect = on_ts + + svc = MagicMock() + + def fmt(**kwargs: Any) -> str: + 
order.append("service") + return "frame" + + svc.format_thinking_step.side_effect = fmt + + out = emit_thinking_step_frame( + streaming_service=svc, + content_builder=builder, + step_id="thinking-1", + title="Working", + status="in_progress", + items=["a"], + ) + assert out == "frame" + assert order == ["builder", "service"] + builder.on_thinking_step.assert_called_once() + svc.format_thinking_step.assert_called_once() + + +def test_emit_thinking_step_frame_skips_builder_when_none() -> None: + svc = MagicMock(return_value="x") + svc.format_thinking_step.return_value = "frame" + assert ( + emit_thinking_step_frame( + streaming_service=svc, + content_builder=None, + step_id="s", + title="t", + ) + == "frame" + ) + svc.format_thinking_step.assert_called_once() + + +def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: + svc = MagicMock() + svc.format_thinking_step.return_value = "done-frame" + completed: set[str] = set() + + frame, new_id = complete_active_thinking_step( + streaming_service=svc, + content_builder=None, + last_active_step_id="thinking-1", + last_active_step_title="T", + last_active_step_items=["x"], + completed_step_ids=completed, + ) + assert frame == "done-frame" + assert new_id is None + assert "thinking-1" in completed + + frame2, id2 = complete_active_thinking_step( + streaming_service=svc, + content_builder=None, + last_active_step_id="thinking-1", + last_active_step_title="T", + last_active_step_items=[], + completed_step_ids=completed, + ) + assert frame2 is None + assert id2 == "thinking-1" + + +def test_agent_event_relay_state_factory_matches_counter_rule() -> None: + s0 = AgentEventRelayState.for_invocation(parity_v2=False) + assert s0.thinking_step_counter == 0 + assert s0.last_active_step_id is None + + s1 = AgentEventRelayState.for_invocation( + initial_step_id="thinking-resume-1", + initial_step_title="Inherited", + initial_step_items=["Topic: X"], + parity_v2=True, + ) + assert s1.thinking_step_counter == 1 + assert s1.last_active_step_id == "thinking-resume-1" + assert s1.parity_v2 is True + assert s1.next_thinking_step_id("thinking") == "thinking-2" + + +@pytest.mark.parametrize( + ("phase", "message", "start_items", "expected_tail"), + [ + ( + "revising_section", + "progress line", + ["Topic: Foo", "Modifying bar", "stale..."], + ["Topic: Foo", "Modifying bar", "progress line"], + ), + ( + "other", + "phase msg", + ["Topic: Foo", "old line"], + ["Topic: Foo", "phase msg"], + ), + ], +) +def test_report_progress_items_match_reference( + phase: str, + message: str, + start_items: list[str], + expected_tail: list[str], +) -> None: + svc = MagicMock() + svc.format_thinking_step.return_value = "sse" + + items = list(start_items) + frame, new_items = handle_report_progress( + {"message": message, "phase": phase}, + last_active_step_id="step-1", + last_active_step_title="Report", + last_active_step_items=items, + streaming_service=svc, + content_builder=None, + ) + assert frame == "sse" + assert new_items == expected_tail + kwargs = svc.format_thinking_step.call_args.kwargs + assert kwargs["items"] == expected_tail + + +def test_report_progress_noop_when_missing_message_or_step() -> None: + svc = MagicMock() + items = ["Topic: A"] + f1, i1 = handle_report_progress( + {"message": "", "phase": "x"}, + last_active_step_id="s", + last_active_step_title="t", + last_active_step_items=items, + streaming_service=svc, + content_builder=None, + ) + assert f1 is None and i1 is items + + f2, i2 = handle_report_progress( + {"message": "m", "phase": "x"}, + 
last_active_step_id=None, + last_active_step_title="t", + last_active_step_items=items, + streaming_service=svc, + content_builder=None, + ) + assert f2 is None and i2 is items + + +def test_document_action_handlers_match_format_data_guards() -> None: + svc = MagicMock() + svc.format_data.return_value = "data-frame" + + assert handle_document_created({}, streaming_service=svc) is None + assert handle_document_created({"id": 0}, streaming_service=svc) is None + handle_document_created({"id": 42, "title": "x"}, streaming_service=svc) + svc.format_data.assert_called_with( + "documents-updated", {"action": "created", "document": {"id": 42, "title": "x"}} + ) + + svc.reset_mock() + assert handle_action_log({"id": None}, streaming_service=svc) is None + handle_action_log({"id": 1}, streaming_service=svc) + svc.format_data.assert_called_once_with("action-log", {"id": 1}) + + svc.reset_mock() + assert handle_action_log_updated({"id": None}, streaming_service=svc) is None + handle_action_log_updated({"id": 2}, streaming_service=svc) + svc.format_data.assert_called_once_with("action-log-updated", {"id": 2}) From 2ec2e82d9d3215734bb367e3cd854035ae078919 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:10:09 +0200 Subject: [PATCH 16/58] Configure workspace Python interpreter and pyright extra paths. --- .vscode/settings.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 05bd30702..7da4b54f8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,9 @@ { "biome.configurationPath": "./surfsense_web/biome.json", - "deepscan.ignoreConfirmWarning": true + "deepscan.ignoreConfirmWarning": true, + "python.defaultInterpreterPath": "${workspaceFolder}/surfsense_backend/.venv/bin/python", + "basedpyright.analysis.extraPaths": [ + "${workspaceFolder}/surfsense_backend" + ], + "python-envs.pythonProjects": [] } \ No newline at end of file From c0706364d15e25a42d2973a890a106e9df6ccff0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 14:44:36 +0200 Subject: [PATCH 17/58] Add a route-level kill switch for streaming orchestrator cutover. --- surfsense_backend/app/config/__init__.py | 6 + .../app/routes/new_chat_routes.py | 33 +++- .../app/tasks/chat/streaming/orchestrator.py | 143 ++++++++++++++---- 3 files changed, 146 insertions(+), 36 deletions(-) diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index f6f0c7f62..543524456 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -490,6 +490,12 @@ class Config: ENABLE_DESKTOP_LOCAL_FILESYSTEM = ( os.getenv("ENABLE_DESKTOP_LOCAL_FILESYSTEM", "FALSE").upper() == "TRUE" ) + # Streaming entrypoint switch. Keep this at the route layer so orchestrator + # code stays free of legacy fallback branching. 
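+    # Example: set SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR=FALSE to route
+    # chat streaming back through the legacy entrypoints.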
+ ENABLE_CHAT_STREAM_ORCHESTRATOR = ( + os.getenv("SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR", "TRUE").upper() + == "TRUE" + ) @classmethod def is_self_hosted(cls) -> bool: diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index ad96654f5..7f035daef 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -71,7 +71,15 @@ from app.schemas.new_chat import ( TokenUsageSummary, TurnStatusResponse, ) -from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat +from app.tasks.chat.stream_new_chat import ( + stream_new_chat as legacy_stream_new_chat, + stream_resume_chat as legacy_stream_resume_chat, +) +from app.tasks.chat.streaming.orchestrator import ( + stream_chat, + stream_regenerate, + stream_resume, +) from app.users import current_active_user from app.utils.perf import get_perf_logger from app.utils.rbac import check_permission @@ -90,6 +98,10 @@ TURN_CANCELLING_MAX_DELAY_MS = 1500 router = APIRouter() +def _use_streaming_orchestrator() -> bool: + return config.ENABLE_CHAT_STREAM_ORCHESTRATOR + + def _resolve_filesystem_selection( *, mode: str, @@ -1770,7 +1782,11 @@ async def handle_new_chat( ) return StreamingResponse( - stream_new_chat( + ( + stream_chat + if _use_streaming_orchestrator() + else legacy_stream_new_chat + )( user_query=request.user_query, search_space_id=request.search_space_id, chat_id=request.chat_id, @@ -2255,7 +2271,12 @@ async def regenerate_response( else None ) try: - async for chunk in stream_new_chat( + regenerate_fn = ( + stream_regenerate + if _use_streaming_orchestrator() + else legacy_stream_new_chat + ) + async for chunk in regenerate_fn( user_query=str(user_query_to_use), search_space_id=request.search_space_id, chat_id=thread_id, @@ -2387,7 +2408,11 @@ async def resume_chat( await session.close() return StreamingResponse( - stream_resume_chat( + ( + stream_resume + if _use_streaming_orchestrator() + else legacy_stream_resume_chat + )( chat_id=thread_id, search_space_id=request.search_space_id, decisions=decisions, diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py index 1b8558bc6..e912dd632 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py @@ -1,48 +1,127 @@ -"""Top-level chat streaming entrypoints (stubs until wired).""" +"""Top-level chat streaming entrypoints. + +For now these orchestrator functions are thin compatibility wrappers around the +current ``stream_new_chat`` / ``stream_resume_chat`` implementations. Routing +calls through this module lets us cut over to the fully modular event relay in +one place later without touching API routes again. 
+""" from __future__ import annotations from collections.abc import AsyncGenerator -from typing import Any +from typing import Any, Literal + +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.db import ChatVisibility +from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat async def stream_chat( *, - request: Any, - user: Any, - db_session: Any, -) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress - del request, user, db_session - raise NotImplementedError( - "stream_chat: orchestrator not wired yet" - ) - if False: # pragma: no cover - yield "" + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + mentioned_documents: list[dict[str, Any]] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, +) -> AsyncGenerator[str, None]: + """Stream a new chat turn through the current production pipeline.""" + async for chunk in stream_new_chat( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + mentioned_documents=mentioned_documents, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ): + yield chunk async def stream_resume( *, - request: Any, - user: Any, - db_session: Any, -) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress - del request, user, db_session - raise NotImplementedError( - "stream_resume: orchestrator not wired yet" - ) - if False: # pragma: no cover - yield "" + chat_id: int, + search_space_id: int, + decisions: list[dict], + user_id: str | None = None, + llm_config_id: int = -1, + thread_visibility: ChatVisibility | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + disabled_tools: list[str] | None = None, +) -> AsyncGenerator[str, None]: + """Resume an interrupted chat turn through the current production pipeline.""" + async for chunk in stream_resume_chat( + chat_id=chat_id, + search_space_id=search_space_id, + decisions=decisions, + user_id=user_id, + llm_config_id=llm_config_id, + thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + request_id=request_id, + disabled_tools=disabled_tools, + ): + yield chunk async def stream_regenerate( *, - request: Any, - user: Any, - db_session: Any, -) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress - del request, user, db_session - raise NotImplementedError( - "stream_regenerate: orchestrator not wired yet" - ) - if False: # pragma: no cover - yield "" + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + 
mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + mentioned_documents: list[dict[str, Any]] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, + flow: Literal["new", "regenerate"] = "regenerate", +) -> AsyncGenerator[str, None]: + """Regenerate an assistant turn through the current production pipeline.""" + async for chunk in stream_new_chat( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + mentioned_documents=mentioned_documents, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + flow=flow, + ): + yield chunk From 4e664652a86ddcdb81014b92587d5436762cd2fa Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 15:13:22 +0200 Subject: [PATCH 18/58] Add streaming runtime helpers with behavior-focused unit tests. --- .../app/tasks/chat/streaming/runtime.py | 92 ++++++++++++++ .../streaming/test_orchestrator_runtime.py | 120 ++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/runtime.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py diff --git a/surfsense_backend/app/tasks/chat/streaming/runtime.py b/surfsense_backend/app/tasks/chat/streaming/runtime.py new file mode 100644 index 000000000..b45da2789 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/runtime.py @@ -0,0 +1,92 @@ +"""Runtime setup helpers for orchestrated chat streaming.""" + +from __future__ import annotations + +import contextlib +import logging +from collections.abc import Callable +from typing import Any + +_PREFLIGHT_TIMEOUT_SEC: float = 2.5 +_PREFLIGHT_MAX_TOKENS: int = 1 + + +async def preflight_llm( + llm: Any, + *, + is_provider_rate_limited: Callable[[BaseException], bool], +) -> None: + """Issue a minimal completion probe to catch immediate provider 429s.""" + from litellm import acompletion + + model = getattr(llm, "model", None) + if not model or model == "auto": + return + + try: + await acompletion( + model=model, + messages=[{"role": "user", "content": "ping"}], + api_key=getattr(llm, "api_key", None), + api_base=getattr(llm, "api_base", None), + max_tokens=_PREFLIGHT_MAX_TOKENS, + timeout=_PREFLIGHT_TIMEOUT_SEC, + stream=False, + metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]}, + ) + except Exception as exc: + if is_provider_rate_limited(exc): + raise + logging.getLogger(__name__).debug( + "auto_pin_preflight non_rate_limit_error model=%s err=%s", + model, + exc, + ) + + +async def build_main_agent_for_thread( + agent_factory: Any, + *, + llm: Any, + search_space_id: int, + db_session: Any, + connector_service: Any, + checkpointer: Any, + user_id: str | None, + thread_id: int | None, + agent_config: Any, + firecrawl_api_key: str | None, 
+ thread_visibility: Any, + filesystem_selection: Any, + disabled_tools: list[str] | None = None, + mentioned_document_ids: list[int] | None = None, +) -> Any: + """Run one canonical agent-build call for a single thread.""" + return await agent_factory( + llm=llm, + search_space_id=search_space_id, + db_session=db_session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=thread_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, + ) + + +async def settle_speculative_agent_build(task: Any) -> None: + """Wait for a discarded speculative build and swallow its outcome.""" + with contextlib.suppress(BaseException): + await task + + +__all__ = [ + "build_main_agent_for_thread", + "preflight_llm", + "settle_speculative_agent_build", +] diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py new file mode 100644 index 000000000..edb05edfa --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py @@ -0,0 +1,120 @@ +"""Behavior tests for streaming runtime helpers.""" + +from __future__ import annotations + +import sys +import types +from typing import Any + +import pytest + +from app.tasks.chat.streaming import runtime + +pytestmark = pytest.mark.unit + + +async def test_preflight_llm_calls_litellm_when_model_present( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: dict[str, Any] = {} + + async def _fake_acompletion(**kwargs: Any): + calls.update(kwargs) + return {"ok": True} + + monkeypatch.setitem( + sys.modules, + "litellm", + types.SimpleNamespace(acompletion=_fake_acompletion), + ) + + llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b") + await runtime.preflight_llm(llm, is_provider_rate_limited=lambda _: False) + + assert calls["model"] == "openai/test" + assert calls["max_tokens"] == 1 + assert calls["timeout"] == 2.5 + assert calls["stream"] is False + + +async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None: + class _RateLimitedError(Exception): + pass + + async def _fake_acompletion(**kwargs: Any): + del kwargs + raise _RateLimitedError("rl") + + monkeypatch.setitem( + sys.modules, + "litellm", + types.SimpleNamespace(acompletion=_fake_acompletion), + ) + + with pytest.raises(_RateLimitedError): + await runtime.preflight_llm( + types.SimpleNamespace(model="openai/test"), + is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError), + ) + + +async def test_preflight_llm_skips_probe_for_auto_model( + monkeypatch: pytest.MonkeyPatch, +) -> None: + called = {"count": 0} + + async def _fake_acompletion(**kwargs: Any): + del kwargs + called["count"] += 1 + return {"ok": True} + + monkeypatch.setitem( + sys.modules, + "litellm", + types.SimpleNamespace(acompletion=_fake_acompletion), + ) + + await runtime.preflight_llm( + types.SimpleNamespace(model="auto"), + is_provider_rate_limited=lambda _: False, + ) + assert called["count"] == 0 + + +async def test_build_main_agent_for_thread_forwards_arguments() -> None: + seen: dict[str, Any] = {} + + async def _factory(**kwargs: Any): + seen.update(kwargs) + return "agent" + + out = await runtime.build_main_agent_for_thread( + _factory, + llm="llm", + search_space_id=1, + 
db_session="db", + connector_service="connector", + checkpointer="cp", + user_id="u", + thread_id=10, + agent_config="cfg", + firecrawl_api_key="key", + thread_visibility="vis", + filesystem_selection="fs", + disabled_tools=["a"], + mentioned_document_ids=[5], + ) + assert out == "agent" + assert seen["thread_id"] == 10 + assert seen["mentioned_document_ids"] == [5] + + +async def test_settle_speculative_agent_build_swallows_exceptions() -> None: + async def _boom() -> None: + raise RuntimeError("ignore") + + import asyncio + + task = asyncio.create_task(_boom()) + await runtime.settle_speculative_agent_build(task) + assert task.done() From f8754a9dab480d2cb112f32c2cf1b4c67b4949ea Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 15:41:33 +0200 Subject: [PATCH 19/58] Rename streaming runtime modules for clearer SRP boundaries. --- .../streaming/{runtime.py => agent_setup.py} | 2 +- .../chat/streaming/orchestration/__init__.py | 5 + .../streaming/orchestration/event_stream.py | 53 +++++++++ ...strator_runtime.py => test_agent_setup.py} | 14 +-- .../test_orchestration_event_stream.py | 107 ++++++++++++++++++ 5 files changed, 173 insertions(+), 8 deletions(-) rename surfsense_backend/app/tasks/chat/streaming/{runtime.py => agent_setup.py} (97%) create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py rename surfsense_backend/tests/unit/tasks/chat/streaming/{test_orchestrator_runtime.py => test_agent_setup.py} (87%) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py diff --git a/surfsense_backend/app/tasks/chat/streaming/runtime.py b/surfsense_backend/app/tasks/chat/streaming/agent_setup.py similarity index 97% rename from surfsense_backend/app/tasks/chat/streaming/runtime.py rename to surfsense_backend/app/tasks/chat/streaming/agent_setup.py index b45da2789..f67c6ad65 100644 --- a/surfsense_backend/app/tasks/chat/streaming/runtime.py +++ b/surfsense_backend/app/tasks/chat/streaming/agent_setup.py @@ -1,4 +1,4 @@ -"""Runtime setup helpers for orchestrated chat streaming.""" +"""Agent setup helpers for orchestrated chat streaming.""" from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py new file mode 100644 index 000000000..8b586f2be --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py @@ -0,0 +1,5 @@ +"""Composable orchestration pieces for chat streaming.""" + +from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events + +__all__ = ["stream_agent_events"] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py new file mode 100644 index 000000000..1448cd86a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py @@ -0,0 +1,53 @@ +"""Run LangGraph event streams through the EventRelay.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any + +from app.agents.new_chat.feature_flags import get_flags +from app.tasks.chat.streaming.event_relay import EventRelay +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.stream_result import StreamResult + + +async def stream_agent_events( + 
*, + agent: Any, + config: dict[str, Any], + input_data: Any, + streaming_service: Any, + result: StreamResult, + step_prefix: str = "thinking", + initial_step_id: str | None = None, + initial_step_title: str = "", + initial_step_items: list[str] | None = None, + content_builder: Any | None = None, + runtime_context: Any = None, +) -> AsyncIterator[str]: + """Yield SSE frames from agent ``astream_events`` via ``EventRelay``.""" + state = AgentEventRelayState.for_invocation( + initial_step_id=initial_step_id, + initial_step_title=initial_step_title, + initial_step_items=initial_step_items, + parity_v2=bool(get_flags().enable_stream_parity_v2), + ) + + astream_kwargs: dict[str, Any] = {"config": config, "version": "v2"} + if runtime_context is not None: + astream_kwargs["context"] = runtime_context + + events = agent.astream_events(input_data, **astream_kwargs) + relay = EventRelay(streaming_service=streaming_service) + async for frame in relay.relay( + events, + state=state, + result=result, + step_prefix=step_prefix, + content_builder=content_builder, + config=config, + ): + yield frame + + result.accumulated_text = state.accumulated_text + result.agent_called_update_memory = state.called_update_memory diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py similarity index 87% rename from surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py rename to surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py index edb05edfa..e1f7dd027 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py @@ -1,4 +1,4 @@ -"""Behavior tests for streaming runtime helpers.""" +"""Behavior tests for streaming agent setup helpers.""" from __future__ import annotations @@ -8,7 +8,7 @@ from typing import Any import pytest -from app.tasks.chat.streaming import runtime +from app.tasks.chat.streaming import agent_setup pytestmark = pytest.mark.unit @@ -29,7 +29,7 @@ async def test_preflight_llm_calls_litellm_when_model_present( ) llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b") - await runtime.preflight_llm(llm, is_provider_rate_limited=lambda _: False) + await agent_setup.preflight_llm(llm, is_provider_rate_limited=lambda _: False) assert calls["model"] == "openai/test" assert calls["max_tokens"] == 1 @@ -52,7 +52,7 @@ async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPat ) with pytest.raises(_RateLimitedError): - await runtime.preflight_llm( + await agent_setup.preflight_llm( types.SimpleNamespace(model="openai/test"), is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError), ) @@ -74,7 +74,7 @@ async def test_preflight_llm_skips_probe_for_auto_model( types.SimpleNamespace(acompletion=_fake_acompletion), ) - await runtime.preflight_llm( + await agent_setup.preflight_llm( types.SimpleNamespace(model="auto"), is_provider_rate_limited=lambda _: False, ) @@ -88,7 +88,7 @@ async def test_build_main_agent_for_thread_forwards_arguments() -> None: seen.update(kwargs) return "agent" - out = await runtime.build_main_agent_for_thread( + out = await agent_setup.build_main_agent_for_thread( _factory, llm="llm", search_space_id=1, @@ -116,5 +116,5 @@ async def test_settle_speculative_agent_build_swallows_exceptions() -> None: import asyncio task = asyncio.create_task(_boom()) - await 
runtime.settle_speculative_agent_build(task) + await agent_setup.settle_speculative_agent_build(task) assert task.done() diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py new file mode 100644 index 000000000..e12283a75 --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -0,0 +1,107 @@ +"""Behavior tests for orchestration event-stream execution.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import pytest + +from app.tasks.chat.streaming.orchestration import stream_agent_events +from app.tasks.chat.streaming.stream_result import StreamResult + +pytestmark = pytest.mark.unit + + +@dataclass +class _Chunk: + content: Any = "" + additional_kwargs: dict[str, Any] = field(default_factory=dict) + tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + + +class _StreamingService: + def __init__(self) -> None: + self._text_idx = 0 + + def generate_text_id(self) -> str: + self._text_idx += 1 + return f"text-{self._text_idx}" + + def format_text_start(self, text_id: str) -> str: + return f"text_start:{text_id}" + + def format_text_delta(self, text_id: str, text: str) -> str: + return f"text_delta:{text_id}:{text}" + + def format_text_end(self, text_id: str) -> str: + return f"text_end:{text_id}" + + +class _Agent: + def __init__(self, events: list[dict[str, Any]]) -> None: + self.events = list(events) + self.calls: list[tuple[Any, dict[str, Any]]] = [] + + async def astream_events(self, input_data: Any, **kwargs: Any): + self.calls.append((input_data, kwargs)) + for event in self.events: + yield event + + +async def _collect(stream: Any) -> list[str]: + out: list[str] = [] + async for x in stream: + out.append(x) + return out + + +async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> None: + service = _StreamingService() + agent = _Agent( + [ + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="Hello")}}, + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}}, + ] + ) + result = StreamResult() + + frames = await _collect( + stream_agent_events( + agent=agent, + config={"configurable": {"thread_id": "t-1"}}, + input_data={"messages": []}, + streaming_service=service, + result=result, + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:Hello", + "text_delta:text-1: world", + "text_end:text-1", + ] + assert result.accumulated_text == "Hello world" + assert result.agent_called_update_memory is False + assert agent.calls[0][1]["version"] == "v2" + + +async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) + result = StreamResult() + + _ = await _collect( + stream_agent_events( + agent=agent, + config={"configurable": {"thread_id": "t-2"}}, + input_data={"messages": []}, + streaming_service=service, + result=result, + runtime_context={"mentioned_document_ids": [1, 2]}, + ) + ) + + assert agent.calls + assert agent.calls[0][1]["context"] == {"mentioned_document_ids": [1, 2]} From 52593d88dbf2dfa5d81587048f4d2bb0ea0d5cad Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 16:00:15 +0200 Subject: [PATCH 20/58] Reorganize streaming orchestration modules into relay and orchestration folders. 
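
Structural move only at the call sites: modules shift under relay/ and
orchestration/ and imports update accordingly, e.g.:

    from app.tasks.chat.streaming.orchestration.orchestrator import stream_chat
    from app.tasks.chat.streaming.relay.event_relay import EventRelay

stream_result.StreamResult carries on as orchestration.output.StreamOutput.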
--- .../app/routes/new_chat_routes.py | 2 +- .../chat/streaming/orchestration/__init__.py | 8 +- .../streaming/orchestration/event_stream.py | 6 +- .../chat/streaming/orchestration/input.py | 23 +++++ .../{ => orchestration}/orchestrator.py | 33 +++++++ .../output.py} | 8 +- .../tasks/chat/streaming/relay/__init__.py | 4 + .../chat/streaming/{ => relay}/event_relay.py | 4 +- .../test_orchestration_event_stream.py | 6 +- .../test_orchestrator_stream_chat.py | 88 +++++++++++++++++++ 10 files changed, 170 insertions(+), 12 deletions(-) create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/input.py rename surfsense_backend/app/tasks/chat/streaming/{ => orchestration}/orchestrator.py (76%) rename surfsense_backend/app/tasks/chat/streaming/{stream_result.py => orchestration/output.py} (82%) rename surfsense_backend/app/tasks/chat/streaming/{ => relay}/event_relay.py (97%) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 7f035daef..e54497f93 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -75,7 +75,7 @@ from app.tasks.chat.stream_new_chat import ( stream_new_chat as legacy_stream_new_chat, stream_resume_chat as legacy_stream_resume_chat, ) -from app.tasks.chat.streaming.orchestrator import ( +from app.tasks.chat.streaming.orchestration.orchestrator import ( stream_chat, stream_regenerate, stream_resume, diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py index 8b586f2be..6f683a410 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py @@ -1,5 +1,11 @@ """Composable orchestration pieces for chat streaming.""" from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events +from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput +from app.tasks.chat.streaming.orchestration.output import StreamOutput -__all__ = ["stream_agent_events"] +__all__ = [ + "StreamExecutionInput", + "StreamOutput", + "stream_agent_events", +] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py index 1448cd86a..369883c3a 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py @@ -6,9 +6,9 @@ from collections.abc import AsyncIterator from typing import Any from app.agents.new_chat.feature_flags import get_flags -from app.tasks.chat.streaming.event_relay import EventRelay +from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState -from app.tasks.chat.streaming.stream_result import StreamResult async def stream_agent_events( @@ -17,7 +17,7 @@ async def stream_agent_events( config: dict[str, Any], input_data: Any, streaming_service: Any, - result: StreamResult, + result: StreamOutput, step_prefix: str = "thinking", initial_step_id: str | None = None, initial_step_title: str = "", diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py 
b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py new file mode 100644 index 000000000..13d43b612 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py @@ -0,0 +1,23 @@ +"""Inputs for orchestrator-owned streaming execution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class StreamExecutionInput: + """Container for dependencies required by ``stream_agent_events``.""" + + agent: Any + config: dict[str, Any] + input_data: Any + streaming_service: Any + step_prefix: str = "thinking" + initial_step_id: str | None = None + initial_step_title: str = "" + initial_step_items: list[str] | None = None + content_builder: Any | None = None + runtime_context: Any = None + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py similarity index 76% rename from surfsense_backend/app/tasks/chat/streaming/orchestrator.py rename to surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index e912dd632..ac7abc6f4 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -14,6 +14,9 @@ from typing import Any, Literal from app.agents.new_chat.filesystem_selection import FilesystemSelection from app.db import ChatVisibility from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat +from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events +from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput +from app.tasks.chat.streaming.orchestration.output import StreamOutput async def stream_chat( @@ -34,8 +37,38 @@ async def stream_chat( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, user_image_data_urls: list[str] | None = None, + orchestration_input: StreamExecutionInput | None = None, ) -> AsyncGenerator[str, None]: """Stream a new chat turn through the current production pipeline.""" + if orchestration_input is not None: + result = StreamOutput( + request_id=request_id, + turn_id=f"{chat_id}:orchestrator", + filesystem_mode=( + filesystem_selection.mode.value if filesystem_selection else "cloud" + ), + client_platform=( + filesystem_selection.client_platform.value + if filesystem_selection + else "web" + ), + ) + async for frame in stream_agent_events( + agent=orchestration_input.agent, + config=orchestration_input.config, + input_data=orchestration_input.input_data, + streaming_service=orchestration_input.streaming_service, + result=result, + step_prefix=orchestration_input.step_prefix, + initial_step_id=orchestration_input.initial_step_id, + initial_step_title=orchestration_input.initial_step_title, + initial_step_items=orchestration_input.initial_step_items, + content_builder=orchestration_input.content_builder, + runtime_context=orchestration_input.runtime_context, + ): + yield frame + return + async for chunk in stream_new_chat( user_query=user_query, search_space_id=search_space_id, diff --git a/surfsense_backend/app/tasks/chat/streaming/stream_result.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py similarity index 82% rename from surfsense_backend/app/tasks/chat/streaming/stream_result.py rename to surfsense_backend/app/tasks/chat/streaming/orchestration/output.py index 8ea3bd295..0c4870ec4 100644 --- 
a/surfsense_backend/app/tasks/chat/streaming/stream_result.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py @@ -1,4 +1,4 @@ -"""Mutable facts collected while streaming one agent turn.""" +"""Output facts collected while streaming one orchestrated agent turn.""" from __future__ import annotations @@ -7,7 +7,7 @@ from typing import Any @dataclass -class StreamResult: +class StreamOutput: accumulated_text: str = "" is_interrupted: bool = False interrupt_value: dict[str, Any] | None = None @@ -26,3 +26,7 @@ class StreamResult: commit_gate_reason: str = "" assistant_message_id: int | None = None content_builder: Any | None = field(default=None, repr=False) + + +# Backwards-compatible alias while imports migrate. +StreamResult = StreamOutput diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py index c1a5e7175..351e878a8 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py @@ -1,3 +1,7 @@ """Relay state: thinking steps, tool bookkeeping, and stream helpers.""" from __future__ import annotations + +from app.tasks.chat.streaming.relay.event_relay import EventRelay, EventRelayConfig + +__all__ = ["EventRelay", "EventRelayConfig"] diff --git a/surfsense_backend/app/tasks/chat/streaming/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py similarity index 97% rename from surfsense_backend/app/tasks/chat/streaming/event_relay.py rename to surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index f86337ad7..072baac72 100644 --- a/surfsense_backend/app/tasks/chat/streaming/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -16,11 +16,11 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( ) from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames +from app.tasks.chat.streaming.orchestration.output import StreamOutput from app.tasks.chat.streaming.relay.state import AgentEventRelayState from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, ) -from app.tasks.chat.streaming.stream_result import StreamResult @dataclass @@ -52,7 +52,7 @@ class EventRelay: events: AsyncIterator[dict[str, Any]], *, state: AgentEventRelayState, - result: StreamResult, + result: StreamOutput, step_prefix: str = "thinking", content_builder: Any | None = None, config: dict[str, Any] | None = None, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py index e12283a75..e0a1877a8 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -8,7 +8,7 @@ from typing import Any import pytest from app.tasks.chat.streaming.orchestration import stream_agent_events -from app.tasks.chat.streaming.stream_result import StreamResult +from app.tasks.chat.streaming.orchestration.output import StreamOutput pytestmark = pytest.mark.unit @@ -64,7 +64,7 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}}, ] ) - result = StreamResult() + result = 
StreamOutput() frames = await _collect( stream_agent_events( @@ -90,7 +90,7 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: service = _StreamingService() agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) - result = StreamResult() + result = StreamOutput() _ = await _collect( stream_agent_events( diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py new file mode 100644 index 000000000..cf54fdab0 --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -0,0 +1,88 @@ +"""Behavior tests for orchestrator ``stream_chat`` public API.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import pytest + +from app.tasks.chat.streaming.orchestration import StreamExecutionInput +from app.tasks.chat.streaming.orchestration.orchestrator import stream_chat + +pytestmark = pytest.mark.unit + + +@dataclass +class _Chunk: + content: Any = "" + additional_kwargs: dict[str, Any] = field(default_factory=dict) + tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + + +class _StreamingService: + def __init__(self) -> None: + self._text_idx = 0 + + def generate_text_id(self) -> str: + self._text_idx += 1 + return f"text-{self._text_idx}" + + def format_text_start(self, text_id: str) -> str: + return f"text_start:{text_id}" + + def format_text_delta(self, text_id: str, text: str) -> str: + return f"text_delta:{text_id}:{text}" + + def format_text_end(self, text_id: str) -> str: + return f"text_end:{text_id}" + + +class _Agent: + def __init__(self, events: list[dict[str, Any]]) -> None: + self.events = list(events) + self.calls: list[tuple[Any, dict[str, Any]]] = [] + + async def astream_events(self, input_data: Any, **kwargs: Any): + self.calls.append((input_data, kwargs)) + for event in self.events: + yield event + + +async def _collect(stream: Any) -> list[str]: + out: list[str] = [] + async for x in stream: + out.append(x) + return out + + +async def test_stream_chat_uses_orchestration_input_path() -> None: + service = _StreamingService() + agent = _Agent( + [ + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="hello")}}, + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="!")}}, + ] + ) + frames = await _collect( + stream_chat( + user_query="ignored-here", + search_space_id=1, + chat_id=77, + orchestration_input=StreamExecutionInput( + agent=agent, + config={"configurable": {"thread_id": "thread-1"}}, + input_data={"messages": []}, + streaming_service=service, + ), + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:hello", + "text_delta:text-1:!", + "text_end:text-1", + ] + assert agent.calls + assert agent.calls[0][1]["version"] == "v2" From 0f40279d9581515dddbd847318ef37589972630c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 16:18:29 +0200 Subject: [PATCH 21/58] Expand orchestration gate coverage to resume and regenerate flows. 
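stream_resume() and stream_regenerate() now accept the same optional
StreamExecutionInput gate that stream_chat() gained in the previous patch,
so all three entrypoints can short-circuit into stream_agent_events. The
only per-flow difference is the StreamOutput turn_id suffix
("orchestrator-resume" / "orchestrator-regenerate"); when the gate is taken,
the stream_resume_chat / stream_new_chat delegation is skipped entirely.

A minimal sketch of the resume flow under the gate, reusing the same kind of
illustrative agent and streaming-service doubles as the unit tests below.
Note that on the gated path the decisions list is not consulted, because
input_data drives the agent directly:

    from app.tasks.chat.streaming.orchestration import StreamExecutionInput
    from app.tasks.chat.streaming.orchestration.orchestrator import stream_resume

    async def collect_resume_frames(agent, streaming_service) -> list[str]:
        # Gated path: the pre-built agent is streamed directly; no
        # checkpointer or database lookups happen in stream_resume itself.
        return [
            frame
            async for frame in stream_resume(
                chat_id=9,
                search_space_id=1,
                decisions=[],
                orchestration_input=StreamExecutionInput(
                    agent=agent,
                    config={"configurable": {"thread_id": "thread-r"}},
                    input_data={"messages": []},
                    streaming_service=streaming_service,
                ),
            )
        ]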
--- .../streaming/orchestration/orchestrator.py | 60 +++++++++++++++++++ .../test_orchestration_event_stream.py | 18 ++++-- .../test_orchestrator_stream_chat.py | 58 +++++++++++++++++- 3 files changed, 128 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index ac7abc6f4..1e32e7f5a 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -101,8 +101,38 @@ async def stream_resume( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, disabled_tools: list[str] | None = None, + orchestration_input: StreamExecutionInput | None = None, ) -> AsyncGenerator[str, None]: """Resume an interrupted chat turn through the current production pipeline.""" + if orchestration_input is not None: + result = StreamOutput( + request_id=request_id, + turn_id=f"{chat_id}:orchestrator-resume", + filesystem_mode=( + filesystem_selection.mode.value if filesystem_selection else "cloud" + ), + client_platform=( + filesystem_selection.client_platform.value + if filesystem_selection + else "web" + ), + ) + async for frame in stream_agent_events( + agent=orchestration_input.agent, + config=orchestration_input.config, + input_data=orchestration_input.input_data, + streaming_service=orchestration_input.streaming_service, + result=result, + step_prefix=orchestration_input.step_prefix, + initial_step_id=orchestration_input.initial_step_id, + initial_step_title=orchestration_input.initial_step_title, + initial_step_items=orchestration_input.initial_step_items, + content_builder=orchestration_input.content_builder, + runtime_context=orchestration_input.runtime_context, + ): + yield frame + return + async for chunk in stream_resume_chat( chat_id=chat_id, search_space_id=search_space_id, @@ -136,8 +166,38 @@ async def stream_regenerate( request_id: str | None = None, user_image_data_urls: list[str] | None = None, flow: Literal["new", "regenerate"] = "regenerate", + orchestration_input: StreamExecutionInput | None = None, ) -> AsyncGenerator[str, None]: """Regenerate an assistant turn through the current production pipeline.""" + if orchestration_input is not None: + result = StreamOutput( + request_id=request_id, + turn_id=f"{chat_id}:orchestrator-regenerate", + filesystem_mode=( + filesystem_selection.mode.value if filesystem_selection else "cloud" + ), + client_platform=( + filesystem_selection.client_platform.value + if filesystem_selection + else "web" + ), + ) + async for frame in stream_agent_events( + agent=orchestration_input.agent, + config=orchestration_input.config, + input_data=orchestration_input.input_data, + streaming_service=orchestration_input.streaming_service, + result=result, + step_prefix=orchestration_input.step_prefix, + initial_step_id=orchestration_input.initial_step_id, + initial_step_title=orchestration_input.initial_step_title, + initial_step_items=orchestration_input.initial_step_items, + content_builder=orchestration_input.content_builder, + runtime_context=orchestration_input.runtime_context, + ): + yield frame + return + async for chunk in stream_new_chat( user_query=user_query, search_space_id=search_space_id, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py index 
e0a1877a8..bd154e6a0 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -84,15 +84,20 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> ] assert result.accumulated_text == "Hello world" assert result.agent_called_update_memory is False - assert agent.calls[0][1]["version"] == "v2" async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) + class _ContextAwareAgent: + async def astream_events(self, input_data: Any, **kwargs: Any): + del input_data + text = "ctx-ok" if kwargs.get("context") else "ctx-missing" + yield {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(text)}} + + agent = _ContextAwareAgent() result = StreamOutput() - _ = await _collect( + frames = await _collect( stream_agent_events( agent=agent, config={"configurable": {"thread_id": "t-2"}}, @@ -103,5 +108,8 @@ async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: ) ) - assert agent.calls - assert agent.calls[0][1]["context"] == {"mentioned_document_ids": [1, 2]} + assert frames == [ + "text_start:text-1", + "text_delta:text-1:ctx-ok", + "text_end:text-1", + ] diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py index cf54fdab0..d9cd7951f 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -8,7 +8,11 @@ from typing import Any import pytest from app.tasks.chat.streaming.orchestration import StreamExecutionInput -from app.tasks.chat.streaming.orchestration.orchestrator import stream_chat +from app.tasks.chat.streaming.orchestration.orchestrator import ( + stream_chat, + stream_regenerate, + stream_resume, +) pytestmark = pytest.mark.unit @@ -84,5 +88,53 @@ async def test_stream_chat_uses_orchestration_input_path() -> None: "text_delta:text-1:!", "text_end:text-1", ] - assert agent.calls - assert agent.calls[0][1]["version"] == "v2" + + +async def test_stream_resume_uses_orchestration_input_path() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}]) + + frames = await _collect( + stream_resume( + chat_id=9, + search_space_id=1, + decisions=[], + orchestration_input=StreamExecutionInput( + agent=agent, + config={"configurable": {"thread_id": "thread-r"}}, + input_data={"messages": []}, + streaming_service=service, + ), + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:r", + "text_end:text-1", + ] + + +async def test_stream_regenerate_uses_orchestration_input_path() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}]) + + frames = await _collect( + stream_regenerate( + user_query="q", + search_space_id=1, + chat_id=2, + orchestration_input=StreamExecutionInput( + agent=agent, + config={"configurable": {"thread_id": "thread-g"}}, + input_data={"messages": []}, + streaming_service=service, + ), + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:g", + "text_end:text-1", + ] From a04b2e88bdaeea624c0d192b3259bbd3482bc717 Mon Sep 17 00:00:00 2001 
From: CREDO23 Date: Thu, 7 May 2026 17:06:17 +0200 Subject: [PATCH 22/58] Wire orchestrator streaming context path and align event relay outputs. --- .../chat/streaming/orchestration/__init__.py | 12 +- .../streaming/orchestration/event_stream.py | 6 +- .../chat/streaming/orchestration/input.py | 4 +- .../streaming/orchestration/orchestrator.py | 142 ++++++++---------- .../chat/streaming/orchestration/output.py | 5 +- .../tasks/chat/streaming/relay/event_relay.py | 4 +- .../test_orchestration_event_stream.py | 16 +- .../test_orchestrator_stream_chat.py | 14 +- 8 files changed, 94 insertions(+), 109 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py index 6f683a410..b1a201fd3 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py @@ -1,11 +1,11 @@ """Composable orchestration pieces for chat streaming.""" -from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events -from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.event_stream import stream_output +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.tasks.chat.streaming.orchestration.output import StreamingResult __all__ = [ - "StreamExecutionInput", - "StreamOutput", - "stream_agent_events", + "StreamingContext", + "StreamingResult", + "stream_output", ] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py index 369883c3a..fc8c13027 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py @@ -6,18 +6,18 @@ from collections.abc import AsyncIterator from typing import Any from app.agents.new_chat.feature_flags import get_flags -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.output import StreamingResult from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState -async def stream_agent_events( +async def stream_output( *, agent: Any, config: dict[str, Any], input_data: Any, streaming_service: Any, - result: StreamOutput, + result: StreamingResult, step_prefix: str = "thinking", initial_step_id: str | None = None, initial_step_title: str = "", diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py index 13d43b612..45a33d435 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py @@ -7,8 +7,8 @@ from typing import Any @dataclass(frozen=True) -class StreamExecutionInput: - """Container for dependencies required by ``stream_agent_events``.""" +class StreamingContext: + """Container for dependencies required by ``stream_output``.""" agent: Any config: dict[str, Any] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index 1e32e7f5a..b40083f42 100644 --- 
a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -1,9 +1,4 @@ """Top-level chat streaming entrypoints. - -For now these orchestrator functions are thin compatibility wrappers around the -current ``stream_new_chat`` / ``stream_resume_chat`` implementations. Routing -calls through this module lets us cut over to the fully modular event relay in -one place later without touching API routes again. """ from __future__ import annotations @@ -14,9 +9,47 @@ from typing import Any, Literal from app.agents.new_chat.filesystem_selection import FilesystemSelection from app.db import ChatVisibility from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat -from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events -from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.event_stream import stream_output +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.tasks.chat.streaming.orchestration.output import StreamingResult + + +def _build_streaming_result( + *, + chat_id: int, + request_id: str | None, + filesystem_selection: FilesystemSelection | None, + suffix: str, +) -> StreamingResult: + return StreamingResult( + request_id=request_id, + turn_id=f"{chat_id}:{suffix}", + filesystem_mode=(filesystem_selection.mode.value if filesystem_selection else "cloud"), + client_platform=( + filesystem_selection.client_platform.value if filesystem_selection else "web" + ), + ) + + +async def _stream_output_with_streaming_context( + *, + streaming_context: StreamingContext, + result: StreamingResult, +) -> AsyncGenerator[str, None]: + async for frame in stream_output( + agent=streaming_context.agent, + config=streaming_context.config, + input_data=streaming_context.input_data, + streaming_service=streaming_context.streaming_service, + result=result, + step_prefix=streaming_context.step_prefix, + initial_step_id=streaming_context.initial_step_id, + initial_step_title=streaming_context.initial_step_title, + initial_step_items=streaming_context.initial_step_items, + content_builder=streaming_context.content_builder, + runtime_context=streaming_context.runtime_context, + ): + yield frame async def stream_chat( @@ -37,34 +70,19 @@ async def stream_chat( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, user_image_data_urls: list[str] | None = None, - orchestration_input: StreamExecutionInput | None = None, + streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Stream a new chat turn through the current production pipeline.""" - if orchestration_input is not None: - result = StreamOutput( + if streaming_context is not None: + result = _build_streaming_result( + chat_id=chat_id, request_id=request_id, - turn_id=f"{chat_id}:orchestrator", - filesystem_mode=( - filesystem_selection.mode.value if filesystem_selection else "cloud" - ), - client_platform=( - filesystem_selection.client_platform.value - if filesystem_selection - else "web" - ), + filesystem_selection=filesystem_selection, + suffix="orchestrator", ) - async for frame in stream_agent_events( - agent=orchestration_input.agent, - config=orchestration_input.config, - input_data=orchestration_input.input_data, - streaming_service=orchestration_input.streaming_service, + async for 
frame in _stream_output_with_streaming_context( + streaming_context=streaming_context, result=result, - step_prefix=orchestration_input.step_prefix, - initial_step_id=orchestration_input.initial_step_id, - initial_step_title=orchestration_input.initial_step_title, - initial_step_items=orchestration_input.initial_step_items, - content_builder=orchestration_input.content_builder, - runtime_context=orchestration_input.runtime_context, ): yield frame return @@ -101,34 +119,19 @@ async def stream_resume( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, disabled_tools: list[str] | None = None, - orchestration_input: StreamExecutionInput | None = None, + streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Resume an interrupted chat turn through the current production pipeline.""" - if orchestration_input is not None: - result = StreamOutput( + if streaming_context is not None: + result = _build_streaming_result( + chat_id=chat_id, request_id=request_id, - turn_id=f"{chat_id}:orchestrator-resume", - filesystem_mode=( - filesystem_selection.mode.value if filesystem_selection else "cloud" - ), - client_platform=( - filesystem_selection.client_platform.value - if filesystem_selection - else "web" - ), + filesystem_selection=filesystem_selection, + suffix="orchestrator-resume", ) - async for frame in stream_agent_events( - agent=orchestration_input.agent, - config=orchestration_input.config, - input_data=orchestration_input.input_data, - streaming_service=orchestration_input.streaming_service, + async for frame in _stream_output_with_streaming_context( + streaming_context=streaming_context, result=result, - step_prefix=orchestration_input.step_prefix, - initial_step_id=orchestration_input.initial_step_id, - initial_step_title=orchestration_input.initial_step_title, - initial_step_items=orchestration_input.initial_step_items, - content_builder=orchestration_input.content_builder, - runtime_context=orchestration_input.runtime_context, ): yield frame return @@ -166,34 +169,19 @@ async def stream_regenerate( request_id: str | None = None, user_image_data_urls: list[str] | None = None, flow: Literal["new", "regenerate"] = "regenerate", - orchestration_input: StreamExecutionInput | None = None, + streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Regenerate an assistant turn through the current production pipeline.""" - if orchestration_input is not None: - result = StreamOutput( + if streaming_context is not None: + result = _build_streaming_result( + chat_id=chat_id, request_id=request_id, - turn_id=f"{chat_id}:orchestrator-regenerate", - filesystem_mode=( - filesystem_selection.mode.value if filesystem_selection else "cloud" - ), - client_platform=( - filesystem_selection.client_platform.value - if filesystem_selection - else "web" - ), + filesystem_selection=filesystem_selection, + suffix="orchestrator-regenerate", ) - async for frame in stream_agent_events( - agent=orchestration_input.agent, - config=orchestration_input.config, - input_data=orchestration_input.input_data, - streaming_service=orchestration_input.streaming_service, + async for frame in _stream_output_with_streaming_context( + streaming_context=streaming_context, result=result, - step_prefix=orchestration_input.step_prefix, - initial_step_id=orchestration_input.initial_step_id, - initial_step_title=orchestration_input.initial_step_title, - initial_step_items=orchestration_input.initial_step_items, - 
content_builder=orchestration_input.content_builder, - runtime_context=orchestration_input.runtime_context, ): yield frame return diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py index 0c4870ec4..60f8ee6ee 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py @@ -7,7 +7,7 @@ from typing import Any @dataclass -class StreamOutput: +class StreamingResult: accumulated_text: str = "" is_interrupted: bool = False interrupt_value: dict[str, Any] | None = None @@ -27,6 +27,3 @@ class StreamOutput: assistant_message_id: int | None = None content_builder: Any | None = field(default=None, repr=False) - -# Backwards-compatible alias while imports migrate. -StreamResult = StreamOutput diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index 072baac72..c8aebd99c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -16,7 +16,7 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( ) from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.output import StreamingResult from app.tasks.chat.streaming.relay.state import AgentEventRelayState from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, @@ -52,7 +52,7 @@ class EventRelay: events: AsyncIterator[dict[str, Any]], *, state: AgentEventRelayState, - result: StreamOutput, + result: StreamingResult, step_prefix: str = "thinking", content_builder: Any | None = None, config: dict[str, Any] | None = None, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py index bd154e6a0..b17d82293 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -7,8 +7,8 @@ from typing import Any import pytest -from app.tasks.chat.streaming.orchestration import stream_agent_events -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration import stream_output +from app.tasks.chat.streaming.orchestration.output import StreamingResult pytestmark = pytest.mark.unit @@ -56,7 +56,7 @@ async def _collect(stream: Any) -> list[str]: return out -async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> None: +async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None: service = _StreamingService() agent = _Agent( [ @@ -64,10 +64,10 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}}, ] ) - result = StreamOutput() + result = StreamingResult() frames = await _collect( - stream_agent_events( + stream_output( agent=agent, config={"configurable": {"thread_id": "t-1"}}, input_data={"messages": []}, @@ -86,7 +86,7 @@ async def 
test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> assert result.agent_called_update_memory is False -async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: +async def test_stream_output_passes_runtime_context_to_agent() -> None: service = _StreamingService() class _ContextAwareAgent: async def astream_events(self, input_data: Any, **kwargs: Any): @@ -95,10 +95,10 @@ async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: yield {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(text)}} agent = _ContextAwareAgent() - result = StreamOutput() + result = StreamingResult() frames = await _collect( - stream_agent_events( + stream_output( agent=agent, config={"configurable": {"thread_id": "t-2"}}, input_data={"messages": []}, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py index d9cd7951f..b84193cb7 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -7,7 +7,7 @@ from typing import Any import pytest -from app.tasks.chat.streaming.orchestration import StreamExecutionInput +from app.tasks.chat.streaming.orchestration import StreamingContext from app.tasks.chat.streaming.orchestration.orchestrator import ( stream_chat, stream_regenerate, @@ -60,7 +60,7 @@ async def _collect(stream: Any) -> list[str]: return out -async def test_stream_chat_uses_orchestration_input_path() -> None: +async def test_stream_chat_uses_streaming_context_path() -> None: service = _StreamingService() agent = _Agent( [ @@ -73,7 +73,7 @@ async def test_stream_chat_uses_orchestration_input_path() -> None: user_query="ignored-here", search_space_id=1, chat_id=77, - orchestration_input=StreamExecutionInput( + streaming_context=StreamingContext( agent=agent, config={"configurable": {"thread_id": "thread-1"}}, input_data={"messages": []}, @@ -90,7 +90,7 @@ async def test_stream_chat_uses_orchestration_input_path() -> None: ] -async def test_stream_resume_uses_orchestration_input_path() -> None: +async def test_stream_resume_uses_streaming_context_path() -> None: service = _StreamingService() agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}]) @@ -99,7 +99,7 @@ async def test_stream_resume_uses_orchestration_input_path() -> None: chat_id=9, search_space_id=1, decisions=[], - orchestration_input=StreamExecutionInput( + streaming_context=StreamingContext( agent=agent, config={"configurable": {"thread_id": "thread-r"}}, input_data={"messages": []}, @@ -115,7 +115,7 @@ async def test_stream_regenerate_uses_orchestration_input_path() -> None: +async def test_stream_regenerate_uses_streaming_context_path() -> None: service = _StreamingService() agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}]) @@ -124,7 +124,7 @@ async def test_stream_regenerate_uses_orchestration_input_path() -> None: user_query="q", search_space_id=1, chat_id=2, - orchestration_input=StreamExecutionInput( + streaming_context=StreamingContext( agent=agent, config={"configurable": {"thread_id": "thread-g"}}, input_data={"messages": []}, From 52895e37e9ec86aba93cf4dd3fcf734da2b2e162 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 17:57:27 +0200 Subject: [PATCH 23/58] Build streaming 
contexts for chat resume and regenerate paths. --- .../streaming/orchestration/orchestrator.py | 55 +++- .../streaming_context/__init__.py | 18 ++ .../orchestration/streaming_context/chat.py | 258 ++++++++++++++++++ .../streaming_context/regenerate.py | 49 ++++ .../orchestration/streaming_context/resume.py | 154 +++++++++++ .../test_orchestrator_stream_chat.py | 100 +++++++ 6 files changed, 633 insertions(+), 1 deletion(-) create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index b40083f42..80cae77a2 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -9,6 +9,11 @@ from typing import Any, Literal from app.agents.new_chat.filesystem_selection import FilesystemSelection from app.db import ChatVisibility from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat +from app.tasks.chat.streaming.orchestration.streaming_context import ( + build_chat_streaming_context, + build_regenerate_streaming_context, + build_resume_streaming_context, +) from app.tasks.chat.streaming.orchestration.event_stream import stream_output from app.tasks.chat.streaming.orchestration.input import StreamingContext from app.tasks.chat.streaming.orchestration.output import StreamingResult @@ -38,7 +43,7 @@ async def _stream_output_with_streaming_context( ) -> AsyncGenerator[str, None]: async for frame in stream_output( agent=streaming_context.agent, - config=streaming_context.config, + config=streaming_context.config, input_data=streaming_context.input_data, streaming_service=streaming_context.streaming_service, result=result, @@ -73,6 +78,24 @@ async def stream_chat( streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Stream a new chat turn through the current production pipeline.""" + if streaming_context is None: + streaming_context = await build_chat_streaming_context( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ) if streaming_context is not None: result = _build_streaming_result( chat_id=chat_id, @@ -122,6 +145,18 @@ async def stream_resume( streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Resume an interrupted chat turn through the current production pipeline.""" + if streaming_context is None: + streaming_context = await build_resume_streaming_context( + chat_id=chat_id, + search_space_id=search_space_id, + decisions=decisions, + user_id=user_id, + llm_config_id=llm_config_id, +
thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + request_id=request_id, + disabled_tools=disabled_tools, + ) if streaming_context is not None: result = _build_streaming_result( chat_id=chat_id, @@ -172,6 +207,24 @@ async def stream_regenerate( streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Regenerate an assistant turn through the current production pipeline.""" + if streaming_context is None: + streaming_context = await build_regenerate_streaming_context( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ) if streaming_context is not None: result = _build_streaming_result( chat_id=chat_id, diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py new file mode 100644 index 000000000..1bd3e103d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py @@ -0,0 +1,18 @@ +"""Streaming context builders per orchestrator entrypoint.""" + +from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( + build_chat_streaming_context, +) +from app.tasks.chat.streaming.orchestration.streaming_context.regenerate import ( + build_regenerate_streaming_context, +) +from app.tasks.chat.streaming.orchestration.streaming_context.resume import ( + build_resume_streaming_context, +) + +__all__ = [ + "build_chat_streaming_context", + "build_regenerate_streaming_context", + "build_resume_streaming_context", +] + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py new file mode 100644 index 000000000..eb459ae5c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py @@ -0,0 +1,258 @@ +"""Build ``StreamingContext`` for chat streaming.""" + +from __future__ import annotations + +import logging +import time +from typing import Any + +from langchain_core.messages import HumanMessage +from sqlalchemy.future import select +from sqlalchemy.orm import selectinload + +from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent +from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.agents.new_chat.checkpointer import get_checkpointer +from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.agents.new_chat.llm_config import ( + AgentConfig, + create_chat_litellm_from_agent_config, + create_chat_litellm_from_config, + load_agent_config, + load_global_llm_config_by_id, +) +from app.db import ( + ChatVisibility, + NewChatThread, + Report, + SearchSourceConnectorType, + SurfsenseDocsDocument, + async_session_maker, +) +from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id +from app.services.connector_service import 
ConnectorService +from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.stream_new_chat import format_mentioned_surfsense_docs_as_context +from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.utils.content_utils import bootstrap_history_from_db +from app.utils.user_message_multimodal import build_human_message_content + +logger = logging.getLogger(__name__) + + +async def build_chat_streaming_context( + *, + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, +) -> StreamingContext | None: + """Build context for ``stream_output`` from route-level chat inputs.""" + session = async_session_maker() + try: + requested_llm_config_id = llm_config_id + llm_config_id = ( + await resolve_or_get_pinned_llm_config_id( + session, + thread_id=chat_id, + search_space_id=search_space_id, + user_id=user_id, + selected_llm_config_id=llm_config_id, + requires_image_input=bool(user_image_data_urls), + ) + ).resolved_llm_config_id + + llm: Any + agent_config: AgentConfig | None + if llm_config_id >= 0: + agent_config = await load_agent_config( + session=session, + config_id=llm_config_id, + search_space_id=search_space_id, + ) + if not agent_config: + logger.warning("streaming context build failed: missing config %s", llm_config_id) + return None + llm = create_chat_litellm_from_agent_config(agent_config) + else: + loaded_llm_config = load_global_llm_config_by_id(llm_config_id) + if not loaded_llm_config: + logger.warning( + "streaming context build failed: missing global config %s", + llm_config_id, + ) + return None + llm = create_chat_litellm_from_config(loaded_llm_config) + agent_config = AgentConfig.from_yaml_config(loaded_llm_config) + + connector_service = ConnectorService(session, search_space_id=search_space_id) + firecrawl_api_key = None + webcrawler_connector = await connector_service.get_connector_by_type( + SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, + search_space_id, + ) + if webcrawler_connector and webcrawler_connector.config: + firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") + + checkpointer = await get_checkpointer() + visibility = thread_visibility or ChatVisibility.PRIVATE + + from app.config import config as app_config + + agent_factory = ( + create_multi_agent_chat_deep_agent + if bool(app_config.MULTI_AGENT_CHAT_ENABLED) + else create_surfsense_deep_agent + ) + agent = await build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, + ) + + langchain_messages = [] + if needs_history_bootstrap: + 
langchain_messages = await bootstrap_history_from_db( + session, + chat_id, + thread_visibility=visibility, + ) + thread_result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == chat_id) + ) + thread = thread_result.scalars().first() + if thread: + thread.needs_history_bootstrap = False + await session.commit() + + mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] + if mentioned_surfsense_doc_ids: + result = await session.execute( + select(SurfsenseDocsDocument) + .options(selectinload(SurfsenseDocsDocument.chunks)) + .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids)) + ) + mentioned_surfsense_docs = list(result.scalars().all()) + + recent_reports_result = await session.execute( + select(Report) + .filter(Report.thread_id == chat_id, Report.content.isnot(None)) + .order_by(Report.id.desc()) + .limit(3) + ) + recent_reports = list(recent_reports_result.scalars().all()) + + final_query = user_query + context_parts = [] + if mentioned_surfsense_docs: + context_parts.append( + format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) + ) + if recent_reports: + report_lines = [ + f' - report_id={r.id}, title="{r.title}", style="{r.report_style or "detailed"}"' + for r in recent_reports + ] + reports_listing = "\n".join(report_lines) + context_parts.append( + "\n" + "Previously generated reports in this conversation:\n" + f"{reports_listing}\n\n" + "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of these reports, " + "set parent_report_id to the relevant report_id above.\n" + "If the user wants a completely NEW report on a different topic, " + "leave parent_report_id unset.\n" + "" + ) + if context_parts: + joined_context = "\n\n".join(context_parts) + final_query = f"{joined_context}\n\n{user_query}" + if visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name: + final_query = f"**[{current_user_display_name}]:** {final_query}" + + human_content = build_human_message_content( + final_query, + list(user_image_data_urls or ()), + ) + langchain_messages.append(HumanMessage(content=human_content)) + + turn_id = f"{chat_id}:{int(time.time() * 1000)}" + input_state = { + "messages": langchain_messages, + "search_space_id": search_space_id, + "request_id": request_id or "unknown", + "turn_id": turn_id, + } + configurable = { + "thread_id": str(chat_id), + "request_id": request_id or "unknown", + "turn_id": turn_id, + } + if checkpoint_id: + configurable["checkpoint_id"] = checkpoint_id + config = {"configurable": configurable, "recursion_limit": 10_000} + + initial_title = ( + "Analyzing referenced content" + if mentioned_surfsense_docs + else "Understanding your request" + ) + action_verb = "Analyzing" if mentioned_surfsense_docs else "Processing" + query_excerpt = user_query[:80] + ("..." 
if len(user_query) > 80 else "") + query_part = query_excerpt if query_excerpt.strip() else "(message)" + initial_items = [f"{action_verb}: {query_part}"] + + runtime_context = SurfSenseContextSchema( + search_space_id=search_space_id, + mentioned_document_ids=list(mentioned_document_ids or []), + request_id=request_id, + turn_id=turn_id, + ) + + await session.commit() + return StreamingContext( + agent=agent, + config=config, + input_data=input_state, + streaming_service=VercelStreamingService(), + step_prefix="thinking", + initial_step_id="thinking-1", + initial_step_title=initial_title, + initial_step_items=initial_items, + content_builder=None, + runtime_context=runtime_context, + ) + except Exception: + logger.exception( + "Failed to build chat streaming context (llm_config_id=%s requested=%s)", + llm_config_id, + requested_llm_config_id, + ) + return None + finally: + await session.close() + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py new file mode 100644 index 000000000..02e871a2c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py @@ -0,0 +1,49 @@ +"""Build ``StreamingContext`` for regenerate streaming.""" + +from __future__ import annotations + +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.db import ChatVisibility +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( + build_chat_streaming_context, +) + + +async def build_regenerate_streaming_context( + *, + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, +) -> StreamingContext | None: + """Build context for ``stream_regenerate`` execution.""" + return await build_chat_streaming_context( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ) + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py new file mode 100644 index 000000000..6d0caea4d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py @@ -0,0 +1,154 @@ +"""Build ``StreamingContext`` for resume streaming.""" + +from __future__ import annotations + +import logging +import time +from typing import Any + +from langgraph.types import 
Command + +from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent +from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.agents.new_chat.checkpointer import get_checkpointer +from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.agents.new_chat.llm_config import ( + AgentConfig, + create_chat_litellm_from_agent_config, + create_chat_litellm_from_config, + load_agent_config, + load_global_llm_config_by_id, +) +from app.db import ChatVisibility, SearchSourceConnectorType, async_session_maker +from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id +from app.services.connector_service import ConnectorService +from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread +from app.tasks.chat.streaming.orchestration.input import StreamingContext + +logger = logging.getLogger(__name__) + + +async def build_resume_streaming_context( + *, + chat_id: int, + search_space_id: int, + decisions: list[dict], + user_id: str | None = None, + llm_config_id: int = -1, + thread_visibility: ChatVisibility | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + disabled_tools: list[str] | None = None, +) -> StreamingContext | None: + """Build context for ``stream_resume`` execution.""" + session = async_session_maker() + try: + llm_config_id = ( + await resolve_or_get_pinned_llm_config_id( + session, + thread_id=chat_id, + search_space_id=search_space_id, + user_id=user_id, + selected_llm_config_id=llm_config_id, + ) + ).resolved_llm_config_id + + llm: Any + agent_config: AgentConfig | None + if llm_config_id >= 0: + agent_config = await load_agent_config( + session=session, + config_id=llm_config_id, + search_space_id=search_space_id, + ) + if not agent_config: + logger.warning("resume context build failed: missing config %s", llm_config_id) + return None + llm = create_chat_litellm_from_agent_config(agent_config) + else: + loaded_llm_config = load_global_llm_config_by_id(llm_config_id) + if not loaded_llm_config: + logger.warning( + "resume context build failed: missing global config %s", + llm_config_id, + ) + return None + llm = create_chat_litellm_from_config(loaded_llm_config) + agent_config = AgentConfig.from_yaml_config(loaded_llm_config) + + connector_service = ConnectorService(session, search_space_id=search_space_id) + firecrawl_api_key = None + webcrawler_connector = await connector_service.get_connector_by_type( + SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, + search_space_id, + ) + if webcrawler_connector and webcrawler_connector.config: + firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") + + checkpointer = await get_checkpointer() + visibility = thread_visibility or ChatVisibility.PRIVATE + + from app.config import config as app_config + + agent_factory = ( + create_multi_agent_chat_deep_agent + if bool(app_config.MULTI_AGENT_CHAT_ENABLED) + else create_surfsense_deep_agent + ) + agent = await build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + 
disabled_tools=disabled_tools, + ) + + turn_id = f"{chat_id}:{int(time.time() * 1000)}" + config = { + "configurable": { + "thread_id": str(chat_id), + "request_id": request_id or "unknown", + "turn_id": turn_id, + "surfsense_resume_value": {"decisions": decisions}, + }, + "recursion_limit": 10_000, + } + + runtime_context = SurfSenseContextSchema( + search_space_id=search_space_id, + request_id=request_id, + turn_id=turn_id, + ) + + await session.commit() + return StreamingContext( + agent=agent, + config=config, + input_data=Command(resume={"decisions": decisions}), + streaming_service=VercelStreamingService(), + step_prefix="thinking-resume", + initial_step_id=None, + initial_step_title="", + initial_step_items=None, + content_builder=None, + runtime_context=runtime_context, + ) + except Exception: + logger.exception( + "Failed to build resume streaming context (llm_config_id=%s)", + llm_config_id, + ) + return None + finally: + await session.close() + diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py index b84193cb7..46c61b498 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -8,6 +8,7 @@ from typing import Any import pytest from app.tasks.chat.streaming.orchestration import StreamingContext +from app.tasks.chat.streaming.orchestration import orchestrator from app.tasks.chat.streaming.orchestration.orchestrator import ( stream_chat, stream_regenerate, @@ -138,3 +139,102 @@ async def test_stream_regenerate_uses_streaming_context_path() -> None: "text_delta:text-1:g", "text_end:text-1", ] + + +async def test_stream_chat_builds_streaming_context_when_not_provided() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("b")}}]) + + async def _fake_builder(**kwargs: Any) -> StreamingContext: + del kwargs + return StreamingContext( + agent=agent, + config={"configurable": {"thread_id": "thread-b"}}, + input_data={"messages": []}, + streaming_service=service, + ) + + old = orchestrator.build_chat_streaming_context + orchestrator.build_chat_streaming_context = _fake_builder + try: + frames = await _collect( + stream_chat( + user_query="q", + search_space_id=1, + chat_id=3, + ) + ) + finally: + orchestrator.build_chat_streaming_context = old + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:b", + "text_end:text-1", + ] + + +async def test_stream_resume_builds_streaming_context_when_not_provided() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("u")}}]) + + async def _fake_builder(**kwargs: Any) -> StreamingContext: + del kwargs + return StreamingContext( + agent=agent, + config={"configurable": {"thread_id": "thread-u"}}, + input_data={"messages": []}, + streaming_service=service, + ) + + old = orchestrator.build_resume_streaming_context + orchestrator.build_resume_streaming_context = _fake_builder + try: + frames = await _collect( + stream_resume( + chat_id=9, + search_space_id=1, + decisions=[], + ) + ) + finally: + orchestrator.build_resume_streaming_context = old + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:u", + "text_end:text-1", + ] + + +async def test_stream_regenerate_builds_streaming_context_when_not_provided() -> None: + service = _StreamingService() 
+ agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) + + async def _fake_builder(**kwargs: Any) -> StreamingContext: + del kwargs + return StreamingContext( + agent=agent, + config={"configurable": {"thread_id": "thread-x"}}, + input_data={"messages": []}, + streaming_service=service, + ) + + old = orchestrator.build_regenerate_streaming_context + orchestrator.build_regenerate_streaming_context = _fake_builder + try: + frames = await _collect( + stream_regenerate( + user_query="q", + search_space_id=1, + chat_id=2, + ) + ) + finally: + orchestrator.build_regenerate_streaming_context = old + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:x", + "text_end:text-1", + ] From 7e07092f67d55a81916b74a5d3691dea5461f273 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 19:25:20 +0200 Subject: [PATCH 24/58] refactor(chat): drop alternate streaming entry path; use graph_stream --- surfsense_backend/app/config/__init__.py | 6 - .../app/routes/new_chat_routes.py | 32 +-- surfsense_backend/app/schemas/new_chat.py | 2 +- .../app/services/streaming/__init__.py | 6 +- .../app/services/streaming/events/error.py | 2 +- .../app/tasks/chat/streaming/__init__.py | 2 +- .../app/tasks/chat/streaming/agent_setup.py | 92 ------ .../chat/streaming/graph_stream/__init__.py | 21 ++ .../event_stream.py | 4 +- .../output.py => graph_stream/result.py} | 3 +- .../chat/streaming/orchestration/__init__.py | 11 - .../chat/streaming/orchestration/input.py | 23 -- .../streaming/orchestration/orchestrator.py | 261 ------------------ .../streaming_context/__init__.py | 18 -- .../orchestration/streaming_context/chat.py | 258 ----------------- .../streaming_context/regenerate.py | 49 ---- .../orchestration/streaming_context/resume.py | 154 ----------- .../tasks/chat/streaming/relay/__init__.py | 22 +- .../tasks/chat/streaming/relay/event_relay.py | 2 +- .../tasks/chat/streaming/test_agent_setup.py | 120 -------- .../test_orchestrator_stream_chat.py | 240 ---------------- .../chat/streaming/test_stage_1_parity.py | 4 +- ..._event_stream.py => test_stream_output.py} | 7 +- 23 files changed, 61 insertions(+), 1278 deletions(-) delete mode 100644 surfsense_backend/app/tasks/chat/streaming/agent_setup.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py rename surfsense_backend/app/tasks/chat/streaming/{orchestration => graph_stream}/event_stream.py (92%) rename surfsense_backend/app/tasks/chat/streaming/{orchestration/output.py => graph_stream/result.py} (91%) delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/input.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py delete mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py delete mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py rename surfsense_backend/tests/unit/tasks/chat/streaming/{test_orchestration_event_stream.py => test_stream_output.py} 
(93%) diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 543524456..f6f0c7f62 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -490,12 +490,6 @@ class Config: ENABLE_DESKTOP_LOCAL_FILESYSTEM = ( os.getenv("ENABLE_DESKTOP_LOCAL_FILESYSTEM", "FALSE").upper() == "TRUE" ) - # Streaming entrypoint switch. Keep this at the route layer so orchestrator - # code stays free of legacy fallback branching. - ENABLE_CHAT_STREAM_ORCHESTRATOR = ( - os.getenv("SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR", "TRUE").upper() - == "TRUE" - ) @classmethod def is_self_hosted(cls) -> bool: diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index e54497f93..743b5b849 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -72,13 +72,8 @@ from app.schemas.new_chat import ( TurnStatusResponse, ) from app.tasks.chat.stream_new_chat import ( - stream_new_chat as legacy_stream_new_chat, - stream_resume_chat as legacy_stream_resume_chat, -) -from app.tasks.chat.streaming.orchestration.orchestrator import ( - stream_chat, - stream_regenerate, - stream_resume, + stream_new_chat, + stream_resume_chat, ) from app.users import current_active_user from app.utils.perf import get_perf_logger @@ -98,10 +93,6 @@ TURN_CANCELLING_MAX_DELAY_MS = 1500 router = APIRouter() -def _use_streaming_orchestrator() -> bool: - return config.ENABLE_CHAT_STREAM_ORCHESTRATOR - - def _resolve_filesystem_selection( *, mode: str, @@ -1782,11 +1773,7 @@ async def handle_new_chat( ) return StreamingResponse( - ( - stream_chat - if _use_streaming_orchestrator() - else legacy_stream_new_chat - )( + stream_new_chat( user_query=request.user_query, search_space_id=request.search_space_id, chat_id=request.chat_id, @@ -2271,12 +2258,7 @@ async def regenerate_response( else None ) try: - regenerate_fn = ( - stream_regenerate - if _use_streaming_orchestrator() - else legacy_stream_new_chat - ) - async for chunk in regenerate_fn( + async for chunk in stream_new_chat( user_query=str(user_query_to_use), search_space_id=request.search_space_id, chat_id=thread_id, @@ -2408,11 +2390,7 @@ async def resume_chat( await session.close() return StreamingResponse( - ( - stream_resume - if _use_streaming_orchestrator() - else legacy_stream_resume_chat - )( + stream_resume_chat( chat_id=thread_id, search_space_id=request.search_space_id, decisions=decisions, diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 95d183433..fe8dab076 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -380,7 +380,7 @@ class ResumeRequest(BaseModel): "/regenerate. Resume reuses the original interrupted user " "turn so the server does not write a new user message. " "Currently unused but accepted to keep request bodies " - "uniform across the three streaming entrypoints." + "uniform across new-message, regenerate, and resume stream routes." 
        ),
    )
diff --git a/surfsense_backend/app/services/streaming/__init__.py b/surfsense_backend/app/services/streaming/__init__.py
index 287d48a7a..3ec9b9cf1 100644
--- a/surfsense_backend/app/services/streaming/__init__.py
+++ b/surfsense_backend/app/services/streaming/__init__.py
@@ -4,7 +4,7 @@ Layout:
 * ``envelope/`` - SSE wire framing + ID generators
 * ``emitter/`` - identity of the agent that emitted an event + runtime registry
 * ``events/`` - one module per SSE event family
-* ``service.py`` - composition root used by the orchestrator
+* ``service.py`` - composition root used when emitting chat SSE
 * ``interrupt_correlation.py`` - id-aware lookup over LangGraph state
 
 Naming on the wire:
@@ -13,8 +13,8 @@ Naming on the wire:
 * Every SurfSense-added field uses ``snake_case``, including the
   top-level ``emitted_by`` envelope and all inner ``data`` payloads.
 
-Production keeps using ``app.services.new_streaming_service`` and
-``app.tasks.chat.stream_new_chat`` until the cutover phase.
+Production chat streaming uses ``app.services.new_streaming_service``,
+driven by ``app.tasks.chat.stream_new_chat`` and the chat routes.
 """
 
 from __future__ import annotations
diff --git a/surfsense_backend/app/services/streaming/events/error.py b/surfsense_backend/app/services/streaming/events/error.py
index cd190d1f4..a1e8e01ca 100644
--- a/surfsense_backend/app/services/streaming/events/error.py
+++ b/surfsense_backend/app/services/streaming/events/error.py
@@ -1,4 +1,4 @@
-"""Single terminal error path the orchestrator must route through."""
+"""The single terminal error path that chat streaming must route through."""
 
 from __future__ import annotations
 
diff --git a/surfsense_backend/app/tasks/chat/streaming/__init__.py b/surfsense_backend/app/tasks/chat/streaming/__init__.py
index bb06cc021..70c99342a 100644
--- a/surfsense_backend/app/tasks/chat/streaming/__init__.py
+++ b/surfsense_backend/app/tasks/chat/streaming/__init__.py
@@ -1,3 +1,3 @@
-"""Chat streaming orchestrator and event relay."""
+"""Chat streaming helpers (e.g.
LangGraph → SSE relay under ``graph_stream``).""" from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/agent_setup.py b/surfsense_backend/app/tasks/chat/streaming/agent_setup.py deleted file mode 100644 index f67c6ad65..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/agent_setup.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Agent setup helpers for orchestrated chat streaming.""" - -from __future__ import annotations - -import contextlib -import logging -from collections.abc import Callable -from typing import Any - -_PREFLIGHT_TIMEOUT_SEC: float = 2.5 -_PREFLIGHT_MAX_TOKENS: int = 1 - - -async def preflight_llm( - llm: Any, - *, - is_provider_rate_limited: Callable[[BaseException], bool], -) -> None: - """Issue a minimal completion probe to catch immediate provider 429s.""" - from litellm import acompletion - - model = getattr(llm, "model", None) - if not model or model == "auto": - return - - try: - await acompletion( - model=model, - messages=[{"role": "user", "content": "ping"}], - api_key=getattr(llm, "api_key", None), - api_base=getattr(llm, "api_base", None), - max_tokens=_PREFLIGHT_MAX_TOKENS, - timeout=_PREFLIGHT_TIMEOUT_SEC, - stream=False, - metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]}, - ) - except Exception as exc: - if is_provider_rate_limited(exc): - raise - logging.getLogger(__name__).debug( - "auto_pin_preflight non_rate_limit_error model=%s err=%s", - model, - exc, - ) - - -async def build_main_agent_for_thread( - agent_factory: Any, - *, - llm: Any, - search_space_id: int, - db_session: Any, - connector_service: Any, - checkpointer: Any, - user_id: str | None, - thread_id: int | None, - agent_config: Any, - firecrawl_api_key: str | None, - thread_visibility: Any, - filesystem_selection: Any, - disabled_tools: list[str] | None = None, - mentioned_document_ids: list[int] | None = None, -) -> Any: - """Run one canonical agent-build call for a single thread.""" - return await agent_factory( - llm=llm, - search_space_id=search_space_id, - db_session=db_session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=thread_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=thread_visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - ) - - -async def settle_speculative_agent_build(task: Any) -> None: - """Wait for a discarded speculative build and swallow its outcome.""" - with contextlib.suppress(BaseException): - await task - - -__all__ = [ - "build_main_agent_for_thread", - "preflight_llm", - "settle_speculative_agent_build", -] diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py new file mode 100644 index 000000000..e3bf0426c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py @@ -0,0 +1,21 @@ +"""LangGraph ``astream_events`` → SSE (``stream_output`` + ``StreamingResult``). + +Imports are lazy to avoid a circular import with ``relay.event_relay``. 
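+
+A rough usage sketch (not a verbatim call site: the ``agent``, ``config``,
+and streaming-service objects are assumed to come from the caller, and
+optional presentation kwargs such as ``step_prefix`` keep their defaults):
+
+    from app.tasks.chat.streaming.graph_stream import (
+        StreamingResult,
+        stream_output,
+    )
+
+    async def relay_turn(agent, config, streaming_service):
+        # Per-turn facts accumulate on ``result`` while SSE strings stream out.
+        result = StreamingResult(request_id="req-1", turn_id="42:1700000000000")
+        async for frame in stream_output(
+            agent=agent,
+            config=config,
+            input_data={"messages": []},
+            streaming_service=streaming_service,
+            result=result,
+        ):
+            yield frame  # one pre-framed SSE string per relayed event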
+""" + +from __future__ import annotations + +__all__ = ["StreamingResult", "stream_output"] + + +def __getattr__(name: str): + if name == "stream_output": + from app.tasks.chat.streaming.graph_stream.event_stream import stream_output + + return stream_output + if name == "StreamingResult": + from app.tasks.chat.streaming.graph_stream.result import StreamingResult + + return StreamingResult + msg = f"module {__name__!r} has no attribute {name!r}" + raise AttributeError(msg) diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py similarity index 92% rename from surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py rename to surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py index fc8c13027..9142dd914 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py @@ -1,4 +1,4 @@ -"""Run LangGraph event streams through the EventRelay.""" +"""Run LangGraph event streams through ``EventRelay``.""" from __future__ import annotations @@ -6,7 +6,7 @@ from collections.abc import AsyncIterator from typing import Any from app.agents.new_chat.feature_flags import get_flags -from app.tasks.chat.streaming.orchestration.output import StreamingResult +from app.tasks.chat.streaming.graph_stream.result import StreamingResult from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py similarity index 91% rename from surfsense_backend/app/tasks/chat/streaming/orchestration/output.py rename to surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py index 60f8ee6ee..40404e9d0 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py @@ -1,4 +1,4 @@ -"""Output facts collected while streaming one orchestrated agent turn.""" +"""Mutable facts collected while relaying one agent stream (``stream_output``).""" from __future__ import annotations @@ -26,4 +26,3 @@ class StreamingResult: commit_gate_reason: str = "" assistant_message_id: int | None = None content_builder: Any | None = field(default=None, repr=False) - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py deleted file mode 100644 index b1a201fd3..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Composable orchestration pieces for chat streaming.""" - -from app.tasks.chat.streaming.orchestration.event_stream import stream_output -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.tasks.chat.streaming.orchestration.output import StreamingResult - -__all__ = [ - "StreamingContext", - "StreamingResult", - "stream_output", -] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py deleted file mode 100644 index 45a33d435..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Inputs for orchestrator-owned streaming execution.""" - -from 
__future__ import annotations - -from dataclasses import dataclass -from typing import Any - - -@dataclass(frozen=True) -class StreamingContext: - """Container for dependencies required by ``stream_output``.""" - - agent: Any - config: dict[str, Any] - input_data: Any - streaming_service: Any - step_prefix: str = "thinking" - initial_step_id: str | None = None - initial_step_title: str = "" - initial_step_items: list[str] | None = None - content_builder: Any | None = None - runtime_context: Any = None - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py deleted file mode 100644 index 80cae77a2..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ /dev/null @@ -1,261 +0,0 @@ -"""Top-level chat streaming entrypoints. -""" - -from __future__ import annotations - -from collections.abc import AsyncGenerator -from typing import Any, Literal - -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.db import ChatVisibility -from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat -from app.tasks.chat.streaming.orchestration.streaming_context import ( - build_chat_streaming_context, - build_regenerate_streaming_context, - build_resume_streaming_context, -) -from app.tasks.chat.streaming.orchestration.event_stream import stream_output -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.tasks.chat.streaming.orchestration.output import StreamingResult - - -def _build_streaming_result( - *, - chat_id: int, - request_id: str | None, - filesystem_selection: FilesystemSelection | None, - suffix: str, -) -> StreamingResult: - return StreamingResult( - request_id=request_id, - turn_id=f"{chat_id}:{suffix}", - filesystem_mode=(filesystem_selection.mode.value if filesystem_selection else "cloud"), - client_platform=( - filesystem_selection.client_platform.value if filesystem_selection else "web" - ), - ) - - -async def _stream_output_with_streaming_context( - *, - streaming_context: StreamingContext, - result: StreamingResult, -) -> AsyncGenerator[str, None]: - async for frame in stream_output( - agent=streaming_context.agent, - config=streaming_context.config, - input_data=streaming_context.input_data, - streaming_service=streaming_context.streaming_service, - result=result, - step_prefix=streaming_context.step_prefix, - initial_step_id=streaming_context.initial_step_id, - initial_step_title=streaming_context.initial_step_title, - initial_step_items=streaming_context.initial_step_items, - content_builder=streaming_context.content_builder, - runtime_context=streaming_context.runtime_context, - ): - yield frame - - -async def stream_chat( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - mentioned_documents: list[dict[str, Any]] | None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, - streaming_context: StreamingContext | None = None, -) -> AsyncGenerator[str, None]: - """Stream a new chat 
turn through the current production pipeline.""" - if streaming_context is None: - streaming_context = await build_chat_streaming_context( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ) - if streaming_context is not None: - result = _build_streaming_result( - chat_id=chat_id, - request_id=request_id, - filesystem_selection=filesystem_selection, - suffix="orchestrator", - ) - async for frame in _stream_output_with_streaming_context( - streaming_context=streaming_context, - result=result, - ): - yield frame - return - - async for chunk in stream_new_chat( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - mentioned_documents=mentioned_documents, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ): - yield chunk - - -async def stream_resume( - *, - chat_id: int, - search_space_id: int, - decisions: list[dict], - user_id: str | None = None, - llm_config_id: int = -1, - thread_visibility: ChatVisibility | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - disabled_tools: list[str] | None = None, - streaming_context: StreamingContext | None = None, -) -> AsyncGenerator[str, None]: - """Resume an interrupted chat turn through the current production pipeline.""" - if streaming_context is None: - streaming_context = await build_resume_streaming_context( - chat_id=chat_id, - search_space_id=search_space_id, - decisions=decisions, - user_id=user_id, - llm_config_id=llm_config_id, - thread_visibility=thread_visibility, - filesystem_selection=filesystem_selection, - request_id=request_id, - disabled_tools=disabled_tools, - ) - if streaming_context is not None: - result = _build_streaming_result( - chat_id=chat_id, - request_id=request_id, - filesystem_selection=filesystem_selection, - suffix="orchestrator-resume", - ) - async for frame in _stream_output_with_streaming_context( - streaming_context=streaming_context, - result=result, - ): - yield frame - return - - async for chunk in stream_resume_chat( - chat_id=chat_id, - search_space_id=search_space_id, - decisions=decisions, - user_id=user_id, - llm_config_id=llm_config_id, - thread_visibility=thread_visibility, - filesystem_selection=filesystem_selection, - request_id=request_id, - disabled_tools=disabled_tools, - ): - yield chunk - - -async def stream_regenerate( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - mentioned_documents: list[dict[str, Any]] 
| None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, - flow: Literal["new", "regenerate"] = "regenerate", - streaming_context: StreamingContext | None = None, -) -> AsyncGenerator[str, None]: - """Regenerate an assistant turn through the current production pipeline.""" - if streaming_context is None: - streaming_context = await build_regenerate_streaming_context( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ) - if streaming_context is not None: - result = _build_streaming_result( - chat_id=chat_id, - request_id=request_id, - filesystem_selection=filesystem_selection, - suffix="orchestrator-regenerate", - ) - async for frame in _stream_output_with_streaming_context( - streaming_context=streaming_context, - result=result, - ): - yield frame - return - - async for chunk in stream_new_chat( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - mentioned_documents=mentioned_documents, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - flow=flow, - ): - yield chunk diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py deleted file mode 100644 index 1bd3e103d..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Streaming context builders per orchestrator entrypoint.""" - -from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( - build_chat_streaming_context, -) -from app.tasks.chat.streaming.orchestration.streaming_context.regenerate import ( - build_regenerate_streaming_context, -) -from app.tasks.chat.streaming.orchestration.streaming_context.resume import ( - build_resume_streaming_context, -) - -__all__ = [ - "build_chat_streaming_context", - "build_regenerate_streaming_context", - "build_resume_streaming_context", -] - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py deleted file mode 100644 index eb459ae5c..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py +++ /dev/null @@ -1,258 +0,0 @@ -"""Build 
``StreamingContext`` for chat streaming.""" - -from __future__ import annotations - -import logging -import time -from typing import Any - -from langchain_core.messages import HumanMessage -from sqlalchemy.future import select -from sqlalchemy.orm import selectinload - -from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent -from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent -from app.agents.new_chat.checkpointer import get_checkpointer -from app.agents.new_chat.context import SurfSenseContextSchema -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.agents.new_chat.llm_config import ( - AgentConfig, - create_chat_litellm_from_agent_config, - create_chat_litellm_from_config, - load_agent_config, - load_global_llm_config_by_id, -) -from app.db import ( - ChatVisibility, - NewChatThread, - Report, - SearchSourceConnectorType, - SurfsenseDocsDocument, - async_session_maker, -) -from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id -from app.services.connector_service import ConnectorService -from app.services.new_streaming_service import VercelStreamingService -from app.tasks.chat.stream_new_chat import format_mentioned_surfsense_docs_as_context -from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.utils.content_utils import bootstrap_history_from_db -from app.utils.user_message_multimodal import build_human_message_content - -logger = logging.getLogger(__name__) - - -async def build_chat_streaming_context( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, -) -> StreamingContext | None: - """Build context for ``stream_output`` from route-level chat inputs.""" - session = async_session_maker() - try: - requested_llm_config_id = llm_config_id - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=llm_config_id, - requires_image_input=bool(user_image_data_urls), - ) - ).resolved_llm_config_id - - llm: Any - agent_config: AgentConfig | None - if llm_config_id >= 0: - agent_config = await load_agent_config( - session=session, - config_id=llm_config_id, - search_space_id=search_space_id, - ) - if not agent_config: - logger.warning("streaming context build failed: missing config %s", llm_config_id) - return None - llm = create_chat_litellm_from_agent_config(agent_config) - else: - loaded_llm_config = load_global_llm_config_by_id(llm_config_id) - if not loaded_llm_config: - logger.warning( - "streaming context build failed: missing global config %s", - llm_config_id, - ) - return None - llm = create_chat_litellm_from_config(loaded_llm_config) - agent_config = AgentConfig.from_yaml_config(loaded_llm_config) - - connector_service = ConnectorService(session, search_space_id=search_space_id) - firecrawl_api_key = None - webcrawler_connector = 
await connector_service.get_connector_by_type( - SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, - search_space_id, - ) - if webcrawler_connector and webcrawler_connector.config: - firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") - - checkpointer = await get_checkpointer() - visibility = thread_visibility or ChatVisibility.PRIVATE - - from app.config import config as app_config - - agent_factory = ( - create_multi_agent_chat_deep_agent - if bool(app_config.MULTI_AGENT_CHAT_ENABLED) - else create_surfsense_deep_agent - ) - agent = await build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - ) - - langchain_messages = [] - if needs_history_bootstrap: - langchain_messages = await bootstrap_history_from_db( - session, - chat_id, - thread_visibility=visibility, - ) - thread_result = await session.execute( - select(NewChatThread).filter(NewChatThread.id == chat_id) - ) - thread = thread_result.scalars().first() - if thread: - thread.needs_history_bootstrap = False - await session.commit() - - mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] - if mentioned_surfsense_doc_ids: - result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids)) - ) - mentioned_surfsense_docs = list(result.scalars().all()) - - recent_reports_result = await session.execute( - select(Report) - .filter(Report.thread_id == chat_id, Report.content.isnot(None)) - .order_by(Report.id.desc()) - .limit(3) - ) - recent_reports = list(recent_reports_result.scalars().all()) - - final_query = user_query - context_parts = [] - if mentioned_surfsense_docs: - context_parts.append( - format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) - ) - if recent_reports: - report_lines = [ - f' - report_id={r.id}, title="{r.title}", style="{r.report_style or "detailed"}"' - for r in recent_reports - ] - reports_listing = "\n".join(report_lines) - context_parts.append( - "\n" - "Previously generated reports in this conversation:\n" - f"{reports_listing}\n\n" - "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of these reports, " - "set parent_report_id to the relevant report_id above.\n" - "If the user wants a completely NEW report on a different topic, " - "leave parent_report_id unset.\n" - "" - ) - if context_parts: - joined_context = "\n\n".join(context_parts) - final_query = f"{joined_context}\n\n{user_query}" - if visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name: - final_query = f"**[{current_user_display_name}]:** {final_query}" - - human_content = build_human_message_content( - final_query, - list(user_image_data_urls or ()), - ) - langchain_messages.append(HumanMessage(content=human_content)) - - turn_id = f"{chat_id}:{int(time.time() * 1000)}" - input_state = { - "messages": langchain_messages, - "search_space_id": search_space_id, - "request_id": request_id or "unknown", - "turn_id": turn_id, - } - configurable = { - "thread_id": str(chat_id), - "request_id": request_id or "unknown", - "turn_id": turn_id, - } - if checkpoint_id: - 
configurable["checkpoint_id"] = checkpoint_id - config = {"configurable": configurable, "recursion_limit": 10_000} - - initial_title = ( - "Analyzing referenced content" - if mentioned_surfsense_docs - else "Understanding your request" - ) - action_verb = "Analyzing" if mentioned_surfsense_docs else "Processing" - query_excerpt = user_query[:80] + ("..." if len(user_query) > 80 else "") - query_part = query_excerpt if query_excerpt.strip() else "(message)" - initial_items = [f"{action_verb}: {query_part}"] - - runtime_context = SurfSenseContextSchema( - search_space_id=search_space_id, - mentioned_document_ids=list(mentioned_document_ids or []), - request_id=request_id, - turn_id=turn_id, - ) - - await session.commit() - return StreamingContext( - agent=agent, - config=config, - input_data=input_state, - streaming_service=VercelStreamingService(), - step_prefix="thinking", - initial_step_id="thinking-1", - initial_step_title=initial_title, - initial_step_items=initial_items, - content_builder=None, - runtime_context=runtime_context, - ) - except Exception: - logger.exception( - "Failed to build chat streaming context (llm_config_id=%s requested=%s)", - llm_config_id, - requested_llm_config_id, - ) - return None - finally: - await session.close() - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py deleted file mode 100644 index 02e871a2c..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Build ``StreamingContext`` for regenerate streaming.""" - -from __future__ import annotations - -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.db import ChatVisibility -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( - build_chat_streaming_context, -) - - -async def build_regenerate_streaming_context( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, -) -> StreamingContext | None: - """Build context for ``stream_regenerate`` execution.""" - return await build_chat_streaming_context( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ) - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py 
b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py deleted file mode 100644 index 6d0caea4d..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Build ``StreamingContext`` for resume streaming.""" - -from __future__ import annotations - -import logging -import time -from typing import Any - -from langgraph.types import Command - -from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent -from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent -from app.agents.new_chat.checkpointer import get_checkpointer -from app.agents.new_chat.context import SurfSenseContextSchema -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.agents.new_chat.llm_config import ( - AgentConfig, - create_chat_litellm_from_agent_config, - create_chat_litellm_from_config, - load_agent_config, - load_global_llm_config_by_id, -) -from app.db import ChatVisibility, SearchSourceConnectorType, async_session_maker -from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id -from app.services.connector_service import ConnectorService -from app.services.new_streaming_service import VercelStreamingService -from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread -from app.tasks.chat.streaming.orchestration.input import StreamingContext - -logger = logging.getLogger(__name__) - - -async def build_resume_streaming_context( - *, - chat_id: int, - search_space_id: int, - decisions: list[dict], - user_id: str | None = None, - llm_config_id: int = -1, - thread_visibility: ChatVisibility | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - disabled_tools: list[str] | None = None, -) -> StreamingContext | None: - """Build context for ``stream_resume`` execution.""" - session = async_session_maker() - try: - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=llm_config_id, - ) - ).resolved_llm_config_id - - llm: Any - agent_config: AgentConfig | None - if llm_config_id >= 0: - agent_config = await load_agent_config( - session=session, - config_id=llm_config_id, - search_space_id=search_space_id, - ) - if not agent_config: - logger.warning("resume context build failed: missing config %s", llm_config_id) - return None - llm = create_chat_litellm_from_agent_config(agent_config) - else: - loaded_llm_config = load_global_llm_config_by_id(llm_config_id) - if not loaded_llm_config: - logger.warning( - "resume context build failed: missing global config %s", - llm_config_id, - ) - return None - llm = create_chat_litellm_from_config(loaded_llm_config) - agent_config = AgentConfig.from_yaml_config(loaded_llm_config) - - connector_service = ConnectorService(session, search_space_id=search_space_id) - firecrawl_api_key = None - webcrawler_connector = await connector_service.get_connector_by_type( - SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, - search_space_id, - ) - if webcrawler_connector and webcrawler_connector.config: - firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") - - checkpointer = await get_checkpointer() - visibility = thread_visibility or ChatVisibility.PRIVATE - - from app.config import config as app_config - - agent_factory = ( - create_multi_agent_chat_deep_agent - if 
bool(app_config.MULTI_AGENT_CHAT_ENABLED) - else create_surfsense_deep_agent - ) - agent = await build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - ) - - turn_id = f"{chat_id}:{int(time.time() * 1000)}" - config = { - "configurable": { - "thread_id": str(chat_id), - "request_id": request_id or "unknown", - "turn_id": turn_id, - "surfsense_resume_value": {"decisions": decisions}, - }, - "recursion_limit": 10_000, - } - - runtime_context = SurfSenseContextSchema( - search_space_id=search_space_id, - request_id=request_id, - turn_id=turn_id, - ) - - await session.commit() - return StreamingContext( - agent=agent, - config=config, - input_data=Command(resume={"decisions": decisions}), - streaming_service=VercelStreamingService(), - step_prefix="thinking-resume", - initial_step_id=None, - initial_step_title="", - initial_step_items=None, - content_builder=None, - runtime_context=runtime_context, - ) - except Exception: - logger.exception( - "Failed to build resume streaming context (llm_config_id=%s)", - llm_config_id, - ) - return None - finally: - await session.close() - diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py index 351e878a8..18eda9a6d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py @@ -1,7 +1,23 @@ -"""Relay state: thinking steps, tool bookkeeping, and stream helpers.""" +"""Relay: thinking steps, tool bookkeeping, and ``EventRelay``. + +Package imports are lazy so ``relay.thinking_step_sse`` (and siblings) can load +without pulling in ``event_relay`` (which imports handler modules that may +import those siblings). 
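+
+A small sketch of the deferred import (attribute access runs the
+``__getattr__`` hook below, so ``event_relay`` loads on first use only):
+
+    from app.tasks.chat.streaming import relay
+
+    relay_cls = relay.EventRelay        # imports relay.event_relay here
+    config_cls = relay.EventRelayConfig  # re-runs the hook; cached in sys.modules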
+""" from __future__ import annotations -from app.tasks.chat.streaming.relay.event_relay import EventRelay, EventRelayConfig - __all__ = ["EventRelay", "EventRelayConfig"] + + +def __getattr__(name: str): + if name == "EventRelay": + from app.tasks.chat.streaming.relay.event_relay import EventRelay + + return EventRelay + if name == "EventRelayConfig": + from app.tasks.chat.streaming.relay.event_relay import EventRelayConfig + + return EventRelayConfig + msg = f"module {__name__!r} has no attribute {name!r}" + raise AttributeError(msg) diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index c8aebd99c..872998926 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -7,6 +7,7 @@ from dataclasses import dataclass, field from typing import Any from app.services.streaming.emitter import EmitterRegistry +from app.tasks.chat.streaming.graph_stream.result import StreamingResult from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames from app.tasks.chat.streaming.handlers.chat_model_stream import ( iter_chat_model_stream_frames, @@ -16,7 +17,6 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( ) from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames -from app.tasks.chat.streaming.orchestration.output import StreamingResult from app.tasks.chat.streaming.relay.state import AgentEventRelayState from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py deleted file mode 100644 index e1f7dd027..000000000 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py +++ /dev/null @@ -1,120 +0,0 @@ -"""Behavior tests for streaming agent setup helpers.""" - -from __future__ import annotations - -import sys -import types -from typing import Any - -import pytest - -from app.tasks.chat.streaming import agent_setup - -pytestmark = pytest.mark.unit - - -async def test_preflight_llm_calls_litellm_when_model_present( - monkeypatch: pytest.MonkeyPatch, -) -> None: - calls: dict[str, Any] = {} - - async def _fake_acompletion(**kwargs: Any): - calls.update(kwargs) - return {"ok": True} - - monkeypatch.setitem( - sys.modules, - "litellm", - types.SimpleNamespace(acompletion=_fake_acompletion), - ) - - llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b") - await agent_setup.preflight_llm(llm, is_provider_rate_limited=lambda _: False) - - assert calls["model"] == "openai/test" - assert calls["max_tokens"] == 1 - assert calls["timeout"] == 2.5 - assert calls["stream"] is False - - -async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None: - class _RateLimitedError(Exception): - pass - - async def _fake_acompletion(**kwargs: Any): - del kwargs - raise _RateLimitedError("rl") - - monkeypatch.setitem( - sys.modules, - "litellm", - types.SimpleNamespace(acompletion=_fake_acompletion), - ) - - with pytest.raises(_RateLimitedError): - await agent_setup.preflight_llm( - types.SimpleNamespace(model="openai/test"), - is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError), - ) - - -async def 
test_preflight_llm_skips_probe_for_auto_model( - monkeypatch: pytest.MonkeyPatch, -) -> None: - called = {"count": 0} - - async def _fake_acompletion(**kwargs: Any): - del kwargs - called["count"] += 1 - return {"ok": True} - - monkeypatch.setitem( - sys.modules, - "litellm", - types.SimpleNamespace(acompletion=_fake_acompletion), - ) - - await agent_setup.preflight_llm( - types.SimpleNamespace(model="auto"), - is_provider_rate_limited=lambda _: False, - ) - assert called["count"] == 0 - - -async def test_build_main_agent_for_thread_forwards_arguments() -> None: - seen: dict[str, Any] = {} - - async def _factory(**kwargs: Any): - seen.update(kwargs) - return "agent" - - out = await agent_setup.build_main_agent_for_thread( - _factory, - llm="llm", - search_space_id=1, - db_session="db", - connector_service="connector", - checkpointer="cp", - user_id="u", - thread_id=10, - agent_config="cfg", - firecrawl_api_key="key", - thread_visibility="vis", - filesystem_selection="fs", - disabled_tools=["a"], - mentioned_document_ids=[5], - ) - assert out == "agent" - assert seen["thread_id"] == 10 - assert seen["mentioned_document_ids"] == [5] - - -async def test_settle_speculative_agent_build_swallows_exceptions() -> None: - async def _boom() -> None: - raise RuntimeError("ignore") - - import asyncio - - task = asyncio.create_task(_boom()) - await agent_setup.settle_speculative_agent_build(task) - assert task.done() diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py deleted file mode 100644 index 46c61b498..000000000 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ /dev/null @@ -1,240 +0,0 @@ -"""Behavior tests for orchestrator ``stream_chat`` public API.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any - -import pytest - -from app.tasks.chat.streaming.orchestration import StreamingContext -from app.tasks.chat.streaming.orchestration import orchestrator -from app.tasks.chat.streaming.orchestration.orchestrator import ( - stream_chat, - stream_regenerate, - stream_resume, -) - -pytestmark = pytest.mark.unit - - -@dataclass -class _Chunk: - content: Any = "" - additional_kwargs: dict[str, Any] = field(default_factory=dict) - tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) - - -class _StreamingService: - def __init__(self) -> None: - self._text_idx = 0 - - def generate_text_id(self) -> str: - self._text_idx += 1 - return f"text-{self._text_idx}" - - def format_text_start(self, text_id: str) -> str: - return f"text_start:{text_id}" - - def format_text_delta(self, text_id: str, text: str) -> str: - return f"text_delta:{text_id}:{text}" - - def format_text_end(self, text_id: str) -> str: - return f"text_end:{text_id}" - - -class _Agent: - def __init__(self, events: list[dict[str, Any]]) -> None: - self.events = list(events) - self.calls: list[tuple[Any, dict[str, Any]]] = [] - - async def astream_events(self, input_data: Any, **kwargs: Any): - self.calls.append((input_data, kwargs)) - for event in self.events: - yield event - - -async def _collect(stream: Any) -> list[str]: - out: list[str] = [] - async for x in stream: - out.append(x) - return out - - -async def test_stream_chat_uses_streaming_context_path() -> None: - service = _StreamingService() - agent = _Agent( - [ - {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="hello")}}, - 
{"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="!")}}, - ] - ) - frames = await _collect( - stream_chat( - user_query="ignored-here", - search_space_id=1, - chat_id=77, - streaming_context=StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-1"}}, - input_data={"messages": []}, - streaming_service=service, - ), - ) - ) - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:hello", - "text_delta:text-1:!", - "text_end:text-1", - ] - - -async def test_stream_resume_uses_streaming_context_path() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}]) - - frames = await _collect( - stream_resume( - chat_id=9, - search_space_id=1, - decisions=[], - streaming_context=StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-r"}}, - input_data={"messages": []}, - streaming_service=service, - ), - ) - ) - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:r", - "text_end:text-1", - ] - - -async def test_stream_regenerate_uses_streaming_context_path() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}]) - - frames = await _collect( - stream_regenerate( - user_query="q", - search_space_id=1, - chat_id=2, - streaming_context=StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-g"}}, - input_data={"messages": []}, - streaming_service=service, - ), - ) - ) - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:g", - "text_end:text-1", - ] - - -async def test_stream_chat_builds_streaming_context_when_not_provided() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("b")}}]) - - async def _fake_builder(**kwargs: Any) -> StreamingContext: - del kwargs - return StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-b"}}, - input_data={"messages": []}, - streaming_service=service, - ) - - old = orchestrator.build_chat_streaming_context - orchestrator.build_chat_streaming_context = _fake_builder - try: - frames = await _collect( - stream_chat( - user_query="q", - search_space_id=1, - chat_id=3, - ) - ) - finally: - orchestrator.build_chat_streaming_context = old - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:b", - "text_end:text-1", - ] - - -async def test_stream_resume_builds_streaming_context_when_not_provided() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("u")}}]) - - async def _fake_builder(**kwargs: Any) -> StreamingContext: - del kwargs - return StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-u"}}, - input_data={"messages": []}, - streaming_service=service, - ) - - old = orchestrator.build_resume_streaming_context - orchestrator.build_resume_streaming_context = _fake_builder - try: - frames = await _collect( - stream_resume( - chat_id=9, - search_space_id=1, - decisions=[], - ) - ) - finally: - orchestrator.build_resume_streaming_context = old - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:u", - "text_end:text-1", - ] - - -async def test_stream_regenerate_builds_streaming_context_when_not_provided() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) - - async def _fake_builder(**kwargs: Any) -> StreamingContext: - 
del kwargs - return StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-x"}}, - input_data={"messages": []}, - streaming_service=service, - ) - - old = orchestrator.build_regenerate_streaming_context - orchestrator.build_regenerate_streaming_context = _fake_builder - try: - frames = await _collect( - stream_regenerate( - user_query="q", - search_space_id=1, - chat_id=2, - ) - ) - finally: - orchestrator.build_regenerate_streaming_context = old - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:x", - "text_end:text-1", - ] diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py index 9207f37d1..023c8b999 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py @@ -1,7 +1,7 @@ """Pin Stage 1 extractions as faithful copies of the old helpers. -The new orchestrator under ``app.tasks.chat.streaming`` is built in -parallel with the production module ``app.tasks.chat.stream_new_chat``. +Extractions under ``app.tasks.chat.streaming`` are compared to +``app.tasks.chat.stream_new_chat`` helpers. For each Stage 1 extraction we assert the new function returns the same output as the old one for a representative input set. The moment the two diverge - intentionally or otherwise - this file fails loudly so diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py similarity index 93% rename from surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py rename to surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py index b17d82293..9fb876dd7 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py @@ -1,4 +1,4 @@ -"""Behavior tests for orchestration event-stream execution.""" +"""Tests for ``stream_output`` (LangGraph events → SSE).""" from __future__ import annotations @@ -7,8 +7,8 @@ from typing import Any import pytest -from app.tasks.chat.streaming.orchestration import stream_output -from app.tasks.chat.streaming.orchestration.output import StreamingResult +from app.tasks.chat.streaming.graph_stream import stream_output +from app.tasks.chat.streaming.graph_stream.result import StreamingResult pytestmark = pytest.mark.unit @@ -88,6 +88,7 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None: async def test_stream_output_passes_runtime_context_to_agent() -> None: service = _StreamingService() + class _ContextAwareAgent: async def astream_events(self, input_data: Any, **kwargs: Any): del input_data From 78f4747382cead46c0c72040002562fe56bc35e4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 19:40:10 +0200 Subject: [PATCH 25/58] refactor(chat): stream agent events via stream_output and remove parity v2 flag --- docker/.env.example | 1 - surfsense_backend/.env.example | 8 - .../app/agents/new_chat/feature_flags.py | 15 - .../app/services/new_streaming_service.py | 17 +- .../app/tasks/chat/content_builder.py | 8 +- .../app/tasks/chat/stream_new_chat.py | 1524 +---------------- .../streaming/graph_stream/event_stream.py | 2 - .../streaming/handlers/chat_model_stream.py | 4 +- .../chat/streaming/handlers/tool_start.py | 24 +- 
.../app/tasks/chat/streaming/relay/state.py | 3 - .../agents/new_chat/test_feature_flags.py | 3 - .../chat/streaming/test_stage_2_parity.py | 4 +- .../unit/tasks/chat/test_content_builder.py | 4 +- .../tasks/chat/test_tool_input_streaming.py | 112 +- .../assistant-ui/reasoning-message-part.tsx | 4 +- .../components/assistant-ui/tool-fallback.tsx | 14 +- surfsense_web/lib/chat/streaming-state.ts | 5 +- 17 files changed, 76 insertions(+), 1676 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index fd56bdccc..aba15f13f 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -324,7 +324,6 @@ SURFSENSE_ENABLE_ACTION_LOG=true SURFSENSE_ENABLE_REVERT_ROUTE=true SURFSENSE_ENABLE_PERMISSION=true SURFSENSE_ENABLE_DOOM_LOOP=true -SURFSENSE_ENABLE_STREAM_PARITY_V2=true # Periodic connector sync interval (default: 5m) # SCHEDULE_CHECKER_INTERVAL=5m diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index ba89059c8..3d442973c 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -315,14 +315,6 @@ LANGSMITH_PROJECT=surfsense # SURFSENSE_ENABLE_ACTION_LOG=false # SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships -# Streaming parity v2 — opt in to LangChain's structured AIMessageChunk -# content (typed reasoning blocks, tool-input deltas) and propagate the -# real tool_call_id to the SSE layer. When OFF, the stream falls back to -# the str-only text path and synthetic "call_" tool-call ids. -# Schema migrations 135/136 ship unconditionally because they are -# forward-compatible. -# SURFSENSE_ENABLE_STREAM_PARITY_V2=false - # Plugins # SURFSENSE_ENABLE_PLUGIN_LOADER=false # Comma-separated allowlist of plugin entry-point names diff --git a/surfsense_backend/app/agents/new_chat/feature_flags.py b/surfsense_backend/app/agents/new_chat/feature_flags.py index b3dc0fa82..3cea051ef 100644 --- a/surfsense_backend/app/agents/new_chat/feature_flags.py +++ b/surfsense_backend/app/agents/new_chat/feature_flags.py @@ -28,7 +28,6 @@ Defaults: SURFSENSE_ENABLE_PERMISSION=true SURFSENSE_ENABLE_DOOM_LOOP=true SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false # adds a per-turn LLM call - SURFSENSE_ENABLE_STREAM_PARITY_V2=true Master kill-switch (overrides everything else): @@ -88,15 +87,6 @@ class AgentFeatureFlags: enable_action_log: bool = True enable_revert_route: bool = True - # Streaming parity v2 — opt in to LangChain's structured - # ``AIMessageChunk`` content (typed reasoning blocks, tool-input - # deltas) and propagate the real ``tool_call_id`` to the SSE layer. - # When OFF the ``stream_new_chat`` task falls back to the str-only - # text path and the synthetic ``call_`` tool-call id (no - # ``langchainToolCallId`` propagation). Schema migrations 135/136 - # ship unconditionally because they're forward-compatible. 
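# Aside: a minimal stand-alone sketch (hypothetical helper, stdlib
# only) of the boolean env parsing that the ``_env_bool`` reads used
# just below in ``from_env`` presumably rely on; the real helper lives
# elsewhere in feature_flags.py and may differ:
import os


def env_bool(name: str, default: bool) -> bool:
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")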
- enable_stream_parity_v2: bool = True - # Plugins enable_plugin_loader: bool = False @@ -169,7 +159,6 @@ class AgentFeatureFlags: enable_kb_planner_runnable=False, enable_action_log=False, enable_revert_route=False, - enable_stream_parity_v2=False, enable_plugin_loader=False, enable_otel=False, enable_agent_cache=False, @@ -208,10 +197,6 @@ class AgentFeatureFlags: # Snapshot / revert enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True), enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True), - # Streaming parity v2 - enable_stream_parity_v2=_env_bool( - "SURFSENSE_ENABLE_STREAM_PARITY_V2", True - ), # Plugins enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False), # Observability diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py index 55129668c..cec0c8a5e 100644 --- a/surfsense_backend/app/services/new_streaming_service.py +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -608,15 +608,14 @@ class VercelStreamingService: Args: tool_call_id: The unique tool call identifier. May be EITHER the synthetic ``call_`` id derived from LangGraph - ``run_id`` (legacy / ``SURFSENSE_ENABLE_STREAM_PARITY_V2`` - OFF, or the unmatched-fallback path under parity_v2) OR - the authoritative LangChain ``tool_call.id`` (parity_v2 - path: when the provider streams ``tool_call_chunks`` we - register the ``index`` and reuse the lc-id as the card - id so live ``tool-input-delta`` events can be routed - without a downstream join). Either way, the same id is - preserved across ``tool-input-start`` / ``-delta`` / - ``-available`` / ``tool-output-available`` for one call. + ``run_id`` (unmatched chunk fallback when no ``index`` was + registered) OR the authoritative LangChain ``tool_call.id`` + (when the provider streams ``tool_call_chunks`` we register + the ``index`` and reuse the lc-id as the card id so live + ``tool-input-delta`` events route without a downstream join). + Either way, the same id is preserved across + ``tool-input-start`` / ``-delta`` / ``-available`` / + ``tool-output-available`` for one call. tool_name: The name of the tool being called. langchain_tool_call_id: Optional authoritative LangChain ``tool_call.id``. When set, surfaces as diff --git a/surfsense_backend/app/tasks/chat/content_builder.py b/surfsense_backend/app/tasks/chat/content_builder.py index 041cab286..32b49e6b5 100644 --- a/surfsense_backend/app/tasks/chat/content_builder.py +++ b/surfsense_backend/app/tasks/chat/content_builder.py @@ -85,8 +85,8 @@ class AssistantContentBuilder: self._current_text_idx: int = -1 self._current_reasoning_idx: int = -1 # ``ui_id``-keyed indexes for tool-call parts. ``ui_id`` is the - # synthetic ``call_`` (legacy) or the LangChain - # ``tool_call.id`` (parity_v2) — same key the streaming layer + # synthetic ``call_`` (chunk fallback) or the LangChain + # ``tool_call.id`` (indexed chunk path) — same key the streaming layer # threads through every ``tool-input-*`` / ``tool-output-*`` event. self._tool_call_idx_by_ui_id: dict[str, int] = {} # Live argsText accumulator (concatenated ``tool-input-delta`` chunks) @@ -181,7 +181,7 @@ class AssistantContentBuilder: """Register a tool-call card. 
Args are filled in by later events.""" if not ui_id: return - # Skip duplicate registration: parity_v2 may emit + # Skip duplicate registration: the stream may emit # ``tool-input-start`` from both ``on_chat_model_stream`` # (when tool_call_chunks register a name) and ``on_tool_start`` # (the canonical path). The FE de-dupes via ``toolCallIndices``; @@ -243,7 +243,7 @@ class AssistantContentBuilder: pretty-printed JSON, sets the full ``args`` dict, and backfills ``langchainToolCallId`` if it wasn't known at ``tool-input-start`` time. Also creates the card if no prior ``tool-input-start`` registered it - (legacy parity_v2-OFF / late-registration paths). + (late-registration when no prior ``tool-input-start``). """ if not ui_id: return diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 1a2f38077..8e135179a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -9,13 +9,11 @@ Supports loading LLM configurations from: - NewLLMConfig database table (positive IDs for user-created configs with prompt settings) """ -import ast import asyncio import contextlib import gc import json import logging -import re import time from collections.abc import AsyncGenerator from dataclasses import dataclass, field @@ -33,7 +31,6 @@ from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.new_chat.errors import BusyError -from app.agents.new_chat.feature_flags import get_flags from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection from app.agents.new_chat.llm_config import ( AgentConfig, @@ -77,6 +74,7 @@ from app.services.chat_session_state_service import ( ) from app.services.connector_service import ConnectorService from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.streaming.graph_stream.event_stream import stream_output from app.utils.content_utils import bootstrap_history_from_db from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap from app.utils.user_message_multimodal import build_human_message_content @@ -729,9 +727,9 @@ def _legacy_match_lc_id( ) -> str | None: """Best-effort match a buffered ``tool_call_chunk`` to a tool name. - Pure extract of the legacy in-line match used at ``on_tool_start`` for - parity_v2-OFF and unmatched (chunk path didn't register an index for - this call) tools. Pops the next id-bearing chunk whose ``name`` + Pure extract of the in-line match used at ``on_tool_start`` when the + chunk path didn't register an index for this call. Pops the next + id-bearing chunk whose ``name`` matches ``tool_name`` (or any id-bearing chunk as a fallback) and returns its id. Mutates ``pending_tool_call_chunks`` and ``lc_tool_call_id_by_run`` in place. @@ -803,1505 +801,22 @@ async def _stream_agent_events( Yields: SSE-formatted strings for each event. 
""" - accumulated_text = "" - current_text_id: str | None = None - thinking_step_counter = 1 if initial_step_id else 0 - tool_step_ids: dict[str, str] = {} - completed_step_ids: set[str] = set() - last_active_step_id: str | None = initial_step_id - last_active_step_title: str = initial_step_title - last_active_step_items: list[str] = initial_step_items or [] - just_finished_tool: bool = False - active_tool_depth: int = 0 # Track nesting: >0 means we're inside a tool - called_update_memory: bool = False + async for sse in stream_output( + agent=agent, + config=config, + input_data=input_data, + streaming_service=streaming_service, + result=result, + step_prefix=step_prefix, + initial_step_id=initial_step_id, + initial_step_title=initial_step_title, + initial_step_items=initial_step_items, + content_builder=content_builder, + runtime_context=runtime_context, + ): + yield sse - # Reasoning-block streaming. We open a reasoning block on the - # first reasoning delta of a step, append deltas as they arrive, and - # close it when text starts (the model has switched to writing its - # answer) or ``on_chat_model_end`` fires for the model node. Reuses - # the same Vercel format-helpers as text-start/delta/end. - current_reasoning_id: str | None = None - - # Streaming-parity v2 feature flag. When OFF we keep the legacy - # shape: str-only content, no reasoning blocks, no - # ``langchainToolCallId`` propagation. The schema migrations - # (135 / 136) ship unconditionally because they're forward-compatible. - parity_v2 = bool(get_flags().enable_stream_parity_v2) - - # Best-effort attach of LangChain ``tool_call_id`` to the synthetic - # ``call_`` card id we already emit. We accumulate - # ``tool_call_chunks`` from ``on_chat_model_stream``, key them by - # name, and pop the next unconsumed entry at ``on_tool_start``. The - # authoritative id is later filled in at ``on_tool_end`` from - # ``ToolMessage.tool_call_id``. Under parity_v2 we ALSO short-circuit - # this list for chunks that already registered into ``index_to_meta`` - # below — so this list is reserved for the parity_v2-OFF / unmatched - # fallback path only and never re-pops a chunk we already streamed. - pending_tool_call_chunks: list[dict[str, Any]] = [] - lc_tool_call_id_by_run: dict[str, str] = {} - file_path_by_run: dict[str, str] = {} - - # parity_v2 only: live tool-call argument streaming. ``index_to_meta`` - # is keyed by the chunk's ``index`` field — LangChain - # ``ToolCallChunk``s for the same call share an index but only the - # first chunk carries id+name (subsequent ones are id=None, - # name=None, args=""). We register an index when both id and - # name are observed on a chunk (per ToolCallChunk semantics they - # arrive together on the first chunk), then route every later chunk - # at that index to the same ``ui_id`` as a ``tool-input-delta``. - # ``ui_tool_call_id_by_run`` maps LangGraph ``run_id`` to the - # ``ui_id`` used for that call's ``tool-input-start`` so the matching - # ``tool-output-available`` (emitted from ``on_tool_end``) lands on - # the same card. - index_to_meta: dict[int, dict[str, str]] = {} - ui_tool_call_id_by_run: dict[str, str] = {} - - # Per-tool-end mutable cache for the LangChain tool_call_id resolved - # at ``on_tool_end``. ``_emit_tool_output`` reads this so every - # ``format_tool_output_available`` call automatically carries the - # authoritative id without duplicating the kwarg at every call site. 
- current_lc_tool_call_id: dict[str, str | None] = {"value": None} - - def _emit_tool_output(call_id: str, output: Any) -> str: - # Drive the builder before formatting the SSE so the in-memory - # ContentPart[] mirror sees the result attached to the same - # card the FE will render. Builder method is a no-op when - # ``content_builder`` is None (anonymous / legacy paths). - if content_builder is not None: - content_builder.on_tool_output_available( - call_id, output, current_lc_tool_call_id["value"] - ) - return streaming_service.format_tool_output_available( - call_id, - output, - langchain_tool_call_id=current_lc_tool_call_id["value"], - ) - - def _emit_thinking_step( - *, - step_id: str, - title: str, - status: str = "in_progress", - items: list[str] | None = None, - ) -> str: - """Format a thinking-step SSE event and notify the builder. - - Single helper used at every ``format_thinking_step`` yield site - in this generator. Drives ``AssistantContentBuilder.on_thinking_step`` - first so the FE-mirror state lands the update before the SSE - carrying the same data leaves the wire — order matches the FE - pipeline (``processSharedStreamEvent`` updates state, then - flushes). Builder call is a no-op when ``content_builder`` is - None (anonymous / legacy paths). - """ - if content_builder is not None: - content_builder.on_thinking_step(step_id, title, status, items) - return streaming_service.format_thinking_step( - step_id=step_id, - title=title, - status=status, - items=items, - ) - - def next_thinking_step_id() -> str: - nonlocal thinking_step_counter - thinking_step_counter += 1 - return f"{step_prefix}-{thinking_step_counter}" - - def complete_current_step() -> str | None: - nonlocal last_active_step_id - if last_active_step_id and last_active_step_id not in completed_step_ids: - completed_step_ids.add(last_active_step_id) - event = _emit_thinking_step( - step_id=last_active_step_id, - title=last_active_step_title, - status="completed", - items=last_active_step_items if last_active_step_items else None, - ) - last_active_step_id = None - return event - return None - - # Per-invocation runtime context (Phase 1.5). When supplied, - # ``KnowledgePriorityMiddleware`` reads ``mentioned_document_ids`` - # from ``runtime.context`` instead of its constructor closure — the - # prerequisite that lets the compiled-agent cache (Phase 1) reuse a - # single graph across turns. Astream_events_kwargs stays empty when - # callers leave ``runtime_context`` as ``None`` to preserve the - # legacy code path bit-for-bit. - astream_kwargs: dict[str, Any] = {"config": config, "version": "v2"} - if runtime_context is not None: - astream_kwargs["context"] = runtime_context - - async for event in agent.astream_events(input_data, **astream_kwargs): - event_type = event.get("event", "") - - if event_type == "on_chat_model_stream": - if active_tool_depth > 0: - continue # Suppress inner-tool LLM tokens from leaking into chat - if "surfsense:internal" in event.get("tags", []): - continue # Suppress middleware-internal LLM tokens (e.g. KB search classification) - chunk = event.get("data", {}).get("chunk") - if not chunk: - continue - parts = _extract_chunk_parts(chunk) - - reasoning_delta = parts["reasoning"] - text_delta = parts["text"] - - # Reasoning streaming. Open a reasoning block on first - # delta; append every subsequent delta until text begins. - # When text starts we close the reasoning block first so the - # frontend sees the natural hand-off. 
Gated behind the - # parity-v2 flag so legacy deployments keep today's shape. - if parity_v2 and reasoning_delta: - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - if current_reasoning_id is None: - completion_event = complete_current_step() - if completion_event: - yield completion_event - if just_finished_tool: - last_active_step_id = None - last_active_step_title = "" - last_active_step_items = [] - just_finished_tool = False - current_reasoning_id = streaming_service.generate_reasoning_id() - yield streaming_service.format_reasoning_start(current_reasoning_id) - if content_builder is not None: - content_builder.on_reasoning_start(current_reasoning_id) - yield streaming_service.format_reasoning_delta( - current_reasoning_id, reasoning_delta - ) - if content_builder is not None: - content_builder.on_reasoning_delta( - current_reasoning_id, reasoning_delta - ) - - if text_delta: - if current_reasoning_id is not None: - yield streaming_service.format_reasoning_end(current_reasoning_id) - if content_builder is not None: - content_builder.on_reasoning_end(current_reasoning_id) - current_reasoning_id = None - if current_text_id is None: - completion_event = complete_current_step() - if completion_event: - yield completion_event - if just_finished_tool: - last_active_step_id = None - last_active_step_title = "" - last_active_step_items = [] - just_finished_tool = False - current_text_id = streaming_service.generate_text_id() - yield streaming_service.format_text_start(current_text_id) - if content_builder is not None: - content_builder.on_text_start(current_text_id) - yield streaming_service.format_text_delta(current_text_id, text_delta) - accumulated_text += text_delta - if content_builder is not None: - content_builder.on_text_delta(current_text_id, text_delta) - - # Live tool-call argument streaming. Runs AFTER text/reasoning - # processing so chunks containing both stay in their natural - # wire order (text → text-end → tool-input-start). Active - # text/reasoning are closed inside the registration branch - # before ``tool-input-start`` so the frontend sees a clean - # part boundary even when providers interleave. - if parity_v2 and parts["tool_call_chunks"]: - for tcc in parts["tool_call_chunks"]: - idx = tcc.get("index") - - # Register this index when we first see id+name - # TOGETHER. Per LangChain ToolCallChunk semantics the - # first chunk for a tool call carries both fields - # together; later chunks have id=None, name=None and - # only ``args``. Requiring BOTH keeps wire - # ``tool-input-start`` always carrying a real - # toolName (assistant-ui's typed tool-part dispatch - # keys off it). - if idx is not None and idx not in index_to_meta: - lc_id = tcc.get("id") - name = tcc.get("name") - if lc_id and name: - ui_id = lc_id - - # Close active text/reasoning so wire - # ordering stays clean even on providers - # that interleave text and tool-call chunks - # within the same stream window. 
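# Aside: a stand-alone sketch of the index-keyed reassembly described
# above (hypothetical helper, stdlib only). Per ToolCallChunk
# semantics the first chunk of a call carries id+name; later chunks
# share the index and carry args text only, so grouping by index
# rebuilds the full argsText:
def assemble_tool_calls(chunks: list[dict]) -> dict[int, dict]:
    calls: dict[int, dict] = {}
    for chunk in chunks:
        idx = chunk.get("index")
        if idx is None:
            continue
        meta = calls.setdefault(idx, {"id": None, "name": None, "args": ""})
        meta["id"] = meta["id"] or chunk.get("id")
        meta["name"] = meta["name"] or chunk.get("name")
        meta["args"] += chunk.get("args") or ""
    return calls


# assemble_tool_calls([
#     {"index": 0, "id": "lc_1", "name": "grep", "args": '{"pat'},
#     {"index": 0, "id": None, "name": None, "args": 'tern": "x"}'},
# ]) -> {0: {"id": "lc_1", "name": "grep", "args": '{"pattern": "x"}'}}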
- if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - if current_reasoning_id is not None: - yield streaming_service.format_reasoning_end( - current_reasoning_id - ) - if content_builder is not None: - content_builder.on_reasoning_end( - current_reasoning_id - ) - current_reasoning_id = None - - index_to_meta[idx] = { - "ui_id": ui_id, - "lc_id": lc_id, - "name": name, - } - yield streaming_service.format_tool_input_start( - ui_id, - name, - langchain_tool_call_id=lc_id, - ) - if content_builder is not None: - content_builder.on_tool_input_start(ui_id, name, lc_id) - - # Emit args delta for any chunk at a registered - # index (including idless continuations). Once an - # index is owned by ``index_to_meta`` we DO NOT - # append to ``pending_tool_call_chunks`` — that list - # is reserved for the parity_v2-OFF / unmatched - # fallback path so it never re-pops chunks already - # consumed here (skip-append). - meta = index_to_meta.get(idx) if idx is not None else None - if meta: - args_chunk = tcc.get("args") or "" - if args_chunk: - yield streaming_service.format_tool_input_delta( - meta["ui_id"], args_chunk - ) - if content_builder is not None: - content_builder.on_tool_input_delta( - meta["ui_id"], args_chunk - ) - else: - pending_tool_call_chunks.append(tcc) - - elif event_type == "on_tool_start": - active_tool_depth += 1 - tool_name = event.get("name", "unknown_tool") - run_id = event.get("run_id", "") - tool_input = event.get("data", {}).get("input", {}) - if tool_name in ("write_file", "edit_file"): - result.write_attempted = True - if isinstance(tool_input, dict): - file_path = tool_input.get("file_path") - if isinstance(file_path, str) and file_path.strip() and run_id: - file_path_by_run[run_id] = file_path.strip() - - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - - if last_active_step_title != "Synthesizing response": - completion_event = complete_current_step() - if completion_event: - yield completion_event - - just_finished_tool = False - tool_step_id = next_thinking_step_id() - tool_step_ids[run_id] = tool_step_id - last_active_step_id = tool_step_id - - if tool_name == "ls": - ls_path = ( - tool_input.get("path", "/") - if isinstance(tool_input, dict) - else str(tool_input) - ) - last_active_step_title = "Listing files" - last_active_step_items = [ls_path] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Listing files", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "read_file": - fp = ( - tool_input.get("file_path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_fp = fp if len(fp) <= 80 else "…" + fp[-77:] - last_active_step_title = "Reading file" - last_active_step_items = [display_fp] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Reading file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "write_file": - fp = ( - tool_input.get("file_path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_fp = fp if len(fp) <= 80 else "…" + fp[-77:] - last_active_step_title = "Writing file" - last_active_step_items = [display_fp] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Writing file", - status="in_progress", - 
items=last_active_step_items, - ) - elif tool_name == "edit_file": - fp = ( - tool_input.get("file_path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_fp = fp if len(fp) <= 80 else "…" + fp[-77:] - last_active_step_title = "Editing file" - last_active_step_items = [display_fp] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Editing file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "glob": - pat = ( - tool_input.get("pattern", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - base_path = ( - tool_input.get("path", "/") if isinstance(tool_input, dict) else "/" - ) - last_active_step_title = "Searching files" - last_active_step_items = [f"{pat} in {base_path}"] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Searching files", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "grep": - pat = ( - tool_input.get("pattern", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - grep_path = ( - tool_input.get("path", "") if isinstance(tool_input, dict) else "" - ) - display_pat = pat[:60] + ("…" if len(pat) > 60 else "") - last_active_step_title = "Searching content" - last_active_step_items = [ - f'"{display_pat}"' + (f" in {grep_path}" if grep_path else "") - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Searching content", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "rm": - rm_path = ( - tool_input.get("path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_path = rm_path if len(rm_path) <= 80 else "…" + rm_path[-77:] - last_active_step_title = "Deleting file" - last_active_step_items = [display_path] if display_path else [] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Deleting file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "rmdir": - rmdir_path = ( - tool_input.get("path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_path = ( - rmdir_path if len(rmdir_path) <= 80 else "…" + rmdir_path[-77:] - ) - last_active_step_title = "Deleting folder" - last_active_step_items = [display_path] if display_path else [] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Deleting folder", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "mkdir": - mkdir_path = ( - tool_input.get("path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_path = ( - mkdir_path if len(mkdir_path) <= 80 else "…" + mkdir_path[-77:] - ) - last_active_step_title = "Creating folder" - last_active_step_items = [display_path] if display_path else [] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Creating folder", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "move_file": - src = ( - tool_input.get("source_path", "") - if isinstance(tool_input, dict) - else "" - ) - dst = ( - tool_input.get("destination_path", "") - if isinstance(tool_input, dict) - else "" - ) - display_src = src if len(src) <= 60 else "…" + src[-57:] - display_dst = dst if len(dst) <= 60 else "…" + dst[-57:] - last_active_step_title = "Moving file" - last_active_step_items = ( - [f"{display_src} → {display_dst}"] if src or dst else [] - ) - yield _emit_thinking_step( - step_id=tool_step_id, - title="Moving file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "write_todos": - todos = ( - 
tool_input.get("todos", []) if isinstance(tool_input, dict) else [] - ) - todo_count = len(todos) if isinstance(todos, list) else 0 - last_active_step_title = "Planning tasks" - last_active_step_items = ( - [f"{todo_count} task{'s' if todo_count != 1 else ''}"] - if todo_count - else [] - ) - yield _emit_thinking_step( - step_id=tool_step_id, - title="Planning tasks", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "save_document": - doc_title = ( - tool_input.get("title", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_title = doc_title[:60] + ("…" if len(doc_title) > 60 else "") - last_active_step_title = "Saving document" - last_active_step_items = [display_title] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Saving document", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "generate_image": - prompt = ( - tool_input.get("prompt", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - last_active_step_title = "Generating image" - last_active_step_items = [ - f"Prompt: {prompt[:80]}{'...' if len(prompt) > 80 else ''}" - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Generating image", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "scrape_webpage": - url = ( - tool_input.get("url", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - last_active_step_title = "Scraping webpage" - last_active_step_items = [ - f"URL: {url[:80]}{'...' if len(url) > 80 else ''}" - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Scraping webpage", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "generate_podcast": - podcast_title = ( - tool_input.get("podcast_title", "SurfSense Podcast") - if isinstance(tool_input, dict) - else "SurfSense Podcast" - ) - content_len = len( - tool_input.get("source_content", "") - if isinstance(tool_input, dict) - else "" - ) - last_active_step_title = "Generating podcast" - last_active_step_items = [ - f"Title: {podcast_title}", - f"Content: {content_len:,} characters", - "Preparing audio generation...", - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Generating podcast", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "generate_report": - report_topic = ( - tool_input.get("topic", "Report") - if isinstance(tool_input, dict) - else "Report" - ) - is_revision = bool( - isinstance(tool_input, dict) and tool_input.get("parent_report_id") - ) - step_title = "Revising report" if is_revision else "Generating report" - last_active_step_title = step_title - last_active_step_items = [ - f"Topic: {report_topic}", - "Analyzing source content...", - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title=step_title, - status="in_progress", - items=last_active_step_items, - ) - elif tool_name in ("execute", "execute_code"): - cmd = ( - tool_input.get("command", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_cmd = cmd[:80] + ("…" if len(cmd) > 80 else "") - last_active_step_title = "Running command" - last_active_step_items = [f"$ {display_cmd}"] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Running command", - status="in_progress", - items=last_active_step_items, - ) - else: - # Fallback for tools without a curated thinking-step title - # (typically connector tools, MCP-registered tools, or - # newly added tools that haven't been wired up here yet). 
- # Render the snake_cased name as a sentence-cased phrase - # so non-technical users see e.g. "Send gmail email" - # rather than the raw identifier "send_gmail_email". - last_active_step_title = ( - tool_name.replace("_", " ").strip().capitalize() or tool_name - ) - last_active_step_items = [] - yield _emit_thinking_step( - step_id=tool_step_id, - title=last_active_step_title, - status="in_progress", - ) - - # Resolve the card identity. If the chunk-emission loop - # already registered an ``index`` for this tool call (parity_v2 - # path), reuse the same ui_id so the card sees: - # tool-input-start → deltas… → tool-input-available → - # tool-output-available all keyed by lc_id. Otherwise fall - # back to the synthetic ``call_`` id and the legacy - # best-effort match against ``pending_tool_call_chunks``. - matched_meta: dict[str, str] | None = None - if parity_v2: - # FIFO over indices 0,1,2…; first unassigned same-name - # match wins. Handles parallel same-name calls (e.g. two - # write_file calls) deterministically as long as the - # model interleaves on_tool_start in the same order it - # streamed the args. - taken_ui_ids = set(ui_tool_call_id_by_run.values()) - for meta in index_to_meta.values(): - if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: - matched_meta = meta - break - - tool_call_id: str - langchain_tool_call_id: str | None = None - if matched_meta is not None: - tool_call_id = matched_meta["ui_id"] - langchain_tool_call_id = matched_meta["lc_id"] - # ``tool-input-start`` already fired during chunk - # emission — skip the duplicate. No pruning is needed - # because the chunk-emission loop intentionally never - # appends registered-index chunks to - # ``pending_tool_call_chunks`` (skip-append). - if run_id: - lc_tool_call_id_by_run[run_id] = matched_meta["lc_id"] - else: - tool_call_id = ( - f"call_{run_id[:32]}" - if run_id - else streaming_service.generate_tool_call_id() - ) - # Legacy fallback: parity_v2 OFF, or parity_v2 ON but the - # provider didn't stream tool_call_chunks for this call - # (no index registered). Run the existing best-effort - # match BEFORE emitting start so we still attach an - # authoritative ``langchainToolCallId`` when possible. - if parity_v2: - langchain_tool_call_id = _legacy_match_lc_id( - pending_tool_call_chunks, - tool_name, - run_id, - lc_tool_call_id_by_run, - ) - yield streaming_service.format_tool_input_start( - tool_call_id, - tool_name, - langchain_tool_call_id=langchain_tool_call_id, - ) - if content_builder is not None: - content_builder.on_tool_input_start( - tool_call_id, tool_name, langchain_tool_call_id - ) - - if run_id: - ui_tool_call_id_by_run[run_id] = tool_call_id - - # Sanitize tool_input: strip runtime-injected non-serializable - # values (e.g. LangChain ToolRuntime) before sending over SSE. 
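# Aside: a stand-alone sketch of the probe-and-drop filter applied
# just below (hypothetical helper): json.dumps is the serializability
# oracle, so runtime-injected objects that cannot cross the SSE
# boundary are dropped instead of crashing the frame.
import json
from typing import Any


def jsonable_only(values: dict[str, Any]) -> dict[str, Any]:
    safe: dict[str, Any] = {}
    for key, value in values.items():
        try:
            json.dumps(value)
        except (TypeError, ValueError, OverflowError):
            continue
        safe[key] = value
    return safe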
- if isinstance(tool_input, dict): - _safe_input: dict[str, Any] = {} - for _k, _v in tool_input.items(): - try: - json.dumps(_v) - _safe_input[_k] = _v - except (TypeError, ValueError, OverflowError): - pass - else: - _safe_input = {"input": tool_input} - yield streaming_service.format_tool_input_available( - tool_call_id, - tool_name, - _safe_input, - langchain_tool_call_id=langchain_tool_call_id, - ) - if content_builder is not None: - content_builder.on_tool_input_available( - tool_call_id, - tool_name, - _safe_input, - langchain_tool_call_id, - ) - - elif event_type == "on_tool_end": - active_tool_depth = max(0, active_tool_depth - 1) - run_id = event.get("run_id", "") - tool_name = event.get("name", "unknown_tool") - raw_output = event.get("data", {}).get("output", "") - staged_file_path = file_path_by_run.pop(run_id, None) if run_id else None - - if tool_name == "update_memory": - called_update_memory = True - - if hasattr(raw_output, "content"): - content = raw_output.content - if isinstance(content, str): - try: - tool_output = json.loads(content) - except (json.JSONDecodeError, TypeError): - tool_output = {"result": content} - elif isinstance(content, dict): - tool_output = content - else: - tool_output = {"result": str(content)} - elif isinstance(raw_output, dict): - tool_output = raw_output - else: - tool_output = {"result": str(raw_output) if raw_output else "completed"} - - if tool_name in ("write_file", "edit_file"): - if _tool_output_has_error(tool_output): - # Keep successful evidence if a previous write/edit in this turn succeeded. - pass - else: - result.write_succeeded = True - result.verification_succeeded = True - - # Look up the SAME card id used at on_tool_start (either the - # parity_v2 lc-id-derived ui_id or the legacy synthetic - # ``call_``) so the output event always lands on the - # same card as start/delta/available. Fallback preserves the - # legacy synthetic shape for parity_v2-OFF / unknown-run paths. - tool_call_id = ui_tool_call_id_by_run.get( - run_id, - f"call_{run_id[:32]}" if run_id else "call_unknown", - ) - original_step_id = tool_step_ids.get( - run_id, f"{step_prefix}-unknown-{run_id[:8]}" - ) - completed_step_ids.add(original_step_id) - - # Authoritative LangChain tool_call_id from the returned - # ``ToolMessage``. Falls back to whatever we matched - # at ``on_tool_start`` time (kept in ``lc_tool_call_id_by_run``) - # if the output isn't a ToolMessage. The value is stored in - # ``current_lc_tool_call_id`` so ``_emit_tool_output`` - # picks it up for every output emit below. - # - # Emitted in BOTH parity_v2 and legacy modes: the chat tool - # card needs the LangChain id to match against the - # ``data-action-log`` SSE event (keyed by ``lc_tool_call_id``) - # so the inline Revert button can light up. Reading - # ``raw_output.tool_call_id`` is a cheap, non-mutating attribute - # access that is safe regardless of feature-flag state. 
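# Aside: a stand-alone sketch of the id-resolution order described
# above (hypothetical helper): prefer the returned ToolMessage's own
# tool_call_id, else fall back to whatever was matched at
# on_tool_start time.
from typing import Any


def resolve_lc_tool_call_id(
    raw_output: Any, run_id: str, lc_id_by_run: dict[str, str]
) -> str | None:
    authoritative = getattr(raw_output, "tool_call_id", None)
    if isinstance(authoritative, str) and authoritative:
        return authoritative
    return lc_id_by_run.get(run_id)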
- current_lc_tool_call_id["value"] = None - authoritative = getattr(raw_output, "tool_call_id", None) - if isinstance(authoritative, str) and authoritative: - current_lc_tool_call_id["value"] = authoritative - if run_id: - lc_tool_call_id_by_run[run_id] = authoritative - elif run_id and run_id in lc_tool_call_id_by_run: - current_lc_tool_call_id["value"] = lc_tool_call_id_by_run[run_id] - - if tool_name == "read_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Reading file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "write_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Writing file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "edit_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Editing file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "glob": - yield _emit_thinking_step( - step_id=original_step_id, - title="Searching files", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "grep": - yield _emit_thinking_step( - step_id=original_step_id, - title="Searching content", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "rm": - yield _emit_thinking_step( - step_id=original_step_id, - title="Deleting file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "rmdir": - yield _emit_thinking_step( - step_id=original_step_id, - title="Deleting folder", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "mkdir": - yield _emit_thinking_step( - step_id=original_step_id, - title="Creating folder", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "move_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Moving file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "write_todos": - yield _emit_thinking_step( - step_id=original_step_id, - title="Planning tasks", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "save_document": - result_str = ( - tool_output.get("result", "") - if isinstance(tool_output, dict) - else str(tool_output) - ) - is_error = "Error" in result_str - completed_items = [ - *last_active_step_items, - result_str[:80] if is_error else "Saved to knowledge base", - ] - yield _emit_thinking_step( - step_id=original_step_id, - title="Saving document", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_image": - if isinstance(tool_output, dict) and not tool_output.get("error"): - completed_items = [ - *last_active_step_items, - "Image generated successfully", - ] - else: - error_msg = ( - tool_output.get("error", "Generation failed") - if isinstance(tool_output, dict) - else "Generation failed" - ) - completed_items = [*last_active_step_items, f"Error: {error_msg}"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Generating image", - status="completed", - items=completed_items, - ) - elif tool_name == "scrape_webpage": - if isinstance(tool_output, dict): - title = tool_output.get("title", "Webpage") - word_count = tool_output.get("word_count", 0) - has_error = "error" in tool_output - if has_error: - completed_items = [ - *last_active_step_items, - f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}", - ] - else: - completed_items = [ - *last_active_step_items, - f"Title: {title[:50]}{'...' 
if len(title) > 50 else ''}", - f"Extracted: {word_count:,} words", - ] - else: - completed_items = [*last_active_step_items, "Content extracted"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Scraping webpage", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_podcast": - podcast_status = ( - tool_output.get("status", "unknown") - if isinstance(tool_output, dict) - else "unknown" - ) - podcast_title = ( - tool_output.get("title", "Podcast") - if isinstance(tool_output, dict) - else "Podcast" - ) - if podcast_status in ("pending", "generating", "processing"): - completed_items = [ - f"Title: {podcast_title}", - "Podcast generation started", - "Processing in background...", - ] - elif podcast_status == "already_generating": - completed_items = [ - f"Title: {podcast_title}", - "Podcast already in progress", - "Please wait for it to complete", - ] - elif podcast_status in ("failed", "error"): - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - completed_items = [ - f"Title: {podcast_title}", - f"Error: {error_msg[:50]}", - ] - elif podcast_status in ("ready", "success"): - completed_items = [ - f"Title: {podcast_title}", - "Podcast ready", - ] - else: - completed_items = last_active_step_items - yield _emit_thinking_step( - step_id=original_step_id, - title="Generating podcast", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_video_presentation": - vp_status = ( - tool_output.get("status", "unknown") - if isinstance(tool_output, dict) - else "unknown" - ) - vp_title = ( - tool_output.get("title", "Presentation") - if isinstance(tool_output, dict) - else "Presentation" - ) - if vp_status in ("pending", "generating"): - completed_items = [ - f"Title: {vp_title}", - "Presentation generation started", - "Processing in background...", - ] - elif vp_status == "failed": - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - completed_items = [ - f"Title: {vp_title}", - f"Error: {error_msg[:50]}", - ] - else: - completed_items = last_active_step_items - yield _emit_thinking_step( - step_id=original_step_id, - title="Generating video presentation", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_report": - report_status = ( - tool_output.get("status", "unknown") - if isinstance(tool_output, dict) - else "unknown" - ) - report_title = ( - tool_output.get("title", "Report") - if isinstance(tool_output, dict) - else "Report" - ) - word_count = ( - tool_output.get("word_count", 0) - if isinstance(tool_output, dict) - else 0 - ) - is_revision = ( - tool_output.get("is_revision", False) - if isinstance(tool_output, dict) - else False - ) - step_title = "Revising report" if is_revision else "Generating report" - - if report_status == "ready": - completed_items = [ - f"Topic: {report_title}", - f"{word_count:,} words", - "Report ready", - ] - elif report_status == "failed": - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - completed_items = [ - f"Topic: {report_title}", - f"Error: {error_msg[:50]}", - ] - else: - completed_items = last_active_step_items - - yield _emit_thinking_step( - step_id=original_step_id, - title=step_title, - status="completed", - items=completed_items, - ) - elif tool_name in ("execute", "execute_code"): - raw_text = ( - tool_output.get("result", "") - if 
isinstance(tool_output, dict) - else str(tool_output) - ) - m = re.match(r"^Exit code:\s*(\d+)", raw_text) - exit_code_val = int(m.group(1)) if m else None - if exit_code_val is not None and exit_code_val == 0: - completed_items = [ - *last_active_step_items, - "Completed successfully", - ] - elif exit_code_val is not None: - completed_items = [ - *last_active_step_items, - f"Exit code: {exit_code_val}", - ] - else: - completed_items = [*last_active_step_items, "Finished"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Running command", - status="completed", - items=completed_items, - ) - elif tool_name == "ls": - if isinstance(tool_output, dict): - ls_output = tool_output.get("result", "") - elif isinstance(tool_output, str): - ls_output = tool_output - else: - ls_output = str(tool_output) if tool_output else "" - file_names: list[str] = [] - if ls_output: - paths: list[str] = [] - try: - parsed = ast.literal_eval(ls_output) - if isinstance(parsed, list): - paths = [str(p) for p in parsed] - except (ValueError, SyntaxError): - paths = [ - line.strip() - for line in ls_output.strip().split("\n") - if line.strip() - ] - for p in paths: - name = p.rstrip("/").split("/")[-1] - if name and len(name) <= 40: - file_names.append(name) - elif name: - file_names.append(name[:37] + "...") - if file_names: - if len(file_names) <= 5: - completed_items = [f"[{name}]" for name in file_names] - else: - completed_items = [f"[{name}]" for name in file_names[:4]] - completed_items.append(f"(+{len(file_names) - 4} more)") - else: - completed_items = ["No files found"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Listing files", - status="completed", - items=completed_items, - ) - else: - # Fallback completion title — see the matching in-progress - # branch above for the wording rationale. 
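# Aside: the fallback-title rule noted above, isolated as a
# stand-alone sketch (hypothetical helper): snake_cased tool names
# render as a sentence-cased phrase.
def fallback_step_title(tool_name: str) -> str:
    return tool_name.replace("_", " ").strip().capitalize() or tool_name


# fallback_step_title("send_gmail_email") -> "Send gmail email"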
- fallback_title = ( - tool_name.replace("_", " ").strip().capitalize() or tool_name - ) - yield _emit_thinking_step( - step_id=original_step_id, - title=fallback_title, - status="completed", - items=last_active_step_items, - ) - - just_finished_tool = True - last_active_step_id = None - last_active_step_title = "" - last_active_step_items = [] - - if tool_name == "generate_podcast": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if isinstance(tool_output, dict) and tool_output.get("status") in ( - "pending", - "generating", - "processing", - ): - yield streaming_service.format_terminal_info( - f"Podcast queued: {tool_output.get('title', 'Podcast')}", - "success", - ) - elif isinstance(tool_output, dict) and tool_output.get("status") in ( - "ready", - "success", - ): - yield streaming_service.format_terminal_info( - f"Podcast generated successfully: {tool_output.get('title', 'Podcast')}", - "success", - ) - elif isinstance(tool_output, dict) and tool_output.get("status") in ( - "failed", - "error", - ): - error_msg = tool_output.get("error", "Unknown error") - yield streaming_service.format_terminal_info( - f"Podcast generation failed: {error_msg}", - "error", - ) - elif tool_name == "generate_video_presentation": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if ( - isinstance(tool_output, dict) - and tool_output.get("status") == "pending" - ): - yield streaming_service.format_terminal_info( - f"Video presentation queued: {tool_output.get('title', 'Presentation')}", - "success", - ) - elif ( - isinstance(tool_output, dict) - and tool_output.get("status") == "failed" - ): - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - yield streaming_service.format_terminal_info( - f"Presentation generation failed: {error_msg}", - "error", - ) - elif tool_name == "generate_image": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if isinstance(tool_output, dict): - if tool_output.get("error"): - yield streaming_service.format_terminal_info( - f"Image generation failed: {tool_output['error'][:60]}", - "error", - ) - else: - yield streaming_service.format_terminal_info( - "Image generated successfully", - "success", - ) - elif tool_name == "scrape_webpage": - if isinstance(tool_output, dict): - display_output = { - k: v for k, v in tool_output.items() if k != "content" - } - if "content" in tool_output: - content = tool_output.get("content", "") - display_output["content_preview"] = ( - content[:500] + "..." if len(content) > 500 else content - ) - yield _emit_tool_output( - tool_call_id, - display_output, - ) - else: - yield _emit_tool_output( - tool_call_id, - {"result": tool_output}, - ) - if isinstance(tool_output, dict) and "error" not in tool_output: - title = tool_output.get("title", "Webpage") - word_count = tool_output.get("word_count", 0) - yield streaming_service.format_terminal_info( - f"Scraped: {title[:40]}{'...' 
if len(title) > 40 else ''} ({word_count:,} words)", - "success", - ) - else: - error_msg = ( - tool_output.get("error", "Failed to scrape") - if isinstance(tool_output, dict) - else "Failed to scrape" - ) - yield streaming_service.format_terminal_info( - f"Scrape failed: {error_msg}", - "error", - ) - elif tool_name in ("write_file", "edit_file"): - resolved_path = _extract_resolved_file_path( - tool_name=tool_name, - tool_output=tool_output, - tool_input={"file_path": staged_file_path} - if staged_file_path - else None, - ) - result_text = _tool_output_to_text(tool_output) - if _tool_output_has_error(tool_output): - yield _emit_tool_output( - tool_call_id, - { - "status": "error", - "error": result_text, - "path": resolved_path, - }, - ) - else: - yield _emit_tool_output( - tool_call_id, - { - "status": "completed", - "path": resolved_path, - "result": result_text, - }, - ) - elif tool_name == "generate_report": - # Stream the full report result so frontend can render the ReportCard - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - # Send appropriate terminal message based on status - if ( - isinstance(tool_output, dict) - and tool_output.get("status") == "ready" - ): - word_count = tool_output.get("word_count", 0) - yield streaming_service.format_terminal_info( - f"Report generated: {tool_output.get('title', 'Report')} ({word_count:,} words)", - "success", - ) - else: - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - yield streaming_service.format_terminal_info( - f"Report generation failed: {error_msg}", - "error", - ) - elif tool_name == "generate_resume": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if ( - isinstance(tool_output, dict) - and tool_output.get("status") == "ready" - ): - yield streaming_service.format_terminal_info( - f"Resume generated: {tool_output.get('title', 'Resume')}", - "success", - ) - else: - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - yield streaming_service.format_terminal_info( - f"Resume generation failed: {error_msg}", - "error", - ) - elif tool_name in ( - "create_notion_page", - "update_notion_page", - "delete_notion_page", - "create_linear_issue", - "update_linear_issue", - "delete_linear_issue", - "create_google_drive_file", - "delete_google_drive_file", - "create_onedrive_file", - "delete_onedrive_file", - "create_dropbox_file", - "delete_dropbox_file", - "create_gmail_draft", - "update_gmail_draft", - "send_gmail_email", - "trash_gmail_email", - "create_calendar_event", - "update_calendar_event", - "delete_calendar_event", - "create_jira_issue", - "update_jira_issue", - "delete_jira_issue", - "create_confluence_page", - "update_confluence_page", - "delete_confluence_page", - ): - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - elif tool_name in ("execute", "execute_code"): - raw_text = ( - tool_output.get("result", "") - if isinstance(tool_output, dict) - else str(tool_output) - ) - exit_code: int | None = None - output_text = raw_text - m = re.match(r"^Exit code:\s*(\d+)", raw_text) - if m: - exit_code = int(m.group(1)) - om = re.search(r"\nOutput:\n([\s\S]*)", raw_text) - output_text = om.group(1) if om else "" - thread_id_str = config.get("configurable", 
{}).get("thread_id", "")
-
-                for sf_match in re.finditer(
-                    r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE
-                ):
-                    fpath = sf_match.group(1).strip()
-                    if fpath and fpath not in result.sandbox_files:
-                        result.sandbox_files.append(fpath)
-
-                yield _emit_tool_output(
-                    tool_call_id,
-                    {
-                        "exit_code": exit_code,
-                        "output": output_text,
-                        "thread_id": thread_id_str,
-                    },
-                )
-            elif tool_name == "web_search":
-                xml = (
-                    tool_output.get("result", str(tool_output))
-                    if isinstance(tool_output, dict)
-                    else str(tool_output)
-                )
-                citations: dict[str, dict[str, str]] = {}
-                for m in re.finditer(
-                    r"<title><!\[CDATA\[(.*?)\]\]></title>\s*<url>(.*?)</url>",
-                    xml,
-                ):
-                    title, url = m.group(1).strip(), m.group(2).strip()
-                    if url.startswith("http") and url not in citations:
-                        citations[url] = {"title": title}
-                for m in re.finditer(
-                    r'<chunk url="(.*?)">([\s\S]*?)</chunk>',
-                    xml,
-                ):
-                    chunk_url, content = m.group(1).strip(), m.group(2).strip()
-                    if (
-                        chunk_url.startswith("http")
-                        and chunk_url in citations
-                        and content
-                    ):
-                        citations[chunk_url]["snippet"] = (
-                            content[:200] + "…" if len(content) > 200 else content
-                        )
-                yield _emit_tool_output(
-                    tool_call_id,
-                    {"status": "completed", "citations": citations},
-                )
-            else:
-                yield _emit_tool_output(
-                    tool_call_id,
-                    {"status": "completed", "result_length": len(str(tool_output))},
-                )
-                yield streaming_service.format_terminal_info(
-                    f"Tool {tool_name} completed", "success"
-                )
-
-        elif event_type == "on_custom_event" and event.get("name") == "report_progress":
-            # Live progress updates from inside the generate_report tool
-            data = event.get("data", {})
-            message = data.get("message", "")
-            if message and last_active_step_id:
-                phase = data.get("phase", "")
-                # Always keep the "Topic: ..." line
-                topic_items = [
-                    item for item in last_active_step_items if item.startswith("Topic:")
-                ]
-
-                if phase in ("revising_section", "adding_section"):
-                    # During section-level ops: keep plan summary + show current op
-                    plan_items = [
-                        item
-                        for item in last_active_step_items
-                        if item.startswith("Topic:")
-                        or item.startswith("Modifying ")
-                        or item.startswith("Adding ")
-                        or item.startswith("Removing ")
-                    ]
-                    # Only keep plan_items that don't end with "..." (not progress lines)
-                    plan_items = [
-                        item for item in plan_items if not item.endswith("...")
-                    ]
-                    last_active_step_items = [*plan_items, message]
-                else:
-                    # Phase transitions: replace everything after topic
-                    last_active_step_items = [*topic_items, message]
-
-                yield _emit_thinking_step(
-                    step_id=last_active_step_id,
-                    title=last_active_step_title,
-                    status="in_progress",
-                    items=last_active_step_items,
-                )
-
-        elif (
-            event_type == "on_custom_event" and event.get("name") == "document_created"
-        ):
-            data = event.get("data", {})
-            if data.get("id"):
-                yield streaming_service.format_data(
-                    "documents-updated",
-                    {
-                        "action": "created",
-                        "document": data,
-                    },
-                )
-
-        elif event_type == "on_custom_event" and event.get("name") == "action_log":
-            # Surface a freshly committed AgentActionLog row so the chat
-            # tool card can render its Revert button immediately.
-            data = event.get("data", {})
-            if data.get("id") is not None:
-                yield streaming_service.format_data("action-log", data)
-
-        elif (
-            event_type == "on_custom_event"
-            and event.get("name") == "action_log_updated"
-        ):
-            # Reversibility flipped in kb_persistence after the SAVEPOINT
-            # for a destructive op (rm/rmdir/move/edit/write) committed.
-            # Frontend uses this to flip the card's Revert
-            # button on without re-fetching the actions list.
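# Aside: a stand-alone sketch of the custom-event to data-* relay the
# branches above implement (hypothetical helper; the real
# streaming-service method may differ). Only id-bearing payloads are
# forwarded; the returned envelope is then framed as an SSE data line.
from typing import Any


def relay_custom_event(kind: str, data: dict[str, Any]) -> dict[str, Any] | None:
    if data.get("id") is None:
        return None  # ignore payloads without a committed row id
    return {"type": f"data-{kind}", "data": data}


# relay_custom_event("action-log-updated", {"id": 7})
# -> {"type": "data-action-log-updated", "data": {"id": 7}}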
- data = event.get("data", {}) - if data.get("id") is not None: - yield streaming_service.format_data("action-log-updated", data) - - elif event_type in ("on_chain_end", "on_agent_end"): - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - - completion_event = complete_current_step() - if completion_event: - yield completion_event + accumulated_text = result.accumulated_text state = await agent.aget_state(config) state_values = getattr(state, "values", {}) or {} @@ -2397,7 +912,6 @@ async def _stream_agent_events( result.commit_gate_reason = "" result.accumulated_text = accumulated_text - result.agent_called_update_memory = called_update_memory _log_file_contract("turn_outcome", result) interrupt_value = _first_interrupt_value(state) diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py index 9142dd914..9a309f9d7 100644 --- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py @@ -5,7 +5,6 @@ from __future__ import annotations from collections.abc import AsyncIterator from typing import Any -from app.agents.new_chat.feature_flags import get_flags from app.tasks.chat.streaming.graph_stream.result import StreamingResult from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState @@ -30,7 +29,6 @@ async def stream_output( initial_step_id=initial_step_id, initial_step_title=initial_step_title, initial_step_items=initial_step_items, - parity_v2=bool(get_flags().enable_stream_parity_v2), ) astream_kwargs: dict[str, Any] = {"config": config, "version": "v2"} diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py index 861342b32..ef86dae56 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py @@ -33,7 +33,7 @@ def iter_chat_model_stream_frames( reasoning_delta = parts["reasoning"] text_delta = parts["text"] - if state.parity_v2 and reasoning_delta: + if reasoning_delta: if state.current_text_id is not None: yield streaming_service.format_text_end(state.current_text_id) if content_builder is not None: @@ -100,7 +100,7 @@ def iter_chat_model_stream_frames( if content_builder is not None: content_builder.on_text_delta(state.current_text_id, text_delta) - if state.parity_v2 and parts["tool_call_chunks"]: + if parts["tool_call_chunks"]: for tcc in parts["tool_call_chunks"]: idx = tcc.get("index") diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py index c316cc74a..e7d2d7f78 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -77,12 +77,11 @@ def iter_tool_start_frames( yield emit_thinking_step_frame(**frame_kw) matched_meta: dict[str, str] | None = None - if state.parity_v2: - taken_ui_ids = 
set(state.ui_tool_call_id_by_run.values()) - for meta in state.index_to_meta.values(): - if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: - matched_meta = meta - break + taken_ui_ids = set(state.ui_tool_call_id_by_run.values()) + for meta in state.index_to_meta.values(): + if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: + matched_meta = meta + break tool_call_id: str langchain_tool_call_id: str | None = None @@ -97,13 +96,12 @@ def iter_tool_start_frames( if run_id else streaming_service.generate_tool_call_id() ) - if state.parity_v2: - langchain_tool_call_id = match_buffered_langchain_tool_call_id( - state.pending_tool_call_chunks, - tool_name, - run_id, - state.lc_tool_call_id_by_run, - ) + langchain_tool_call_id = match_buffered_langchain_tool_call_id( + state.pending_tool_call_chunks, + tool_name, + run_id, + state.lc_tool_call_id_by_run, + ) yield streaming_service.format_tool_input_start( tool_call_id, tool_name, diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py index e8e35d0b2..7bd996606 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -22,7 +22,6 @@ class AgentEventRelayState: active_tool_depth: int = 0 called_update_memory: bool = False current_reasoning_id: str | None = None - parity_v2: bool = False pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) file_path_by_run: dict[str, str] = field(default_factory=dict) @@ -39,7 +38,6 @@ class AgentEventRelayState: initial_step_id: str | None = None, initial_step_title: str = "", initial_step_items: list[str] | None = None, - parity_v2: bool, ) -> AgentEventRelayState: counter = 1 if initial_step_id else 0 return cls( @@ -47,7 +45,6 @@ class AgentEventRelayState: last_active_step_id=initial_step_id, last_active_step_title=initial_step_title, last_active_step_items=list(initial_step_items or []), - parity_v2=parity_v2, ) def next_thinking_step_id(self, step_prefix: str) -> str: diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py index 6800be2af..099aea882 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py @@ -31,7 +31,6 @@ def _clear_all(monkeypatch: pytest.MonkeyPatch) -> None: "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "SURFSENSE_ENABLE_ACTION_LOG", "SURFSENSE_ENABLE_REVERT_ROUTE", - "SURFSENSE_ENABLE_STREAM_PARITY_V2", "SURFSENSE_ENABLE_PLUGIN_LOADER", "SURFSENSE_ENABLE_OTEL", "SURFSENSE_ENABLE_AGENT_CACHE", @@ -61,7 +60,6 @@ def test_defaults_match_shipped_agent_stack(monkeypatch: pytest.MonkeyPatch) -> assert flags.enable_kb_planner_runnable is True assert flags.enable_action_log is True assert flags.enable_revert_route is True - assert flags.enable_stream_parity_v2 is True assert flags.enable_plugin_loader is False assert flags.enable_otel is False # Phase 2: agent cache is now default-on (the prerequisite tool @@ -127,7 +125,6 @@ def test_each_flag_can_be_set_independently(monkeypatch: pytest.MonkeyPatch) -> "enable_kb_planner_runnable": "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "enable_action_log": "SURFSENSE_ENABLE_ACTION_LOG", "enable_revert_route": "SURFSENSE_ENABLE_REVERT_ROUTE", - "enable_stream_parity_v2": 
"SURFSENSE_ENABLE_STREAM_PARITY_V2", "enable_plugin_loader": "SURFSENSE_ENABLE_PLUGIN_LOADER", "enable_otel": "SURFSENSE_ENABLE_OTEL", } diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py index 892bb7a6a..9ae7defec 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py @@ -137,7 +137,7 @@ def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: def test_agent_event_relay_state_factory_matches_counter_rule() -> None: - s0 = AgentEventRelayState.for_invocation(parity_v2=False) + s0 = AgentEventRelayState.for_invocation() assert s0.thinking_step_counter == 0 assert s0.last_active_step_id is None @@ -145,11 +145,9 @@ def test_agent_event_relay_state_factory_matches_counter_rule() -> None: initial_step_id="thinking-resume-1", initial_step_title="Inherited", initial_step_items=["Topic: X"], - parity_v2=True, ) assert s1.thinking_step_counter == 1 assert s1.last_active_step_id == "thinking-resume-1" - assert s1.parity_v2 is True assert s1.next_thinking_step_id("thinking") == "thinking-2" diff --git a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py index c317eba20..4b1fadd9c 100644 --- a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py +++ b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py @@ -161,7 +161,7 @@ class TestToolHeavyTurn: _assert_jsonb_safe(snap) def test_tool_input_available_without_prior_start_creates_card(self): - # Legacy / parity_v2-OFF path: tool-input-available may be + # Late-registration: tool-input-available may be # emitted without a prior tool-input-start (no streamed # tool_call_chunks). The card should still be created. b = AssistantContentBuilder() @@ -187,7 +187,7 @@ class TestToolHeavyTurn: assert part["result"] == {"matches": 3} def test_tool_input_start_idempotent_for_same_ui_id(self): - # parity_v2: tool-input-start can fire from BOTH the chunk + # tool-input-start can fire from BOTH the chunk # registration path AND the canonical ``on_tool_start`` path. # The second call must not create a duplicate part. b = AssistantContentBuilder() diff --git a/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py b/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py index 60750396c..ada32d168 100644 --- a/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py +++ b/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py @@ -1,16 +1,13 @@ """Unit tests for live tool-call argument streaming. -Pins the wire format that ``_stream_agent_events`` emits when -``SURFSENSE_ENABLE_STREAM_PARITY_V2=true``: ``tool-input-start`` → -``tool-input-delta``... → ``tool-input-available`` → ``tool-output-available`` -all keyed by the same LangChain ``tool_call.id``. +Pins the wire format that ``_stream_agent_events`` emits: +``tool-input-start`` → ``tool-input-delta``... → ``tool-input-available`` → +``tool-output-available``, keyed consistently with LangChain ``tool_call.id`` +when the model streams indexed chunks. Identity is tracked in ``index_to_meta`` (per-chunk ``index``) and -``ui_tool_call_id_by_run`` (LangGraph ``run_id``); both are private to -``_stream_agent_events`` so we exercise them via the public wire output. 
- -These tests also lock in the legacy / parity_v2-OFF behaviour so the -synthetic ``call_`` shape stays stable for older clients. +``ui_tool_call_id_by_run`` (LangGraph ``run_id``); both are internal to the +streaming layer so we assert on the public SSE payloads. """ from __future__ import annotations @@ -22,8 +19,6 @@ from typing import Any import pytest -import app.tasks.chat.stream_new_chat as stream_module -from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.stream_new_chat import ( StreamResult, @@ -164,24 +159,6 @@ def _tool_end( } -@pytest.fixture -def parity_v2_on(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - stream_module, - "get_flags", - lambda: AgentFeatureFlags(enable_stream_parity_v2=True), - ) - - -@pytest.fixture -def parity_v2_off(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - stream_module, - "get_flags", - lambda: AgentFeatureFlags(enable_stream_parity_v2=False), - ) - - async def _drain( events: list[dict[str, Any]], state: _FakeAgentState | None = None ) -> list[dict[str, Any]]: @@ -253,12 +230,12 @@ class TestLegacyMatch: # --------------------------------------------------------------------------- -# parity_v2 wire format tests. +# Tool input streaming wire format # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_idless_chunk_merging_by_index(parity_v2_on: None) -> None: +async def test_idless_chunk_merging_by_index() -> None: """First chunk carries id+name; later idless chunks at the same ``index`` merge into the SAME ``tool-input-start`` ui id and emit one ``tool-input-delta`` per chunk.""" @@ -302,9 +279,7 @@ async def test_idless_chunk_merging_by_index(parity_v2_on: None) -> None: @pytest.mark.asyncio -async def test_two_interleaved_tool_calls_route_by_index( - parity_v2_on: None, -) -> None: +async def test_two_interleaved_tool_calls_route_by_index() -> None: """Two same-name calls with distinct indices keep their deltas routed to the right card.""" events = [ @@ -344,7 +319,7 @@ async def test_two_interleaved_tool_calls_route_by_index( @pytest.mark.asyncio -async def test_identity_stable_across_lifecycle(parity_v2_on: None) -> None: +async def test_identity_stable_across_lifecycle() -> None: """Whatever id ``tool-input-start`` chose must be the SAME id used on ``tool-input-available`` AND ``tool-output-available``.""" events = [ @@ -367,7 +342,7 @@ async def test_identity_stable_across_lifecycle(parity_v2_on: None) -> None: @pytest.mark.asyncio -async def test_no_duplicate_tool_input_start(parity_v2_on: None) -> None: +async def test_no_duplicate_tool_input_start() -> None: """When the chunk-emission loop already fired ``tool-input-start`` for this run, ``on_tool_start`` MUST NOT emit a second one.""" events = [ @@ -386,9 +361,7 @@ async def test_no_duplicate_tool_input_start(parity_v2_on: None) -> None: @pytest.mark.asyncio -async def test_active_text_closes_before_early_tool_input_start( - parity_v2_on: None, -) -> None: +async def test_active_text_closes_before_early_tool_input_start() -> None: """Streaming a text-delta then a tool-call chunk in subsequent chunks: the wire MUST contain ``text-end`` before the FIRST ``tool-input-start`` (clean part boundary on the frontend).""" @@ -409,9 +382,7 @@ async def test_active_text_closes_before_early_tool_input_start( @pytest.mark.asyncio -async def test_mixed_text_and_tool_chunk_preserve_order( - 
parity_v2_on: None, -) -> None: +async def test_mixed_text_and_tool_chunk_preserve_order() -> None: """One AIMessageChunk that carries BOTH ``text`` content AND ``tool_call_chunks`` should emit the text delta FIRST, then close text, then ``tool-input-start``+``tool-input-delta``.""" @@ -441,45 +412,7 @@ async def test_mixed_text_and_tool_chunk_preserve_order( @pytest.mark.asyncio -async def test_parity_v2_off_preserves_legacy_shape( - parity_v2_off: None, -) -> None: - """When the flag is OFF, no deltas are emitted and the ``toolCallId`` - is ``call_`` (NOT the lc id).""" - events = [ - _model_stream( - tool_call_chunks=[ - {"id": "lc-1", "name": "ls", "args": '{"path":"/"}', "index": 0} - ] - ), - _tool_start(name="ls", run_id="run-A", input_payload={"path": "/"}), - _tool_end(name="ls", run_id="run-A", tool_call_id="lc-1"), - ] - payloads = await _drain(events) - - assert _of_type(payloads, "tool-input-delta") == [] - starts = _of_type(payloads, "tool-input-start") - assert len(starts) == 1 - assert starts[0]["toolCallId"].startswith("call_run-A") - # No ``langchainToolCallId`` propagation on ``tool-input-start`` in - # legacy mode (the start event fires before the ToolMessage is - # available, so we can't extract the authoritative LangChain id yet). - assert "langchainToolCallId" not in starts[0] - output = _of_type(payloads, "tool-output-available") - assert output[0]["toolCallId"].startswith("call_run-A") - # ``tool-output-available`` MUST carry ``langchainToolCallId`` even - # in legacy mode: the chat tool card uses it to backfill the - # LangChain id and join against the ``data-action-log`` SSE event - # (keyed by ``lc_tool_call_id``) so the inline Revert button can - # light up. Sourced from the returned ``ToolMessage.tool_call_id``, - # which is populated regardless of feature-flag state. 
- assert output[0]["langchainToolCallId"] == "lc-1" - - -@pytest.mark.asyncio -async def test_skip_append_prevents_stale_id_reuse( - parity_v2_on: None, -) -> None: +async def test_skip_append_prevents_stale_id_reuse() -> None: """Two same-name tools: the SECOND tool's ``langchainToolCallId`` must NOT come from the first tool's chunk (``pending_tool_call_chunks`` must stay empty for indexed-registered chunks).""" @@ -506,9 +439,7 @@ async def test_skip_append_prevents_stale_id_reuse( @pytest.mark.asyncio -async def test_registration_waits_for_both_id_and_name( - parity_v2_on: None, -) -> None: +async def test_registration_waits_for_both_id_and_name() -> None: """An id-only chunk (no name yet) must NOT emit ``tool-input-start``.""" events = [ _model_stream( @@ -520,12 +451,9 @@ async def test_registration_waits_for_both_id_and_name( @pytest.mark.asyncio -async def test_unmatched_fallback_still_attaches_lc_id( - parity_v2_on: None, -) -> None: - """parity_v2 ON, but the provider didn't include an ``index``: the - legacy fallback path must still emit ``tool-input-start`` with the - matching ``langchainToolCallId``.""" +async def test_unmatched_fallback_still_attaches_lc_id() -> None: + """When the provider omits chunk ``index``, buffered chunks still get a + ``tool-input-start`` with the matching ``langchainToolCallId``.""" events = [ # No index on the chunk → not registered into index_to_meta; # falls through to ``pending_tool_call_chunks`` so the legacy @@ -542,9 +470,7 @@ async def test_unmatched_fallback_still_attaches_lc_id( @pytest.mark.asyncio -async def test_interrupt_request_uses_task_that_contains_interrupt( - parity_v2_on: None, -) -> None: +async def test_interrupt_request_uses_task_that_contains_interrupt() -> None: interrupt_payload = { "type": "calendar_event_create", "action": { diff --git a/surfsense_web/components/assistant-ui/reasoning-message-part.tsx b/surfsense_web/components/assistant-ui/reasoning-message-part.tsx index 70636eab8..6e7aaf048 100644 --- a/surfsense_web/components/assistant-ui/reasoning-message-part.tsx +++ b/surfsense_web/components/assistant-ui/reasoning-message-part.tsx @@ -7,8 +7,8 @@ import { TextShimmerLoader } from "@/components/prompt-kit/loader"; import { cn } from "@/lib/utils"; /** - * Renders the structured `reasoning` part emitted by the backend's - * stream-parity v2 path (A1). + * Renders the structured `reasoning` part emitted by the backend stream + * (typed reasoning deltas from the chat model). * * Behaviour mirrors the existing `ThinkingStepsDisplay`: * - collapsed by default; diff --git a/surfsense_web/components/assistant-ui/tool-fallback.tsx b/surfsense_web/components/assistant-ui/tool-fallback.tsx index 06082c9c7..ba58f4158 100644 --- a/surfsense_web/components/assistant-ui/tool-fallback.tsx +++ b/surfsense_web/components/assistant-ui/tool-fallback.tsx @@ -48,13 +48,11 @@ import { cn } from "@/lib/utils"; * stream, post-stream reversibility flip, and explicit revert clicks. * * Match key (in priority order): - * 1. ``a.tool_call_id === toolCallId`` — direct hit in parity_v2 when - * the model streamed ``tool_call_chunks`` so the card's synthetic - * id IS the LangChain id. - * 2. ``a.tool_call_id === langchainToolCallId`` — legacy mode (or - * parity_v2 with provider-side chunk emission) where the card's - * synthetic id is ``call_`` and the LangChain id is - * backfilled onto the part by ``tool-output-available``. + * 1. 
``a.tool_call_id === toolCallId`` — direct hit when the model
+ *    streamed ``tool_call_chunks`` so the card id matches the LangChain id.
+ * 2. ``a.tool_call_id === langchainToolCallId`` — synthetic card id is
+ *    ``call_<run_id>`` and the LangChain id is backfilled by
+ *    ``tool-output-available``.
  * 3. ``(chat_turn_id, tool_name, position-within-turn)`` — fallback
  *    for cards whose synthetic id is ``call_<run_id>`` AND whose
  *    ``langchainToolCallId`` never got backfilled (provider emitted
@@ -116,7 +114,7 @@ function ToolCardRevertButton({

 	const action = useMemo(() => {
 		// Tier 1 + 2: O(1) Map-backed direct id match. Covers
-		// ~all parity_v2 streams and any legacy stream that backfilled
+		// indexed chunk streams and any stream that backfilled
 		// ``langchainToolCallId`` via ``tool-output-available``.
 		const direct = findByToolCallId(toolCallId) ?? findByToolCallId(langchainToolCallId);
 		if (direct) return direct;
diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts
index 27047ecfe..809e214d1 100644
--- a/surfsense_web/lib/chat/streaming-state.ts
+++ b/surfsense_web/lib/chat/streaming-state.ts
@@ -421,9 +421,8 @@ export type SSEEvent =
 	/**
 	 * Live tool-call argument delta. Concatenated into
 	 * ``argsText`` on the matching ``tool-call`` content part
-	 * by ``appendToolInputDelta``. parity_v2 only — the legacy
-	 * code path emits ``tool-input-available`` without prior
-	 * deltas.
+	 * by ``appendToolInputDelta``. Some providers emit
+	 * ``tool-input-available`` without prior deltas.
 	 */
 	type: "tool-input-delta";
 	toolCallId: string;

From f0f87107f2a9922bb6e4f8d2cfb48764dc098e5a Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 8 May 2026 22:46:58 +0200
Subject: [PATCH 26/58] Track active task span id on the agent event relay
 state.

---
 .../app/tasks/chat/streaming/relay/state.py   | 19 ++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py
index 7bd996606..82525a52f 100644
--- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py
+++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py
@@ -8,7 +8,13 @@ from typing import Any

 @dataclass
 class AgentEventRelayState:
-    """Tracks text, thinking steps, tool depth, and pending tool-call metadata."""
+    """Tracks text, thinking steps, tool depth, and pending tool-call metadata.
+
+    ``active_span_id`` groups steps/tools for one open ``task`` episode.
+    ``active_task_run_id`` is the LangGraph ``run_id`` of that ``task`` so we
+    only clear the span when that run ends (not when child tools end). Handlers
+    will set/clear these via ``task_span`` helpers in a later change.
+    """

     accumulated_text: str = ""
     current_text_id: str | None = None
@@ -30,6 +36,17 @@ class AgentEventRelayState:
     current_lc_tool_call_id: dict[str, str | None] = field(
         default_factory=lambda: {"value": None}
     )
+    # Open ``task`` delegation span (one id shared by nested activity); unset outside.
+    active_span_id: str | None = None
+    active_task_run_id: str | None = None
+    # Span id minted when a ``task`` tool_call_chunk registers (before ``on_tool_start``).
+ pending_task_span_by_lc: dict[str, str] = field(default_factory=dict) + + def span_metadata_if_active(self) -> dict[str, Any] | None: + """``{"spanId": ...}`` when a span is active; ``None`` otherwise.""" + if self.active_span_id: + return {"spanId": self.active_span_id} + return None @classmethod def for_invocation( From f944cdacb753369af8e117e34cdaca39c3c48c73 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:03 +0200 Subject: [PATCH 27/58] Add helpers to open and close task delegation span ids. --- .../tasks/chat/streaming/relay/task_span.py | 74 +++++++++++++++++++ .../tasks/chat/streaming/test_task_span.py | 69 +++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/task_span.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/task_span.py b/surfsense_backend/app/tasks/chat/streaming/relay/task_span.py new file mode 100644 index 000000000..c4cdf24ba --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/task_span.py @@ -0,0 +1,74 @@ +"""Open/close ``active_span_id`` around a delegating ``task`` tool run.""" + +from __future__ import annotations + +import uuid + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState + + +def new_span_id() -> str: + """One delegation-episode id (shared by activity under an open ``task``).""" + return f"spn_{uuid.uuid4().hex}" + + +def _run_key(run_id: str) -> str: + return (run_id or "").strip() + + +def _lc_key(langchain_tool_call_id: str | None) -> str: + return (langchain_tool_call_id or "").strip() + + +def ensure_pending_task_span_for_lc(state: AgentEventRelayState, lc_id: str) -> str: + """Return span id for this LangChain tool call id, storing it in ``pending`` if new. + + Used from ``chat_model_stream`` when the first ``task`` chunk registers so + early ``tool-input-start`` can carry ``metadata.spanId`` before ``on_tool_start``. + """ + key = _lc_key(lc_id) + if not key: + return new_span_id() + existing = state.pending_task_span_by_lc.get(key) + if existing: + return existing + sid = new_span_id() + state.pending_task_span_by_lc[key] = sid + return sid + + +def open_task_span( + state: AgentEventRelayState, + *, + run_id: str, + langchain_tool_call_id: str | None = None, +) -> str: + """Set ``active_span_id`` from pending (same lc) or mint; remember ``active_task_run_id``. + + Call when the ``task`` tool **starts**. Nested ``task`` is not supported: + a second call replaces the previous span without restoring it. 
+ """ + key = _lc_key(langchain_tool_call_id) + sid: str | None = state.pending_task_span_by_lc.pop(key, None) if key else None + if not sid: + sid = new_span_id() + state.active_span_id = sid + state.active_task_run_id = _run_key(run_id) or None + return sid + + +def clear_task_span_if_delegating_task_ended( + state: AgentEventRelayState, + *, + tool_name: str, + run_id: str, +) -> None: + """Clear span state only when this event is the end of the opening ``task`` run.""" + if tool_name != "task": + return + if state.active_task_run_id is None: + return + if state.active_task_run_id != _run_key(run_id): + return + state.active_span_id = None + state.active_task_run_id = None diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py new file mode 100644 index 000000000..349c9879c --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py @@ -0,0 +1,69 @@ +"""Unit tests for ``task_span`` open/close helpers.""" + +from __future__ import annotations + +import pytest + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import ( + clear_task_span_if_delegating_task_ended, + ensure_pending_task_span_for_lc, + open_task_span, +) + +pytestmark = pytest.mark.unit + + +def test_open_task_span_sets_span_and_run_id() -> None: + state = AgentEventRelayState.for_invocation() + sid = open_task_span(state, run_id="run-abc") + assert sid.startswith("spn_") + assert state.active_span_id == sid + assert state.active_task_run_id == "run-abc" + assert state.span_metadata_if_active() == {"spanId": sid} + + +def test_clear_ignored_for_non_task_tool() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-1") + sid = state.active_span_id + clear_task_span_if_delegating_task_ended( + state, tool_name="web_search", run_id="run-1" + ) + assert state.active_span_id == sid + assert state.active_task_run_id == "run-1" + + +def test_clear_ignored_when_task_run_id_mismatches() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-open") + clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-other") + assert state.active_span_id is not None + assert state.active_task_run_id == "run-open" + + +def test_clear_on_matching_task_end() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-x") + clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-x") + assert state.active_span_id is None + assert state.active_task_run_id is None + assert state.span_metadata_if_active() is None + + +def test_clear_noop_when_no_open_span() -> None: + state = AgentEventRelayState.for_invocation() + clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-x") + assert state.active_span_id is None + + +def test_pending_then_open_reuses_same_span_id() -> None: + state = AgentEventRelayState.for_invocation() + sid_pending = ensure_pending_task_span_for_lc(state, "lc-task-1") + assert state.pending_task_span_by_lc["lc-task-1"] == sid_pending + sid_active = open_task_span( + state, run_id="run-1", langchain_tool_call_id="lc-task-1" + ) + assert sid_active == sid_pending + assert state.active_span_id == sid_pending + assert "lc-task-1" not in state.pending_task_span_by_lc From 695f9ded2c6401f23e386019eff346df39a3a6f9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:08 
+0200 Subject: [PATCH 28/58] Mint pending span id when the task tool registers from chunks. --- .../chat/streaming/handlers/chat_model_stream.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py index ef86dae56..c3f6d6d59 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py @@ -7,6 +7,7 @@ from typing import Any from app.tasks.chat.streaming.helpers.chunk_parts import extract_chunk_parts from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import ensure_pending_task_span_for_lc from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, ) @@ -41,6 +42,7 @@ def iter_chat_model_stream_frames( state.current_text_id = None if state.current_reasoning_id is None: comp, new_active = complete_active_thinking_step( + state=state, streaming_service=streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, @@ -76,6 +78,7 @@ def iter_chat_model_stream_frames( state.current_reasoning_id = None if state.current_text_id is None: comp, new_active = complete_active_thinking_step( + state=state, streaming_service=streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, @@ -109,6 +112,10 @@ def iter_chat_model_stream_frames( name = tcc.get("name") if lc_id and name: ui_id = lc_id + tool_input_metadata: dict[str, Any] | None = None + if name == "task": + sid = ensure_pending_task_span_for_lc(state, str(lc_id)) + tool_input_metadata = {"spanId": sid} if state.current_text_id is not None: yield streaming_service.format_text_end(state.current_text_id) @@ -132,9 +139,12 @@ def iter_chat_model_stream_frames( ui_id, name, langchain_tool_call_id=lc_id, + metadata=tool_input_metadata, ) if content_builder is not None: - content_builder.on_tool_input_start(ui_id, name, lc_id) + content_builder.on_tool_input_start( + ui_id, name, lc_id, metadata=tool_input_metadata + ) meta = state.index_to_meta.get(idx) if idx is not None else None if meta: From 2c1b219c6cb32573e08bc0852dd2236fc3e325cc Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:32 +0200 Subject: [PATCH 29/58] Open task spans at tool start and tag unmatched tool-input SSE. 
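Opening the span before any tool-input SSE for the run means the fallback
(unmatched) ``tool-input-start`` / ``tool-input-available`` frames can carry
``metadata.spanId``. A rough sketch of the resulting frames for a delegating
``task`` call and one nested child tool; the ids and exact payload keys are
illustrative, and the ``metadata`` kwarg on the streaming service lands later
in this series (PATCH 33):

    data: {"type": "tool-input-start", "toolCallId": "call_run-T_1",
           "toolName": "task", "metadata": {"spanId": "spn_3f2a"}}

    data: {"type": "tool-input-start", "toolCallId": "call_run-C_1",
           "toolName": "web_search", "metadata": {"spanId": "spn_3f2a"}}

Both frames share one spanId, which is what lets the frontend group nested
activity under the open delegation episode.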
--- .../chat/streaming/handlers/tool_start.py | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py index e7d2d7f78..3b3537567 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -11,6 +11,7 @@ from app.tasks.chat.streaming.helpers.tool_call_matching import ( match_buffered_langchain_tool_call_id, ) from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import open_task_span from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, ) @@ -46,6 +47,7 @@ def iter_tool_start_frames( if state.last_active_step_title != "Synthesizing response": comp, new_active = complete_active_thinking_step( + state=state, streaming_service=streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, @@ -62,20 +64,6 @@ def iter_tool_start_frames( state.tool_step_ids[run_id] = tool_step_id state.last_active_step_id = tool_step_id - thinking = resolve_tool_start_thinking(tool_name, tool_input) - state.last_active_step_title = thinking.title - state.last_active_step_items = thinking.items - frame_kw: dict[str, Any] = { - "streaming_service": streaming_service, - "content_builder": content_builder, - "step_id": tool_step_id, - "title": thinking.title, - "status": "in_progress", - } - if thinking.include_items_on_frame: - frame_kw["items"] = thinking.items - yield emit_thinking_step_frame(**frame_kw) - matched_meta: dict[str, str] | None = None taken_ui_ids = set(state.ui_tool_call_id_by_run.values()) for meta in state.index_to_meta.values(): @@ -102,16 +90,46 @@ def iter_tool_start_frames( run_id, state.lc_tool_call_id_by_run, ) + + if tool_name == "task": + open_task_span( + state, + run_id=run_id, + langchain_tool_call_id=langchain_tool_call_id, + ) + + span_md = state.span_metadata_if_active() + + if matched_meta is None: yield streaming_service.format_tool_input_start( tool_call_id, tool_name, langchain_tool_call_id=langchain_tool_call_id, + metadata=span_md, ) if content_builder is not None: content_builder.on_tool_input_start( - tool_call_id, tool_name, langchain_tool_call_id + tool_call_id, + tool_name, + langchain_tool_call_id, + metadata=span_md, ) + thinking = resolve_tool_start_thinking(tool_name, tool_input) + state.last_active_step_title = thinking.title + state.last_active_step_items = thinking.items + frame_kw: dict[str, Any] = { + "streaming_service": streaming_service, + "content_builder": content_builder, + "step_id": tool_step_id, + "title": thinking.title, + "status": "in_progress", + "metadata": span_md, + } + if thinking.include_items_on_frame: + frame_kw["items"] = thinking.items + yield emit_thinking_step_frame(**frame_kw) + if run_id: state.ui_tool_call_id_by_run[run_id] = tool_call_id @@ -130,6 +148,7 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id=langchain_tool_call_id, + metadata=span_md, ) if content_builder is not None: content_builder.on_tool_input_available( @@ -137,4 +156,5 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id, + metadata=span_md, ) From 3ed09bdd90f114bfb370c614bf3e697244288965 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:38 +0200 Subject: [PATCH 30/58] Clear spans after task 
completion and pass span id on tool output. --- .../app/tasks/chat/streaming/handlers/tool_end.py | 7 +++++++ .../app/tasks/chat/streaming/handlers/tool_output_frame.py | 7 ++++++- .../chat/streaming/handlers/tools/emission_context.py | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py index 0bfef25eb..ec7d6551c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -13,6 +13,7 @@ from app.tasks.chat.streaming.handlers.tools import ( ) from app.tasks.chat.streaming.helpers.tool_output import tool_output_has_error from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import clear_task_span_if_delegating_task_ended from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame @@ -91,6 +92,7 @@ def iter_tool_end_frames( title=title, status="completed", items=completed_items, + metadata=state.span_metadata_if_active(), ) state.just_finished_tool = True @@ -108,5 +110,10 @@ def iter_tool_end_frames( stream_result=result, langgraph_config=config, staged_workspace_file_path=staged_file_path, + tool_metadata=state.span_metadata_if_active(), ) yield from iter_tool_completion_emission_frames(emission_ctx) + + clear_task_span_if_delegating_task_ended( + state, tool_name=tool_name, run_id=run_id + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py index 07244364c..4cd8e3274 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py @@ -12,13 +12,18 @@ def emit_tool_output_available_frame( langchain_id_holder: dict[str, str | None], call_id: str, output: Any, + tool_metadata: dict[str, Any] | None = None, ) -> str: if content_builder is not None: content_builder.on_tool_output_available( - call_id, output, langchain_id_holder["value"] + call_id, + output, + langchain_id_holder["value"], + metadata=tool_metadata, ) return streaming_service.format_tool_output_available( call_id, output, langchain_tool_call_id=langchain_id_holder["value"], + metadata=tool_metadata, ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py index d9ff796c0..baa1d7336 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py @@ -23,6 +23,7 @@ class ToolCompletionEmissionContext: stream_result: Any langgraph_config: dict[str, Any] staged_workspace_file_path: str | None + tool_metadata: dict[str, Any] | None = None def emit_tool_output_card(self, payload: Any) -> str: return emit_tool_output_available_frame( @@ -31,4 +32,5 @@ class ToolCompletionEmissionContext: langchain_id_holder=self.langchain_tool_call_id_holder, call_id=self.tool_call_id, output=payload, + tool_metadata=self.tool_metadata, ) From 1dcb08e925f3d08bcd7936e6ce52b18efd9f9fd8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:46 +0200 Subject: [PATCH 31/58] Attach active span metadata to thinking-step SSE and completion. 
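Net effect on the wire: while a ``task`` span is open, every
``data-thinking-step`` frame, including the completion frame emitted by
``complete_active_thinking_step``, carries the span id. A sketch with
illustrative values (envelope shape per ``format_data``):

    data: {"type": "data-thinking-step", "data": {"id": "thinking-4",
           "title": "Searching the web", "status": "completed",
           "items": [], "metadata": {"spanId": "spn_3f2a"}}}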
--- .../app/tasks/chat/streaming/relay/event_relay.py | 1 + .../tasks/chat/streaming/relay/thinking_step_completion.py | 3 +++ .../app/tasks/chat/streaming/relay/thinking_step_sse.py | 6 +++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index 872998926..03d6a66e6 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -115,6 +115,7 @@ class EventRelay: state.current_text_id = None completion_event, new_active = complete_active_thinking_step( + state=state, streaming_service=self.streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py index a0be71281..ad0930341 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py @@ -4,11 +4,13 @@ from __future__ import annotations from typing import Any +from .state import AgentEventRelayState from .thinking_step_sse import emit_thinking_step_frame def complete_active_thinking_step( *, + state: AgentEventRelayState, streaming_service: Any, content_builder: Any | None, last_active_step_id: str | None, @@ -26,6 +28,7 @@ def complete_active_thinking_step( title=last_active_step_title, status="completed", items=last_active_step_items if last_active_step_items else None, + metadata=state.span_metadata_if_active(), ) return event, None return None, last_active_step_id diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py index 9e8c08dd5..6737f536b 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py @@ -13,12 +13,16 @@ def emit_thinking_step_frame( title: str, status: str = "in_progress", items: list[str] | None = None, + metadata: dict[str, Any] | None = None, ) -> str: if content_builder is not None: - content_builder.on_thinking_step(step_id, title, status, items) + content_builder.on_thinking_step( + step_id, title, status, items, metadata=metadata + ) return streaming_service.format_thinking_step( step_id=step_id, title=title, status=status, items=items, + metadata=metadata, ) From f1d80ffe5d9ca9005b4aa4769cd34a418a6fc345 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:50 +0200 Subject: [PATCH 32/58] Forward span metadata from report_progress thinking updates. 
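Report-progress updates re-emit the active thinking step mid-tool, so they
need the same tagging. A minimal sketch of the frame construction this wires
up, using made-up ids and titles; the helper signature is the one extended in
the previous patch:

    frame = emit_thinking_step_frame(
        streaming_service=streaming_service,
        content_builder=None,  # SSE-only sketch; persistence omitted
        step_id="thinking-3",                      # illustrative
        title="Generating report",                 # illustrative
        status="in_progress",
        items=["Topic: Q3 roadmap", "Drafting section 2..."],
        metadata={"spanId": "spn_3f2a"},  # state.span_metadata_if_active()
    )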
--- .../app/tasks/chat/streaming/handlers/custom_event_dispatch.py | 1 + .../app/tasks/chat/streaming/handlers/custom_events.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py index b373919cf..69f4b8a24 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py @@ -33,6 +33,7 @@ def iter_custom_event_frames( last_active_step_items=state.last_active_step_items, streaming_service=streaming_service, content_builder=content_builder, + thinking_metadata=state.span_metadata_if_active(), ) if frame: yield frame diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py index 765f1d790..e48e2c493 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py @@ -15,6 +15,7 @@ def handle_report_progress( last_active_step_items: list[str], streaming_service: Any, content_builder: Any | None, + thinking_metadata: dict[str, Any] | None = None, ) -> tuple[str | None, list[str]]: """Update report step items; may emit one thinking SSE frame. @@ -50,6 +51,7 @@ def handle_report_progress( title=last_active_step_title, status="in_progress", items=new_items, + metadata=thinking_metadata, ) return frame, new_items From e802de233330c8aeb913e02ab922fa4a65dc7a00 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:58 +0200 Subject: [PATCH 33/58] Include optional metadata on tool and thinking-step SSE payloads. --- .../app/services/new_streaming_service.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py index cec0c8a5e..ba0cb8753 100644 --- a/surfsense_backend/app/services/new_streaming_service.py +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -456,6 +456,8 @@ class VercelStreamingService: title: str, status: str = "in_progress", items: list[str] | None = None, + *, + metadata: dict[str, Any] | None = None, ) -> str: """ Format a thinking step for chain-of-thought display (SurfSense specific). @@ -469,15 +471,15 @@ class VercelStreamingService: Returns: str: SSE formatted thinking step data part """ - return self.format_data( - "thinking-step", - { - "id": step_id, - "title": title, - "status": status, - "items": items or [], - }, - ) + payload: dict[str, Any] = { + "id": step_id, + "title": title, + "status": status, + "items": items or [], + } + if metadata: + payload["metadata"] = metadata + return self.format_data("thinking-step", payload) def format_thread_title_update(self, thread_id: int, title: str) -> str: """ @@ -601,6 +603,7 @@ class VercelStreamingService: tool_name: str, *, langchain_tool_call_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> str: """ Format the start of tool input streaming. 
@@ -635,6 +638,8 @@ class VercelStreamingService: } if langchain_tool_call_id: payload["langchainToolCallId"] = langchain_tool_call_id + if metadata: + payload["metadata"] = metadata return self._format_sse(payload) def format_tool_input_delta(self, tool_call_id: str, input_text_delta: str) -> str: @@ -666,6 +671,7 @@ class VercelStreamingService: input_data: dict[str, Any], *, langchain_tool_call_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> str: """ Format the completion of tool input. @@ -691,6 +697,8 @@ class VercelStreamingService: } if langchain_tool_call_id: payload["langchainToolCallId"] = langchain_tool_call_id + if metadata: + payload["metadata"] = metadata return self._format_sse(payload) def format_tool_output_available( @@ -699,6 +707,7 @@ class VercelStreamingService: output: Any, *, langchain_tool_call_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> str: """ Format tool execution output. @@ -725,6 +734,8 @@ class VercelStreamingService: } if langchain_tool_call_id: payload["langchainToolCallId"] = langchain_tool_call_id + if metadata: + payload["metadata"] = metadata return self._format_sse(payload) # ========================================================================= From 3dbcac4b9d656af92fd7fe376386124411fd53e5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:48:07 +0200 Subject: [PATCH 34/58] Merge span metadata into persisted tool-call and thinking parts. --- .../app/tasks/chat/content_builder.py | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/content_builder.py b/surfsense_backend/app/tasks/chat/content_builder.py index 32b49e6b5..2b6dbe649 100644 --- a/surfsense_backend/app/tasks/chat/content_builder.py +++ b/surfsense_backend/app/tasks/chat/content_builder.py @@ -51,6 +51,15 @@ logger = logging.getLogger(__name__) _MEANINGFUL_PART_TYPES: frozenset[str] = frozenset({"text", "reasoning", "tool-call"}) +def _merge_tool_part_metadata(part: dict[str, Any], metadata: dict[str, Any] | None) -> None: + if not metadata: + return + md = part.setdefault("metadata", {}) + for k, v in metadata.items(): + if k not in md: + md[k] = v + + class AssistantContentBuilder: """Server-side projection of ``surfsense_web/lib/chat/streaming-state.ts``. @@ -177,6 +186,8 @@ class AssistantContentBuilder: ui_id: str, tool_name: str, langchain_tool_call_id: str | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Register a tool-call card. Args are filled in by later events.""" if not ui_id: @@ -187,11 +198,11 @@ class AssistantContentBuilder: # (the canonical path). The FE de-dupes via ``toolCallIndices``; # we mirror that here. 
if ui_id in self._tool_call_idx_by_ui_id: - if langchain_tool_call_id: - idx = self._tool_call_idx_by_ui_id[ui_id] - part = self.parts[idx] - if not part.get("langchainToolCallId"): - part["langchainToolCallId"] = langchain_tool_call_id + idx = self._tool_call_idx_by_ui_id[ui_id] + part = self.parts[idx] + if langchain_tool_call_id and not part.get("langchainToolCallId"): + part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(part, metadata) return part: dict[str, Any] = { @@ -202,6 +213,8 @@ class AssistantContentBuilder: } if langchain_tool_call_id: part["langchainToolCallId"] = langchain_tool_call_id + if metadata: + part["metadata"] = dict(metadata) self.parts.append(part) self._tool_call_idx_by_ui_id[ui_id] = len(self.parts) - 1 @@ -235,6 +248,8 @@ class AssistantContentBuilder: tool_name: str, args: dict[str, Any], langchain_tool_call_id: str | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Finalize the tool-call card's input. @@ -264,6 +279,7 @@ class AssistantContentBuilder: part["argsText"] = final_args_text if langchain_tool_call_id and not part.get("langchainToolCallId"): part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(part, metadata) return # No prior tool-input-start: register the card now. @@ -276,6 +292,7 @@ class AssistantContentBuilder: } if langchain_tool_call_id: new_part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(new_part, metadata) self.parts.append(new_part) self._tool_call_idx_by_ui_id[ui_id] = len(self.parts) - 1 @@ -287,6 +304,8 @@ class AssistantContentBuilder: ui_id: str, output: Any, langchain_tool_call_id: str | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Attach the tool's output (``result``) to the matching card. @@ -305,6 +324,7 @@ class AssistantContentBuilder: part["result"] = output if langchain_tool_call_id and not part.get("langchainToolCallId"): part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(part, metadata) # ------------------------------------------------------------------ # Thinking steps & step separators @@ -316,6 +336,8 @@ class AssistantContentBuilder: title: str, status: str, items: list[str] | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Update / insert the singleton ``data-thinking-steps`` part. @@ -328,12 +350,14 @@ class AssistantContentBuilder: if not step_id: return - new_step = { + new_step: dict[str, Any] = { "id": step_id, "title": title or "", "status": status or "in_progress", "items": list(items) if items else [], } + if metadata: + new_step["metadata"] = dict(metadata) # Find existing data-thinking-steps part. existing_idx = -1 @@ -347,6 +371,8 @@ class AssistantContentBuilder: replaced = False for i, step in enumerate(current_steps): if step.get("id") == step_id: + if not metadata and step.get("metadata"): + new_step["metadata"] = dict(step["metadata"]) current_steps[i] = new_step replaced = True break From d136fcd054d7bbd4d51ecd9c4e5368037cc43e0e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:16:44 +0200 Subject: [PATCH 35/58] Add tool_activity_metadata to merge spanId and thinkingStepId for tools. 
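For context, the merged metadata ultimately lands on the persisted
``tool-call`` content part. Roughly, with illustrative values (shape per the
``AssistantContentBuilder`` docstring):

    {
        "type": "tool-call",
        "toolCallId": "call_run-A_1",
        "toolName": "web_search",
        "args": {"query": "q3 roadmap"},
        "langchainToolCallId": "lc-1",
        "metadata": {"spanId": "spn_3f2a", "thinkingStepId": "thinking-5"}
    }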
--- .../app/tasks/chat/streaming/relay/state.py | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py index 82525a52f..27898403d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -10,10 +10,16 @@ from typing import Any class AgentEventRelayState: """Tracks text, thinking steps, tool depth, and pending tool-call metadata. - ``active_span_id`` groups steps/tools for one open ``task`` episode. - ``active_task_run_id`` is the LangGraph ``run_id`` of that ``task`` so we - only clear the span when that run ends (not when child tools end). Handlers - will set/clear these via ``task_span`` helpers in a later change. + **Task span (`spanId`)** — ``active_span_id`` groups steps and tools for one + open delegating ``task`` episode. ``active_task_run_id`` is the LangGraph + ``run_id`` of that ``task`` so the span clears only when that run ends, not + when child tools end. Open/close uses ``relay.task_span`` helpers. + + **Tool ↔ thinking link (`thinkingStepId`)** — Each tool run gets a thinking-row + id (``tool_step_ids[run_id]``, emitted as ``data-thinking-step`` ``data.id``). + ``tool_activity_metadata`` supplies ``metadata`` for ``tool-input-start`` / + ``tool-input-available`` (``handlers.tool_start``) and + ``tool-output-available`` (``handlers.tool_end``). """ accumulated_text: str = "" @@ -48,6 +54,29 @@ class AgentEventRelayState: return {"spanId": self.active_span_id} return None + def tool_activity_metadata( + self, *, thinking_step_id: str | None + ) -> dict[str, Any] | None: + """Build ``metadata`` for tool SSE and ``tool-call`` persistence. + + Contract (keys omitted when not applicable): + + - ``spanId`` (str): present while a task-delegation span is active + (same value as ``span_metadata_if_active()``). + - ``thinkingStepId`` (str): equals the thinking-step row ``id`` for this + tool (``data-thinking-step`` payload ``data.id`` on the wire). + + Returns ``None`` if neither applies. Whitespace-only + ``thinking_step_id`` is ignored. + """ + out: dict[str, Any] = {} + if self.active_span_id: + out["spanId"] = self.active_span_id + tid = (thinking_step_id or "").strip() + if tid: + out["thinkingStepId"] = tid + return out if out else None + @classmethod def for_invocation( cls, From 007a0a30ec7c2850316f5503729a5f8c67312074 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:16:56 +0200 Subject: [PATCH 36/58] Cover tool_activity_metadata for span-only, step-only, and combined cases. 
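To run just this module (assuming pytest is invoked from
``surfsense_backend/`` with the repo's usual marker config):

    pytest tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py -q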
--- .../streaming/test_tool_activity_metadata.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py new file mode 100644 index 000000000..c2e68dacd --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py @@ -0,0 +1,42 @@ +"""Unit tests for ``AgentEventRelayState.tool_activity_metadata``.""" + +from __future__ import annotations + +import pytest + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import open_task_span + +pytestmark = pytest.mark.unit + + +def test_returns_none_when_no_span_and_no_thinking_step() -> None: + state = AgentEventRelayState.for_invocation() + assert state.tool_activity_metadata(thinking_step_id=None) is None + assert state.tool_activity_metadata(thinking_step_id="") is None + assert state.tool_activity_metadata(thinking_step_id=" ") is None + + +def test_thinking_step_id_only() -> None: + state = AgentEventRelayState.for_invocation() + assert state.tool_activity_metadata(thinking_step_id="thinking-3") == { + "thinkingStepId": "thinking-3", + } + + +def test_span_only_when_active() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-x") + assert state.tool_activity_metadata(thinking_step_id=None) == { + "spanId": state.active_span_id, + } + + +def test_merges_span_and_thinking_step_when_both_set() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-x") + md = state.tool_activity_metadata(thinking_step_id="thinking-7") + assert md == { + "spanId": state.active_span_id, + "thinkingStepId": "thinking-7", + } From a309e830d34dedfb07e3a263bc7e34ec30fa5335 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:17:01 +0200 Subject: [PATCH 37/58] Document thinkingStepId on tool-call parts and first-key metadata merge. --- surfsense_backend/app/tasks/chat/content_builder.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/content_builder.py b/surfsense_backend/app/tasks/chat/content_builder.py index 2b6dbe649..f0804159a 100644 --- a/surfsense_backend/app/tasks/chat/content_builder.py +++ b/surfsense_backend/app/tasks/chat/content_builder.py @@ -52,6 +52,12 @@ _MEANINGFUL_PART_TYPES: frozenset[str] = frozenset({"text", "reasoning", "tool-c def _merge_tool_part_metadata(part: dict[str, Any], metadata: dict[str, Any] | None) -> None: + """Shallow-merge ``metadata`` into ``part["metadata"]``; first key wins. + + Used for tool-call linkage (``spanId``, ``thinkingStepId``, …): a later + event must not overwrite an existing key so chunk order vs ``on_tool_start`` + stays stable. + """ if not metadata: return md = part.setdefault("metadata", {}) @@ -70,6 +76,7 @@ class AssistantContentBuilder: | { type: "reasoning"; text: string } | { type: "tool-call"; toolCallId: str; toolName: str; args: dict; result?: any; argsText?: str; langchainToolCallId?: str; + metadata?: { spanId?: str; thinkingStepId?: str; ... 
}; state?: "aborted" } | { type: "data-thinking-steps"; data: { steps: ThinkingStepData[] } } | { type: "data-step-separator"; data: { stepIndex: int } } @@ -189,7 +196,11 @@ class AssistantContentBuilder: *, metadata: dict[str, Any] | None = None, ) -> None: - """Register a tool-call card. Args are filled in by later events.""" + """Register a tool-call card. Args are filled in by later events. + + Optional ``metadata`` (``spanId``, ``thinkingStepId``, …) is stored on the + part; duplicate ``tool-input-start`` calls merge with first-key-wins. + """ if not ui_id: return # Skip duplicate registration: the stream may emit From 32092c0b65914a046a0f6a02e76bfa6a561af71b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:17:05 +0200 Subject: [PATCH 38/58] Pass thinkingStepId through tool-input start and available metadata. --- .../app/tasks/chat/streaming/handlers/tool_start.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py index 3b3537567..e0cac307c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -99,20 +99,21 @@ def iter_tool_start_frames( ) span_md = state.span_metadata_if_active() + tool_md = state.tool_activity_metadata(thinking_step_id=tool_step_id) if matched_meta is None: yield streaming_service.format_tool_input_start( tool_call_id, tool_name, langchain_tool_call_id=langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) if content_builder is not None: content_builder.on_tool_input_start( tool_call_id, tool_name, langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) thinking = resolve_tool_start_thinking(tool_name, tool_input) @@ -148,7 +149,7 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id=langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) if content_builder is not None: content_builder.on_tool_input_available( @@ -156,5 +157,5 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) From 1761b60c16397310145cb8739b93cc105c5576e5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:17:12 +0200 Subject: [PATCH 39/58] Carry thinkingStepId on tool output and extend builder and parity tests. 
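The builder tests below pin the output-side frame. Roughly, with illustrative
ids (exact keys come from ``format_tool_output_available``):

    data: {"type": "tool-output-available", "toolCallId": "call_run-A_1",
           "output": {"status": "completed"}, "langchainToolCallId": "lc-1",
           "metadata": {"spanId": "spn_3f2a", "thinkingStepId": "thinking-5"}}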
--- .../tasks/chat/streaming/handlers/tool_end.py | 4 +- .../chat/streaming/test_stage_2_parity.py | 3 + .../unit/tasks/chat/test_content_builder.py | 150 ++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py index ec7d6551c..421c67a6d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -110,7 +110,9 @@ def iter_tool_end_frames( stream_result=result, langgraph_config=config, staged_workspace_file_path=staged_file_path, - tool_metadata=state.span_metadata_if_active(), + tool_metadata=state.tool_activity_metadata( + thinking_step_id=original_step_id, + ), ) yield from iter_tool_completion_emission_frames(emission_ctx) diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py index 9ae7defec..3ee1ab622 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py @@ -111,8 +111,10 @@ def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: svc = MagicMock() svc.format_thinking_step.return_value = "done-frame" completed: set[str] = set() + relay_state = AgentEventRelayState.for_invocation() frame, new_id = complete_active_thinking_step( + state=relay_state, streaming_service=svc, content_builder=None, last_active_step_id="thinking-1", @@ -125,6 +127,7 @@ def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: assert "thinking-1" in completed frame2, id2 = complete_active_thinking_step( + state=relay_state, streaming_service=svc, content_builder=None, last_active_step_id="thinking-1", diff --git a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py index 4b1fadd9c..9d3eb6fa4 100644 --- a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py +++ b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py @@ -15,6 +15,7 @@ import json import pytest +from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.content_builder import AssistantContentBuilder pytestmark = pytest.mark.unit @@ -231,6 +232,155 @@ class TestToolHeavyTurn: ) +# --------------------------------------------------------------------------- +# Task-span metadata on tool-call parts (JSONB persistence) +# --------------------------------------------------------------------------- + + +class TestToolCallSpanMetadata: + def test_input_available_merges_new_metadata_keys_after_start(self): + b = AssistantContentBuilder() + b.on_tool_input_start( + "call_t", "task", "lc_t", metadata={"spanId": "spn_1"} + ) + b.on_tool_input_available( + "call_t", + "task", + {"goal": "x"}, + "lc_t", + metadata={"traceId": "tr_1"}, + ) + part = b.snapshot()[0] + assert part["metadata"]["spanId"] == "spn_1" + assert part["metadata"]["traceId"] == "tr_1" + _assert_jsonb_safe(b.snapshot()) + + def test_input_available_does_not_overwrite_existing_metadata_keys(self): + b = AssistantContentBuilder() + b.on_tool_input_start( + "call_t", "task", "lc_t", metadata={"spanId": "spn_keep"} + ) + b.on_tool_input_available( + "call_t", "task", {}, "lc_t", metadata={"spanId": "spn_other"} + ) + assert b.snapshot()[0]["metadata"]["spanId"] == "spn_keep" + 
+ def test_late_tool_input_available_carries_metadata(self): + b = AssistantContentBuilder() + b.on_tool_input_available( + "call_l", + "grep", + {"pattern": "TODO"}, + None, + metadata={"spanId": "spn_l"}, + ) + part = b.snapshot()[0] + assert part["metadata"] == {"spanId": "spn_l"} + _assert_jsonb_safe(b.snapshot()) + + def test_output_available_merges_without_clobbering_span_id(self): + b = AssistantContentBuilder() + b.on_tool_input_start("call_t", "ls", "lc", metadata={"spanId": "spn_x"}) + b.on_tool_input_available("call_t", "ls", {"path": "/"}, "lc") + b.on_tool_output_available( + "call_t", + {"ok": True}, + "lc", + metadata={"spanId": "spn_y", "extra": 1}, + ) + md = b.snapshot()[0]["metadata"] + assert md["spanId"] == "spn_x" + assert md["extra"] == 1 + + def test_output_available_adds_thinking_step_id_without_clobbering_span(self): + b = AssistantContentBuilder() + b.on_tool_input_start( + "call_t", + "ls", + "lc", + metadata={"spanId": "spn_x", "thinkingStepId": "thinking-3"}, + ) + b.on_tool_input_available("call_t", "ls", {"path": "/"}, "lc") + b.on_tool_output_available( + "call_t", + {"ok": True}, + "lc", + metadata={"spanId": "spn_x", "thinkingStepId": "thinking-3"}, + ) + md = b.snapshot()[0]["metadata"] + assert md["spanId"] == "spn_x" + assert md["thinkingStepId"] == "thinking-3" + + def test_output_available_with_none_metadata_preserves_prior(self): + b = AssistantContentBuilder() + b.on_tool_input_start("c", "ls", "lc", metadata={"spanId": "spn_1"}) + b.on_tool_input_available("c", "ls", {}, "lc") + b.on_tool_output_available("c", {"r": 1}, "lc", metadata=None) + assert b.snapshot()[0]["metadata"] == {"spanId": "spn_1"} + + def test_available_adds_thinking_step_id_after_chunk_only_start(self): + """Mirrors chunk ``tool-input-start`` then ``on_tool_start`` ``available``.""" + b = AssistantContentBuilder() + b.on_tool_input_start("lc_1", "ls", "lc_1", metadata={"spanId": "spn_a"}) + b.on_tool_input_available( + "lc_1", + "ls", + {"path": "/"}, + "lc_1", + metadata={"spanId": "spn_a", "thinkingStepId": "thinking-2"}, + ) + md = b.snapshot()[0]["metadata"] + assert md["spanId"] == "spn_a" + assert md["thinkingStepId"] == "thinking-2" + + +class TestVercelStreamingServiceToolMetadataWire: + """SSE payloads include optional ``metadata`` for FE grouping.""" + + @staticmethod + def _parse_sse_data_line(raw: str) -> dict: + assert raw.startswith("data: ") + payload = raw.split("data: ", 1)[1].split("\n\n", 1)[0].strip() + return json.loads(payload) + + def test_tool_input_available_includes_metadata_when_set(self): + svc = VercelStreamingService() + raw = svc.format_tool_input_available( + "id1", + "task", + {"a": 1}, + langchain_tool_call_id="lc1", + metadata={"spanId": "spn_w", "thinkingStepId": "thinking-4"}, + ) + body = self._parse_sse_data_line(raw) + assert body["type"] == "tool-input-available" + assert body["metadata"] == { + "spanId": "spn_w", + "thinkingStepId": "thinking-4", + } + + def test_tool_output_available_includes_metadata_when_set(self): + svc = VercelStreamingService() + raw = svc.format_tool_output_available( + "id1", + {"status": "completed"}, + langchain_tool_call_id="lc1", + metadata={"spanId": "spn_o", "thinkingStepId": "thinking-9"}, + ) + body = self._parse_sse_data_line(raw) + assert body["type"] == "tool-output-available" + assert body["metadata"] == { + "spanId": "spn_o", + "thinkingStepId": "thinking-9", + } + + def test_tool_input_available_omits_metadata_key_when_none(self): + svc = VercelStreamingService() + raw = 
svc.format_tool_input_available("id1", "ls", {}) + body = self._parse_sse_data_line(raw) + assert "metadata" not in body + + # --------------------------------------------------------------------------- # Thinking steps & separators # --------------------------------------------------------------------------- From 47e64d1861a85636c8e8bde2268d6f34bf6bee2d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:39:43 +0200 Subject: [PATCH 40/58] Persist optional relay metadata on tool-call and thinking-step state. --- surfsense_web/lib/chat/streaming-state.ts | 43 ++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts index 809e214d1..ee3160a61 100644 --- a/surfsense_web/lib/chat/streaming-state.ts +++ b/surfsense_web/lib/chat/streaming-state.ts @@ -5,6 +5,11 @@ export interface ThinkingStepData { title: string; status: "pending" | "in_progress" | "completed"; items: string[]; + /** + * Optional relay fields from ``data-thinking-step`` when present on the wire + * (e.g. ``spanId``). Populated in a later slice; equality helpers ignore until wired. + */ + metadata?: Record; } export type ContentPart = @@ -42,6 +47,11 @@ export type ContentPart = * ``data-action-log`` events. */ langchainToolCallId?: string; + /** + * Relay correlation from tool SSE (e.g. ``spanId``, ``thinkingStepId``). + * Merged by ``mergeToolPartMetadata`` when events carry ``metadata``. + */ + metadata?: Record; } | { type: "data-thinking-steps"; @@ -252,6 +262,23 @@ function _toolPasses(gate: ToolUIGate, toolName: string): boolean { return gate === "all" || gate.has(toolName); } +/** + * Shallow-merge relay ``metadata`` into a tool-call part (SSE → content part). + * Keys already set on ``into`` are left unchanged so chunk vs canonical tool + * events cannot reorder or overwrite ``spanId`` / ``thinkingStepId``. + * Matches server ``AssistantContentBuilder`` merge semantics. + */ +function mergeToolPartMetadata( + into: Record, + incoming: Record | undefined +): void { + if (!incoming) return; + for (const [k, v] of Object.entries(incoming)) { + if (k === "__proto__" || k === "constructor") continue; + if (!(k in into)) into[k] = v; + } +} + export function addToolCall( state: ContentPartsState, toolsWithUI: ToolUIGate, @@ -259,15 +286,19 @@ export function addToolCall( toolName: string, args: Record, force = false, - langchainToolCallId?: string + langchainToolCallId?: string, + metadata?: Record ): void { if (force || _toolPasses(toolsWithUI, toolName)) { + const relayMeta: Record = {}; + mergeToolPartMetadata(relayMeta, metadata); state.contentParts.push({ type: "tool-call", toolCallId, toolName, args, ...(langchainToolCallId ? { langchainToolCallId } : {}), + ...(Object.keys(relayMeta).length > 0 ? { metadata: relayMeta } : {}), }); state.toolCallIndices.set(toolCallId, state.contentParts.length - 1); state.currentTextPartIndex = -1; @@ -304,6 +335,7 @@ export function updateToolCall( argsText?: string; result?: unknown; langchainToolCallId?: string; + metadata?: Record; } ): void { const index = state.toolCallIndices.get(toolCallId); @@ -323,6 +355,11 @@ export function updateToolCall( if (update.langchainToolCallId && !tc.langchainToolCallId) { tc.langchainToolCallId = update.langchainToolCallId; } + if (update.metadata && Object.keys(update.metadata).length > 0) { + const md = (tc.metadata ?? 
{}) as Record; + mergeToolPartMetadata(md, update.metadata); + tc.metadata = md; + } } } @@ -416,6 +453,8 @@ export type SSEEvent = toolName: string; /** Authoritative LangChain ``tool_call.id``. Optional. */ langchainToolCallId?: string; + /** Optional JSON object from tool SSE (same keys as persisted tool-call metadata). */ + metadata?: Record; } | { /** @@ -434,6 +473,7 @@ export type SSEEvent = toolName: string; input: Record; langchainToolCallId?: string; + metadata?: Record; } | { type: "tool-output-available"; @@ -443,6 +483,7 @@ export type SSEEvent = * ``ToolMessage.tool_call_id`` at on_tool_end. Backfills cards * that didn't get the id at tool-input-start time. */ langchainToolCallId?: string; + metadata?: Record; } | { type: "data-thinking-step"; data: ThinkingStepData } | { type: "data-thread-title-update"; data: { threadId: number; title: string } } From 39084b3075ef0dd322d2cfab037fe1b0fbd4dc0f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:39:50 +0200 Subject: [PATCH 41/58] Forward tool SSE metadata into the streaming content parts. --- surfsense_web/components/free-chat/free-chat-page.tsx | 8 ++++++-- surfsense_web/lib/chat/stream-pipeline.ts | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/surfsense_web/components/free-chat/free-chat-page.tsx b/surfsense_web/components/free-chat/free-chat-page.tsx index 080d9a2b6..b2f959f2e 100644 --- a/surfsense_web/components/free-chat/free-chat-page.tsx +++ b/surfsense_web/components/free-chat/free-chat-page.tsx @@ -228,7 +228,8 @@ export function FreeChatPage() { parsed.toolName, {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); forceFlush(); break; @@ -245,6 +246,7 @@ export function FreeChatPage() { args: parsed.input || {}, argsText: finalArgsText, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); } else { addToolCall( @@ -254,7 +256,8 @@ export function FreeChatPage() { parsed.toolName, parsed.input || {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); updateToolCall(contentPartsState, parsed.toolCallId, { argsText: finalArgsText, @@ -268,6 +271,7 @@ export function FreeChatPage() { updateToolCall(contentPartsState, parsed.toolCallId, { result: parsed.output, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); forceFlush(); break; diff --git a/surfsense_web/lib/chat/stream-pipeline.ts b/surfsense_web/lib/chat/stream-pipeline.ts index c76781083..a0f90afeb 100644 --- a/surfsense_web/lib/chat/stream-pipeline.ts +++ b/surfsense_web/lib/chat/stream-pipeline.ts @@ -112,7 +112,8 @@ export function processSharedStreamEvent( parsed.toolName, {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); forceFlush(); return true; @@ -131,6 +132,7 @@ export function processSharedStreamEvent( args: parsed.input || {}, argsText: finalArgsText, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); } else { addToolCall( @@ -140,7 +142,8 @@ export function processSharedStreamEvent( parsed.toolName, parsed.input || {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); // addToolCall doesn't accept argsText today; backfill via // updateToolCall so the new card renders pretty-printed JSON. 
@@ -156,6 +159,7 @@ export function processSharedStreamEvent( updateToolCall(contentPartsState, parsed.toolCallId, { result: parsed.output, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); markInterruptsCompleted(contentParts); context.onToolOutputAvailable?.(parsed, { contentPartsState, toolCallIndices }); From e7c5204b0248d7ceb6262478a4419eb7e0fe6f58 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:39:59 +0200 Subject: [PATCH 42/58] Indent tool cards under an active delegating task span. --- .../assistant-ui/assistant-message.tsx | 88 ++++++++++--------- .../components/assistant-ui/tool-fallback.tsx | 22 +++++ .../components/public-chat/public-thread.tsx | 29 +++--- .../lib/chat/delegation-span-indent.ts | 19 ++++ 4 files changed, 106 insertions(+), 52 deletions(-) create mode 100644 surfsense_web/lib/chat/delegation-span-indent.ts diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 7bccc22ee..a21ade74a 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -4,6 +4,7 @@ import { AuiIf, ErrorPrimitive, MessagePrimitive, + type ToolCallMessagePartComponent, useAui, useAuiState, } from "@assistant-ui/react"; @@ -36,7 +37,7 @@ import { MarkdownText } from "@/components/assistant-ui/markdown-text"; import { ReasoningMessagePart } from "@/components/assistant-ui/reasoning-message-part"; import { RevertTurnButton } from "@/components/assistant-ui/revert-turn-button"; import { useTokenUsage } from "@/components/assistant-ui/token-usage-context"; -import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; +import { ToolFallback, withDelegationSpanIndent } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { CommentPanelContainer } from "@/components/chat-comments/comment-panel-container/comment-panel-container"; import { CommentSheet } from "@/components/chat-comments/comment-sheet/comment-sheet"; @@ -505,48 +506,55 @@ const MessageInfoDropdown: FC = () => { // Wrap each tool-ui card with ``withBundleStep`` so multi-card HITL bundles // page through them and stage decisions instead of firing one resume per card. +// ``withDelegationSpanIndent`` wraps every entry (including Fallback) so delegated +// subagent tools don't bypass span indentation via a named ``by_name`` UI. 
+const bundleTool = (Component: ToolCallMessagePartComponent) => + withBundleStep(withDelegationSpanIndent(Component)); + +const NullToolUi: ToolCallMessagePartComponent = () => null; + const TOOLS_BY_NAME = { - generate_report: withBundleStep(GenerateReportToolUI), - generate_resume: withBundleStep(GenerateResumeToolUI), - generate_podcast: withBundleStep(GeneratePodcastToolUI), - generate_video_presentation: withBundleStep(GenerateVideoPresentationToolUI), - display_image: withBundleStep(GenerateImageToolUI), - generate_image: withBundleStep(GenerateImageToolUI), - update_memory: withBundleStep(UpdateMemoryToolUI), - execute: withBundleStep(SandboxExecuteToolUI), - execute_code: withBundleStep(SandboxExecuteToolUI), - create_notion_page: withBundleStep(CreateNotionPageToolUI), - update_notion_page: withBundleStep(UpdateNotionPageToolUI), - delete_notion_page: withBundleStep(DeleteNotionPageToolUI), - create_linear_issue: withBundleStep(CreateLinearIssueToolUI), - update_linear_issue: withBundleStep(UpdateLinearIssueToolUI), - delete_linear_issue: withBundleStep(DeleteLinearIssueToolUI), - create_google_drive_file: withBundleStep(CreateGoogleDriveFileToolUI), - delete_google_drive_file: withBundleStep(DeleteGoogleDriveFileToolUI), - create_onedrive_file: withBundleStep(CreateOneDriveFileToolUI), - delete_onedrive_file: withBundleStep(DeleteOneDriveFileToolUI), - create_dropbox_file: withBundleStep(CreateDropboxFileToolUI), - delete_dropbox_file: withBundleStep(DeleteDropboxFileToolUI), - create_calendar_event: withBundleStep(CreateCalendarEventToolUI), - update_calendar_event: withBundleStep(UpdateCalendarEventToolUI), - delete_calendar_event: withBundleStep(DeleteCalendarEventToolUI), - create_gmail_draft: withBundleStep(CreateGmailDraftToolUI), - update_gmail_draft: withBundleStep(UpdateGmailDraftToolUI), - send_gmail_email: withBundleStep(SendGmailEmailToolUI), - trash_gmail_email: withBundleStep(TrashGmailEmailToolUI), - create_jira_issue: withBundleStep(CreateJiraIssueToolUI), - update_jira_issue: withBundleStep(UpdateJiraIssueToolUI), - delete_jira_issue: withBundleStep(DeleteJiraIssueToolUI), - create_confluence_page: withBundleStep(CreateConfluencePageToolUI), - update_confluence_page: withBundleStep(UpdateConfluencePageToolUI), - delete_confluence_page: withBundleStep(DeleteConfluencePageToolUI), - web_search: () => null, - link_preview: () => null, - multi_link_preview: () => null, - scrape_webpage: () => null, + generate_report: bundleTool(GenerateReportToolUI), + generate_resume: bundleTool(GenerateResumeToolUI), + generate_podcast: bundleTool(GeneratePodcastToolUI), + generate_video_presentation: bundleTool(GenerateVideoPresentationToolUI), + display_image: bundleTool(GenerateImageToolUI), + generate_image: bundleTool(GenerateImageToolUI), + update_memory: bundleTool(UpdateMemoryToolUI), + execute: bundleTool(SandboxExecuteToolUI), + execute_code: bundleTool(SandboxExecuteToolUI), + create_notion_page: bundleTool(CreateNotionPageToolUI), + update_notion_page: bundleTool(UpdateNotionPageToolUI), + delete_notion_page: bundleTool(DeleteNotionPageToolUI), + create_linear_issue: bundleTool(CreateLinearIssueToolUI), + update_linear_issue: bundleTool(UpdateLinearIssueToolUI), + delete_linear_issue: bundleTool(DeleteLinearIssueToolUI), + create_google_drive_file: bundleTool(CreateGoogleDriveFileToolUI), + delete_google_drive_file: bundleTool(DeleteGoogleDriveFileToolUI), + create_onedrive_file: bundleTool(CreateOneDriveFileToolUI), + delete_onedrive_file: 
bundleTool(DeleteOneDriveFileToolUI), + create_dropbox_file: bundleTool(CreateDropboxFileToolUI), + delete_dropbox_file: bundleTool(DeleteDropboxFileToolUI), + create_calendar_event: bundleTool(CreateCalendarEventToolUI), + update_calendar_event: bundleTool(UpdateCalendarEventToolUI), + delete_calendar_event: bundleTool(DeleteCalendarEventToolUI), + create_gmail_draft: bundleTool(CreateGmailDraftToolUI), + update_gmail_draft: bundleTool(UpdateGmailDraftToolUI), + send_gmail_email: bundleTool(SendGmailEmailToolUI), + trash_gmail_email: bundleTool(TrashGmailEmailToolUI), + create_jira_issue: bundleTool(CreateJiraIssueToolUI), + update_jira_issue: bundleTool(UpdateJiraIssueToolUI), + delete_jira_issue: bundleTool(DeleteJiraIssueToolUI), + create_confluence_page: bundleTool(CreateConfluencePageToolUI), + update_confluence_page: bundleTool(UpdateConfluencePageToolUI), + delete_confluence_page: bundleTool(DeleteConfluencePageToolUI), + web_search: NullToolUi, + link_preview: NullToolUi, + multi_link_preview: NullToolUi, + scrape_webpage: NullToolUi, } as const; -const TOOLS_FALLBACK = withBundleStep(ToolFallback); +const TOOLS_FALLBACK = bundleTool(ToolFallback); const AssistantMessageInner: FC = () => { const isMobile = !useMediaQuery("(min-width: 768px)"); diff --git a/surfsense_web/components/assistant-ui/tool-fallback.tsx b/surfsense_web/components/assistant-ui/tool-fallback.tsx index ba58f4158..ec93b1018 100644 --- a/surfsense_web/components/assistant-ui/tool-fallback.tsx +++ b/surfsense_web/components/assistant-ui/tool-fallback.tsx @@ -31,6 +31,10 @@ import { Spinner } from "@/components/ui/spinner"; import { getToolDisplayName } from "@/contracts/enums/toolIcons"; import { markActionRevertedInCache, useAgentActionsQuery } from "@/hooks/use-agent-actions-query"; import { agentActionsApiService } from "@/lib/apis/agent-actions-api.service"; +import { + DELEGATION_SPAN_INDENT_CLASS, + shouldIndentToolCallForDelegationSpan, +} from "@/lib/chat/delegation-span-indent"; import { AppError } from "@/lib/error"; import { isInterruptResult } from "@/lib/hitl"; import { cn } from "@/lib/utils"; @@ -499,6 +503,24 @@ const DefaultToolFallbackInner: ToolCallMessagePartComponent = (props) => { ); }; +/** + * Wrap any tool-call UI so cards under an active delegating ``task`` span indent. + * Applied to named tool components as well as ``ToolFallback`` — only ``ToolFallback`` + * would miss delegated tools otherwise. + */ +export function withDelegationSpanIndent( + Component: ToolCallMessagePartComponent +): ToolCallMessagePartComponent { + const Wrapped: ToolCallMessagePartComponent = (props) => { + const metadata = (props as { metadata?: Record }).metadata; + const indent = shouldIndentToolCallForDelegationSpan(props.toolName, metadata); + const inner = ; + return indent ?
<div className={DELEGATION_SPAN_INDENT_CLASS}>{inner}</div>
: inner; + }; + Wrapped.displayName = `withDelegationSpanIndent(${Component.displayName ?? Component.name ?? "ToolUI"})`; + return Wrapped; +} + export const ToolFallback: ToolCallMessagePartComponent = (props) => { if (isInterruptResult(props.result)) { if (isDoomLoopInterrupt(props.result)) { diff --git a/surfsense_web/components/public-chat/public-thread.tsx b/surfsense_web/components/public-chat/public-thread.tsx index 750b7410e..2075d82b8 100644 --- a/surfsense_web/components/public-chat/public-thread.tsx +++ b/surfsense_web/components/public-chat/public-thread.tsx @@ -5,6 +5,7 @@ import { AuiIf, MessagePrimitive, ThreadPrimitive, + type ToolCallMessagePartComponent, useAuiState, } from "@assistant-ui/react"; import { CheckIcon, CopyIcon } from "lucide-react"; @@ -14,7 +15,7 @@ import { type FC, type ReactNode, useState } from "react"; import { CitationMetadataProvider } from "@/components/assistant-ui/citation-metadata-context"; import { MarkdownText } from "@/components/assistant-ui/markdown-text"; import { ReasoningMessagePart } from "@/components/assistant-ui/reasoning-message-part"; -import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; +import { ToolFallback, withDelegationSpanIndent } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { GenerateImageToolUI } from "@/components/tool-ui/generate-image"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; @@ -29,6 +30,8 @@ const GenerateVideoPresentationToolUI = dynamic( { ssr: false } ); +const NullToolUi: ToolCallMessagePartComponent = () => null; + interface PublicThreadProps { footer?: ReactNode; } @@ -162,18 +165,20 @@ const PublicAssistantMessage: FC = () => { Reasoning: ReasoningMessagePart, tools: { by_name: { - generate_podcast: GeneratePodcastToolUI, - generate_report: GenerateReportToolUI, - generate_resume: GenerateResumeToolUI, - generate_video_presentation: GenerateVideoPresentationToolUI, - display_image: GenerateImageToolUI, - generate_image: GenerateImageToolUI, - web_search: () => null, - link_preview: () => null, - multi_link_preview: () => null, - scrape_webpage: () => null, + generate_podcast: withDelegationSpanIndent(GeneratePodcastToolUI), + generate_report: withDelegationSpanIndent(GenerateReportToolUI), + generate_resume: withDelegationSpanIndent(GenerateResumeToolUI), + generate_video_presentation: withDelegationSpanIndent( + GenerateVideoPresentationToolUI + ), + display_image: withDelegationSpanIndent(GenerateImageToolUI), + generate_image: withDelegationSpanIndent(GenerateImageToolUI), + web_search: NullToolUi, + link_preview: NullToolUi, + multi_link_preview: NullToolUi, + scrape_webpage: NullToolUi, }, - Fallback: ToolFallback, + Fallback: withDelegationSpanIndent(ToolFallback), }, }} /> diff --git a/surfsense_web/lib/chat/delegation-span-indent.ts b/surfsense_web/lib/chat/delegation-span-indent.ts new file mode 100644 index 000000000..99e292eaf --- /dev/null +++ b/surfsense_web/lib/chat/delegation-span-indent.ts @@ -0,0 +1,19 @@ +/** + * Indent tool-call cards that belong to an open delegating ``task`` episode. + * + * The backend only stamps ``metadata.spanId`` on tool SSE / persisted parts + * while a ``task`` is active (see ``AgentEventRelayState.tool_activity_metadata``), + * so its presence is sufficient. The opening ``task`` row itself carries the + * same span id but stays flush — it is the header of the delegation. 
+ */ + +export function shouldIndentToolCallForDelegationSpan( + toolName: string, + metadata: Record | undefined +): boolean { + if (toolName === "task") return false; + const v = metadata?.spanId; + return typeof v === "string" && v.trim().length > 0; +} + +export const DELEGATION_SPAN_INDENT_CLASS = "pl-3 sm:ml-4"; From 4b2c9f07cd4966cd64a2d9a795a8556477cd3b91 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:40:06 +0200 Subject: [PATCH 43/58] Group delegated thinking steps under their task parent and show subagent name. --- .../assistant-ui/thinking-steps.tsx | 247 +++++++++++++++--- 1 file changed, 213 insertions(+), 34 deletions(-) diff --git a/surfsense_web/components/assistant-ui/thinking-steps.tsx b/surfsense_web/components/assistant-ui/thinking-steps.tsx index df1cef12c..6c3832bff 100644 --- a/surfsense_web/components/assistant-ui/thinking-steps.tsx +++ b/surfsense_web/components/assistant-ui/thinking-steps.tsx @@ -1,7 +1,7 @@ import { makeAssistantDataUI, useAuiState } from "@assistant-ui/react"; import { ChevronRightIcon } from "lucide-react"; import type { FC } from "react"; -import { useCallback, useEffect, useState } from "react"; +import { useCallback, useEffect, useMemo, useState } from "react"; import { ChainOfThoughtItem } from "@/components/prompt-kit/chain-of-thought"; import { TextShimmerLoader } from "@/components/prompt-kit/loader"; import { cn } from "@/lib/utils"; @@ -11,15 +11,170 @@ export interface ThinkingStep { title: string; items: string[]; status: "pending" | "in_progress" | "completed"; + /** + * Optional relay metadata forwarded from ``data-thinking-step`` SSE + * (e.g. ``spanId`` set by ``AgentEventRelayState.span_metadata_if_active``). + * Steps under an open delegating ``task`` carry ``metadata.spanId`` and are + * grouped under the preceding parent (``task`` step) as indented children. + */ + metadata?: Record; } /** - * Chain of thought display component - single collapsible dropdown design + * Per-step info joined from the assistant message ``tool-call`` parts via + * the shared ``metadata.thinkingStepId`` correlation + * (set on the server in ``AgentEventRelayState.tool_activity_metadata``). */ -export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: boolean }> = ({ - steps, - isThreadRunning = true, -}) => { +interface StepToolInfo { + toolName: string; + args: Record; +} + +export type ThinkingStepToolInfoMap = ReadonlyMap; + +/** + * Build ``thinkingStepId → {toolName, args}`` from message content. Used to + * - identify the opening ``task`` step (parent header, never indents) without + * relying on the human-readable title; + * - render the parent's display title from ``args.subagent_type`` instead of + * the generic "Task" copy. + */ +export function buildThinkingStepToolInfo( + content: readonly unknown[] | undefined +): ThinkingStepToolInfoMap { + const m = new Map(); + if (!content) return m; + for (const part of content) { + if (!part || typeof part !== "object") continue; + const o = part as { + type?: string; + toolName?: string; + args?: Record; + metadata?: Record; + }; + if (o.type !== "tool-call" || !o.toolName) continue; + const tid = o.metadata?.thinkingStepId; + if (typeof tid === "string" && tid.trim().length > 0) { + m.set(tid, { toolName: o.toolName, args: o.args ?? {} }); + } + } + return m; +} + +function asNonEmptyString(v: unknown): string | undefined { + return typeof v === "string" && v.trim().length > 0 ? 
v.trim() : undefined; +} + +function titleCaseSubagent(raw: string): string { + // "notion" → "Notion", "doc_research" → "Doc Research". + return raw + .split(/[\s_-]+/) + .filter(Boolean) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(" "); +} + +/** + * Display title for a step. For the opening ``task`` step we substitute the + * subagent type from the matching tool-call args (e.g. ``"Notion"`` instead of + * the generic ``"Task"``). Falls back to the step's own title if the tool-call + * hasn't streamed in yet. + */ +function resolveDisplayTitle(step: ThinkingStep, info: StepToolInfo | undefined): string { + if (info?.toolName === "task") { + const subagent = asNonEmptyString(info.args?.subagent_type); + if (subagent) return titleCaseSubagent(subagent); + } + return step.title; +} + +function isDelegatedChild(step: ThinkingStep, info: StepToolInfo | undefined): boolean { + const sid = asNonEmptyString(step.metadata?.spanId); + if (!sid) return false; + // The opening ``task`` step also carries ``spanId`` (it owns the span) but + // must render as the parent header. Prefer the joined ``toolName`` (set by + // ``buildThinkingStepToolInfo`` from ``tool-call.metadata.thinkingStepId``). + // Fall back to the title heuristic when no tool-call is matched — happens + // for messages persisted before ``thinkingStepId`` shipped, and briefly + // during streaming if the ``tool-input-start`` frame hasn't been processed + // yet for some reason. + if (info) return info.toolName !== "task"; + return step.title !== "Task"; +} + +interface StepGroup { + parent: ThinkingStep; + children: ThinkingStep[]; +} + +/** + * Group consecutive delegated child steps under the preceding parent step. + * If the very first step is a child (no parent yet seen), it's promoted to a + * parent so it still renders — defensive only, real flows always start with a + * parent step. + */ +const EMPTY_STEP_TOOL_INFO: ThinkingStepToolInfoMap = new Map(); + +function groupSteps( + steps: readonly ThinkingStep[], + stepToolInfo: ThinkingStepToolInfoMap +): StepGroup[] { + const groups: StepGroup[] = []; + for (const step of steps) { + if (isDelegatedChild(step, stepToolInfo.get(step.id)) && groups.length > 0) { + groups[groups.length - 1].children.push(step); + } else { + groups.push({ parent: step, children: [] }); + } + } + return groups; +} + +const StepBody: FC<{ + step: ThinkingStep; + status: "pending" | "in_progress" | "completed"; + displayTitle: string; +}> = ({ step, status, displayTitle }) => ( +
+
+ {displayTitle} +
+ + {step.items && step.items.length > 0 && ( +
+ {step.items.map((item) => ( + + {item} + + ))} +
+ )} +
+); + +/** + * Chain of thought display component - single collapsible dropdown design. + * + * ``stepToolInfo`` joins each step (by ``thinkingStepId``) to its ``tool-call`` + * part so we can: + * - replace the generic ``"Task"`` title with the real subagent name + * (``args.subagent_type``) on the parent header; + * - decide parent-vs-child purely from the matched ``toolName`` instead of + * relying on the displayed title. + */ +export const ThinkingStepsDisplay: FC<{ + steps: ThinkingStep[]; + isThreadRunning?: boolean; + stepToolInfo?: ThinkingStepToolInfoMap; +}> = ({ steps, isThreadRunning = true, stepToolInfo }) => { + const toolInfo = stepToolInfo ?? EMPTY_STEP_TOOL_INFO; const getEffectiveStatus = useCallback( (step: ThinkingStep): "pending" | "in_progress" | "completed" => { if (step.status === "in_progress" && !isThreadRunning) { @@ -31,6 +186,9 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: ); const inProgressStep = steps.find((s) => getEffectiveStatus(s) === "in_progress"); + const inProgressDisplayTitle = inProgressStep + ? resolveDisplayTitle(inProgressStep, toolInfo.get(inProgressStep.id)) + : undefined; const allCompleted = steps.length > 0 && !isThreadRunning && @@ -49,14 +207,16 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: } }, [allCompleted, isProcessing]); + const groups = useMemo(() => groupSteps(steps, toolInfo), [steps, toolInfo]); + if (steps.length === 0) return null; const getHeaderText = () => { if (allCompleted) { return "Reviewed"; } - if (inProgressStep) { - return inProgressStep.title; + if (inProgressDisplayTitle) { + return inProgressDisplayTitle; } if (isProcessing) { return "Processing"; @@ -94,18 +254,26 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: >
- {steps.map((step, index) => { - const effectiveStatus = getEffectiveStatus(step); - const isLast = index === steps.length - 1; + {groups.map((group, groupIndex) => { + const isLastGroup = groupIndex === groups.length - 1; + const parentStatus = getEffectiveStatus(group.parent); + const parentInfo = toolInfo.get(group.parent.id); + const parentTitle = resolveDisplayTitle(group.parent, parentInfo); + const hasChildren = group.children.length > 0; + // Parent dots are connected by a vertical line that runs through + // any indented children (their column has no dot, so the line + // passes cleanly behind them) and overshoots by ~15px to reach + // the next group's dot center (top-[15px]). + const showParentLine = !isLastGroup; return ( -
-
- {!isLast && ( -
+
+
+ {showParentLine && ( +
)}
- {effectiveStatus === "in_progress" ? ( + {parentStatus === "in_progress" ? ( @@ -117,24 +285,25 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?:
-
- {step.title} -
+ - {step.items && step.items.length > 0 && ( -
- {step.items.map((item) => ( - - {item} - - ))} + {hasChildren && ( +
+ {group.children.map((child) => { + const childInfo = toolInfo.get(child.id); + return ( + + ); + })}
)}
@@ -158,13 +327,23 @@ function ThinkingStepsDataRenderer({ data }: { name: string; data: unknown }) { const isThreadRunning = useAuiState(({ thread }) => thread.isRunning); const isLastMessage = useAuiState(({ message }) => message?.isLast ?? false); const isMessageStreaming = isThreadRunning && isLastMessage; + const content = useAuiState(({ message }) => message?.content); + + const stepToolInfo = useMemo( + () => buildThinkingStepToolInfo(Array.isArray(content) ? content : undefined), + [content] + ); const steps = (data as { steps: ThinkingStep[] } | null)?.steps ?? []; if (steps.length === 0) return null; return (
- +
); } From a8417e3c451090da6d05e54ef152b3ff3c72757a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 14:37:06 +0200 Subject: [PATCH 44/58] Render HITL approval cards inline in the thinking-steps timeline. --- .../assistant-ui/assistant-message.tsx | 17 ++- .../assistant-ui/thinking-steps.tsx | 101 ++++++++++++++++-- surfsense_web/lib/hitl/index.ts | 6 ++ surfsense_web/lib/hitl/render-target.tsx | 48 +++++++++ 4 files changed, 158 insertions(+), 14 deletions(-) create mode 100644 surfsense_web/lib/hitl/render-target.tsx diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index a21ade74a..549141779 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -59,6 +59,7 @@ import { DropdownMenuLabel } from "@/components/ui/dropdown-menu"; import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; +import { withHitlInTimeline } from "@/lib/hitl"; import { getProviderIcon } from "@/lib/provider-icons"; import { cn } from "@/lib/utils"; @@ -508,12 +509,22 @@ const MessageInfoDropdown: FC = () => { // page through them and stage decisions instead of firing one resume per card. // ``withDelegationSpanIndent`` wraps every entry (including Fallback) so delegated // subagent tools don't bypass span indentation via a named ``by_name`` UI. +// ``withHitlInTimeline`` is the OUTERMOST wrapper so a body render with an +// interrupt result returns ``null`` immediately — no inner wrappers paint +// — while a timeline render (under ``HitlRenderTargetProvider value="timeline"`` +// inside ``ThinkingStepsDisplay``) passes through to the real component. const bundleTool = (Component: ToolCallMessagePartComponent) => - withBundleStep(withDelegationSpanIndent(Component)); + withHitlInTimeline(withBundleStep(withDelegationSpanIndent(Component))); const NullToolUi: ToolCallMessagePartComponent = () => null; -const TOOLS_BY_NAME = { +/** + * Tool-call UI registry. Exported so ``ThinkingStepsDisplay`` can mount + * the SAME wrapped components inline under a step row when the card's + * result is an HITL interrupt. The wrappers handle ``ToolCallIdProvider`` + * and bundle paging consistently across both render targets. 
+ */ +export const TOOLS_BY_NAME = { generate_report: bundleTool(GenerateReportToolUI), generate_resume: bundleTool(GenerateResumeToolUI), generate_podcast: bundleTool(GeneratePodcastToolUI), @@ -554,7 +565,7 @@ const TOOLS_BY_NAME = { scrape_webpage: NullToolUi, } as const; -const TOOLS_FALLBACK = bundleTool(ToolFallback); +export const TOOLS_FALLBACK = bundleTool(ToolFallback); const AssistantMessageInner: FC = () => { const isMobile = !useMediaQuery("(min-width: 768px)"); diff --git a/surfsense_web/components/assistant-ui/thinking-steps.tsx b/surfsense_web/components/assistant-ui/thinking-steps.tsx index 6c3832bff..46d33a9fa 100644 --- a/surfsense_web/components/assistant-ui/thinking-steps.tsx +++ b/surfsense_web/components/assistant-ui/thinking-steps.tsx @@ -1,9 +1,15 @@ -import { makeAssistantDataUI, useAuiState } from "@assistant-ui/react"; +import { + makeAssistantDataUI, + type ToolCallMessagePartComponent, + useAuiState, +} from "@assistant-ui/react"; import { ChevronRightIcon } from "lucide-react"; import type { FC } from "react"; import { useCallback, useEffect, useMemo, useState } from "react"; +import { TOOLS_BY_NAME, TOOLS_FALLBACK } from "@/components/assistant-ui/assistant-message"; import { ChainOfThoughtItem } from "@/components/prompt-kit/chain-of-thought"; import { TextShimmerLoader } from "@/components/prompt-kit/loader"; +import { HitlRenderTargetProvider, isInterruptResult } from "@/lib/hitl"; import { cn } from "@/lib/utils"; export interface ThinkingStep { @@ -24,20 +30,33 @@ export interface ThinkingStep { * Per-step info joined from the assistant message ``tool-call`` parts via * the shared ``metadata.thinkingStepId`` correlation * (set on the server in ``AgentEventRelayState.tool_activity_metadata``). + * + * Carries enough of the part to: + * - identify the opening ``task`` step and substitute the subagent display + * name on the parent header (uses ``toolName`` and ``args``); + * - render the matching tool component inline under the step row when the + * card's result is an HITL interrupt (uses ``toolCallId``, ``argsText``, + * ``result``, ``langchainToolCallId``). */ interface StepToolInfo { + toolCallId: string; toolName: string; args: Record; + argsText?: string; + result?: unknown; + langchainToolCallId?: string; } export type ThinkingStepToolInfoMap = ReadonlyMap; /** - * Build ``thinkingStepId → {toolName, args}`` from message content. Used to + * Build ``thinkingStepId → StepToolInfo`` from message content. Used to * - identify the opening ``task`` step (parent header, never indents) without * relying on the human-readable title; * - render the parent's display title from ``args.subagent_type`` instead of - * the generic "Task" copy. + * the generic "Task" copy; + * - mount the matching tool-call card inline under a step row when the + * result is an HITL interrupt (see ``TimelineHitlCard``). */ export function buildThinkingStepToolInfo( content: readonly unknown[] | undefined @@ -48,14 +67,25 @@ export function buildThinkingStepToolInfo( if (!part || typeof part !== "object") continue; const o = part as { type?: string; + toolCallId?: string; toolName?: string; args?: Record; + argsText?: string; + result?: unknown; + langchainToolCallId?: string; metadata?: Record; }; - if (o.type !== "tool-call" || !o.toolName) continue; + if (o.type !== "tool-call" || !o.toolName || !o.toolCallId) continue; const tid = o.metadata?.thinkingStepId; if (typeof tid === "string" && tid.trim().length > 0) { - m.set(tid, { toolName: o.toolName, args: o.args ?? 
{} }); + m.set(tid, { + toolCallId: o.toolCallId, + toolName: o.toolName, + args: o.args ?? {}, + argsText: o.argsText, + result: o.result, + langchainToolCallId: o.langchainToolCallId, + }); } } return m; @@ -159,6 +189,47 @@ const StepBody: FC<{
); +/** + * Mount the same tool-call UI used in the message body, but inside the + * chain-of-thought timeline. The body copy returns ``null`` (see + * ``withHitlInTimeline`` in ``lib/hitl/render-target``), so the card + * effectively moves from the body to the timeline for the lifetime of the + * interrupt (pending → processing → complete / rejected). + * + * ``metadata`` is intentionally omitted from the props we forward — the + * step row already provides any indentation it needs, so we don't want + * ``withDelegationSpanIndent`` to add a second indent + border on top. + * + * ``status`` is a placeholder (HITL UIs read only ``args`` + ``result``) + * so we don't need to mirror assistant-ui's runtime status object here. + */ +const TimelineHitlCard: FC<{ info: StepToolInfo }> = ({ info }) => { + const Comp = + (TOOLS_BY_NAME as Record)[info.toolName] ?? + TOOLS_FALLBACK; + const props = { + toolCallId: info.toolCallId, + toolName: info.toolName, + args: info.args, + argsText: info.argsText, + result: info.result, + langchainToolCallId: info.langchainToolCallId, + status: { type: "complete" } as const, + }; + return ( + + {/* biome-ignore lint/suspicious/noExplicitAny: ToolCallMessagePartProps requires + runtime-only fields (addResult, resume, MessagePartState) we don't have when + re-rendering manually; HITL components only read args + result. */} + + + ); +}; + +function hitlInterruptInfo(info: StepToolInfo | undefined): StepToolInfo | undefined { + return info && isInterruptResult(info.result) ? info : undefined; +} + /** * Chain of thought display component - single collapsible dropdown design. * @@ -291,17 +362,25 @@ export const ThinkingStepsDisplay: FC<{ displayTitle={parentTitle} /> + {(() => { + const hitl = hitlInterruptInfo(parentInfo); + return hitl ? : null; + })()} + {hasChildren && (
{group.children.map((child) => { const childInfo = toolInfo.get(child.id); + const childHitl = hitlInterruptInfo(childInfo); return ( - +
+
+ {childHitl && <TimelineHitlCard info={childHitl} />}
); })}
diff --git a/surfsense_web/lib/hitl/index.ts b/surfsense_web/lib/hitl/index.ts index 4bb15e8b5..a2f218d5d 100644 --- a/surfsense_web/lib/hitl/index.ts +++ b/surfsense_web/lib/hitl/index.ts @@ -6,6 +6,12 @@ export { useHitlBundle, useToolCallIdContext, } from "./bundle-context"; +export { + type HitlRenderTarget, + HitlRenderTargetProvider, + useHitlRenderTarget, + withHitlInTimeline, +} from "./render-target"; export type { HitlDecision, InterruptActionRequest, diff --git a/surfsense_web/lib/hitl/render-target.tsx b/surfsense_web/lib/hitl/render-target.tsx new file mode 100644 index 000000000..cbfdbf2be --- /dev/null +++ b/surfsense_web/lib/hitl/render-target.tsx @@ -0,0 +1,48 @@ +"use client"; + +import type { ToolCallMessagePartComponent } from "@assistant-ui/react"; +import { createContext, useContext } from "react"; +import { isInterruptResult } from "./types"; + +/** + * Where this tool-call card is currently rendering. + * + * - ``"body"`` (default) — assistant-ui's ``MessagePrimitive.Parts`` renders + * the card inside the message bubble. + * - ``"timeline"`` — ``ThinkingStepsDisplay`` renders the SAME component + * inline under the matching step row so the HITL approval lives in the + * chain-of-thought instead of as a standalone card in the message body. + * + * The two render targets share one component implementation; the context + * lets the body render skip itself when the timeline copy will show the + * card, avoiding a double-render. + */ +export type HitlRenderTarget = "body" | "timeline"; + +const HitlRenderTargetContext = createContext("body"); + +export const HitlRenderTargetProvider = HitlRenderTargetContext.Provider; + +export function useHitlRenderTarget(): HitlRenderTarget { + return useContext(HitlRenderTargetContext); +} + +/** + * Hide the body render of a tool-call whose result is a HITL interrupt. + * The same component is mounted again inside ``ThinkingStepsDisplay`` + * with ``HitlRenderTargetProvider value="timeline"`` — that copy renders + * normally, so the card "moves" from the message body to the timeline. + * + * Pure pass-through for non-HITL results AND for the timeline render. + */ +export function withHitlInTimeline( + Component: ToolCallMessagePartComponent +): ToolCallMessagePartComponent { + const Wrapped: ToolCallMessagePartComponent = (props) => { + const target = useHitlRenderTarget(); + if (target === "body" && isInterruptResult(props.result)) return null; + return ; + }; + Wrapped.displayName = `withHitlInTimeline(${Component.displayName ?? Component.name ?? "ToolUI"})`; + return Wrapped; +} From 5c1f5edd751e1e6916dff83102226d43f5ee0180 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 14:39:44 +0200 Subject: [PATCH 45/58] Add chat-messages feature module architecture doc. --- .../features/chat-messages/ARCHITECTURE.md | 483 ++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 surfsense_web/features/chat-messages/ARCHITECTURE.md diff --git a/surfsense_web/features/chat-messages/ARCHITECTURE.md b/surfsense_web/features/chat-messages/ARCHITECTURE.md new file mode 100644 index 000000000..030374aaf --- /dev/null +++ b/surfsense_web/features/chat-messages/ARCHITECTURE.md @@ -0,0 +1,483 @@ +# `features/chat-messages/` — Architecture + +> **Scope.** This module owns everything between an assistant message +> arriving and its rendering inside the chat UI: the timeline (the +> agent's process — reasoning + every tool call), and the HITL +> primitives that per-tool components compose to render approval views. 
+> +> It does **NOT** own: the thread shell, the composer, the streaming +> pipeline, the message frame (`assistant-message.tsx`, +> `user-message.tsx`, markdown renderer, citations), the comments +> sidebar, or any of the 63 individual tool-ui integration files +> under `components/tool-ui/`. + +--- + +## 1. Mental model + +Every assistant message has two regions: + +| Region | What it shows | +|---|---| +| **Timeline** | The agent's *process*. Reasoning, every tool call, grouped by delegation `spanId` into a tree. Each tool call is rendered by its registered component, which selects its own view (running, awaiting approval, success, error, etc.) by discriminating its `result` data. | +| **Body** | The agent's *product*. Markdown text, citations, native reasoning blocks, and value-add deliverables (image viewer, chart, canvas). Connector tool cards do NOT render here. | + +**Principle: timeline = process, body = product. No overlap.** + +A tool's UI lives in the body **if and only if** it produces a deliverable +the user wants to interact with — view, scrub, copy, share. If the UI +just shows that the tool ran and what it did, it lives in the timeline. + +``` +┌─ Assistant Message ─────────────────────────────────────────┐ +│ │ +│ ╔═════════════════════════════════════════════════════╗ │ +│ ║ TIMELINE (process) ║ │ +│ ║ ║ │ +│ ║ ▸ task: NotionAgent [running] ║ │ +│ ║ ▸ search_workspace [completed] ║ │ +│ ║ ▸ update_page ← rendered by ║ │ +│ ║ (Notion-styled approval UpdateNotion- ║ │ +│ ║ card OR Notion-styled PageToolUI; ║ │ +│ ║ success/error card, the component ║ │ +│ ║ per its own data picks the view ║ │ +│ ║ discrimination) from result) ║ │ +│ ║ ▸ summarize [completed] ║ │ +│ ╚═════════════════════════════════════════════════════╝ │ +│ │ +│ ╔═════════════════════════════════════════════════════╗ │ +│ ║ BODY (product) ║ │ +│ ║ ║ │ +│ ║ Markdown text, citations, value-add deliverables ║ │ +│ ║ only. Connector tool cards do NOT render here. ║ │ +│ ╚═════════════════════════════════════════════════════╝ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## 2. The single data model + +The timeline reads ONE data structure: a `TimelineItem[]`. There are +no parallel structures for "thinking steps", "tool calls", "HITL +bundles", etc. Every visible piece of agent activity is a `TimelineItem`. + +### 2.1 The discriminated union (outer discrimination) + +Two kinds. The timeline does **outer discrimination** — it chooses +reasoning view vs tool-call mounting based on `kind`. + +```ts +type ItemStatus = + | "pending" | "running" | "completed" | "cancelled" | "error"; + +interface BaseItem { + id: string; + spanId?: string; // groups items into delegation tree (parent task + children) + status: ItemStatus; +} + +interface ReasoningItem extends BaseItem { + kind: "reasoning"; + text: string; +} + +interface ToolCallItem extends BaseItem { + kind: "tool-call"; + toolName: string; + args: Record; + argsText?: string; + result?: unknown; // per-tool component discriminates this internally + langchainToolCallId?: string; +} + +type TimelineItem = ReasoningItem | ToolCallItem; + +interface TimelineGroup { + parent: TimelineItem; + children: TimelineItem[]; +} +``` + +**`ToolCallItem` has no `approval` field, no `phase`, no `view`.** All of +that is derived inside the per-tool component from the result data. 
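Worth pinning down before the per-tool story: how a flat `TimelineItem[]` becomes `TimelineGroup[]`. Below is a minimal sketch of the `spanId` grouping owned by `timeline/grouping.ts` (§5), under the semantics the §8 tests assert: the first item carrying a `spanId` opens the group, later items with the same `spanId` nest under it, and items with no (or an unseen) `spanId` — including orphaned children — are promoted to parents. `groupTimelineItems` is a hypothetical name; the shipped function may differ.

```ts
function groupTimelineItems(items: readonly TimelineItem[]): TimelineGroup[] {
  const groups: TimelineGroup[] = [];
  const parentBySpan = new Map<string, TimelineGroup>();
  for (const item of items) {
    const open = item.spanId ? parentBySpan.get(item.spanId) : undefined;
    if (open) {
      // Same spanId as an already-open group: nest under that parent.
      open.children.push(item);
      continue;
    }
    // No spanId, or a spanId seen for the first time (the delegating `task`
    // itself, which owns the span), or an orphaned child: top-level parent.
    const group: TimelineGroup = { parent: item, children: [] };
    groups.push(group);
    if (item.spanId) parentBySpan.set(item.spanId, group);
  }
  return groups;
}
```

Because lookup is keyed by `spanId` rather than adjacency, interleaved top-level items (e.g. reasoning between delegated tool calls) don't break an open group.
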
+ +### 2.2 Inner discrimination (per-tool component) + +Each tool registers a component that receives the tool-call data and +decides what to render based on its own result-shape discriminators: + +```tsx +const UpdateNotionPageToolUI: TimelineToolComponent = (props) => { + if (isInterruptResult(props.result)) return ; + if (isAuthErrorResult(props.result)) return ; + if (isErrorResult(props.result)) return ; + if (isInfoResult(props.result)) return ; + if (isSuccessResult(props.result)) return ; + return ; +}; +``` + +The discriminators (`isInterruptResult`, `isAuthErrorResult`, etc.) +are **types, not centralized infrastructure**. The component owns +the dispatch. The timeline knows none of this. + +### 2.3 The pure builder + +```ts +function buildTimeline( + content: MessageContent[], + thinkingSteps: ThinkingStep[], +): TimelineGroup[] +``` + +Builds the timeline from existing message content + thinking-step data +parts. Pure function. Sets `kind` and `status` on each item; preserves +`result` verbatim for per-tool discrimination. + +### 2.4 The dispatch (timeline-level) + +Two cases. Exhaustive switch. No runtime guards in the timeline renderer. + +```tsx +function TimelineItemView({ item }: { item: TimelineItem }) { + switch (item.kind) { + case "reasoning": return ; + case "tool-call": return ; + } +} + +function ToolCallItemView({ item }: { item: ToolCallItem }) { + const ToolBody = getToolComponent(item.toolName) ?? FallbackToolBody; + return ; +} +``` + +**No card frame, header, body slot, approval area, or result panel +at the timeline level.** Each tool component owns its own visual +presentation. This matches how every existing tool-ui component +already works — they each render their own rounded card with their +own header. + +--- + +## 3. The timeline's tool-component contract + +Tool components mounted by the timeline implement a subset of +assistant-ui's `ToolCallMessagePartProps` — only the fields the +timeline can supply: + +```ts +interface TimelineToolProps { + toolCallId: string; + toolName: string; + args: Record; + argsText?: string; + result?: unknown; + langchainToolCallId?: string; + status: ItemStatus; // simple enum, not assistant-ui's complex status object +} + +type TimelineToolComponent = (props: TimelineToolProps) => ReactNode; +``` + +Notably absent (compared to `ToolCallMessagePartProps`): +- `addResult`, `resume` — runtime-only, not needed; HITL decisions + flow through `useHitlDecision` (a hook) which talks to the runtime + directly. +- The complex `status: ToolCallMessagePartState["status"]` object — + replaced by our simple `ItemStatus` enum. + +The 15 existing HITL-aware tool-ui components only use the subset +above. They are **retyped** to `TimelineToolComponent` in the cutover +commit (mechanical: `ToolCallMessagePartComponent` → `TimelineToolComponent`). + +--- + +## 4. Rendering topology — how the body opts out + +The body uses assistant-ui's `MessagePrimitive.Parts` and registers a +**no-op fallback** for tool calls so they don't render here: + +```tsx + null, // every other tool-call: render nothing in the body + }} +/> +``` + +`BODY_TOOLS` starts empty (no value-add deliverables exist yet) and +grows as we identify them. Every tool not in `BODY_TOOLS` renders +nothing in the body. + +The timeline reads message content via `useAuiState(({ message }) => +message?.content)` and runs `buildTimeline` to produce the items it +renders. Tool-call data IS in the message; the body just chooses not +to render it. + +**Result:** zero dual placement. 
Zero suppression HOC. Zero +render-target context. Zero coordination. + +--- + +## 5. Slice layout + +``` +features/chat-messages/ +├── ARCHITECTURE.md +│ +├── timeline/ ← the process surface +│ ├── types.ts (TimelineItem union, ToolCallItem, ItemStatus, TimelineGroup) +│ ├── build-timeline.ts (pure: content + thinkingSteps → groups) +│ ├── grouping.ts (pure: group items by spanId) +│ ├── subagent-rename.ts (pure: parent task title from args.subagent_type) +│ ├── tool-registry/ (PRIVATE to timeline; only timeline mounts tools) +│ │ ├── types.ts (TimelineToolComponent, TimelineToolProps) +│ │ ├── registry.ts (TOOLS_BY_NAME from components/tool-ui/*) +│ │ ├── adapt-props.ts (pure: ToolCallItem → TimelineToolProps) +│ │ ├── fallback/ +│ │ │ ├── fallback-tool-body.tsx (TimelineToolComponent for unregistered tools — discriminates internally) +│ │ │ ├── default-fallback-card.tsx (the non-HITL fallback view: status icon + collapsible + JSON) +│ │ │ ├── revert-button.tsx (revert affordance — used by default-fallback-card) +│ │ │ ├── use-tool-action.ts (action lookup hook for revert) +│ │ │ └── index.ts +│ │ └── index.ts +│ ├── items/ +│ │ ├── reasoning-item.tsx (renders kind: "reasoning") +│ │ ├── tool-call-item.tsx (lookup component + mount with adapted props — ~10 lines) +│ │ └── index.ts +│ ├── timeline.tsx (groups + iteration + 2-case dispatch) +│ ├── data-renderer.tsx (assistant-ui adapter; exports TimelineDataUI) +│ └── index.ts +│ +├── hitl/ ← pure HITL primitives +│ ├── types.ts (InterruptResult, HitlPhase, HitlDecision, isInterruptResult) +│ ├── use-hitl-decision.ts (hook: dispatch approve/edit/reject — used by every approval card) +│ ├── use-hitl-phase.ts (hook: tracks pending → processing → approved/rejected/edited) +│ ├── approval-cards/ (the FALLBACK-mounted approval views; per-tool components import from here OR build their own) +│ │ ├── generic-approval.tsx (default approval UI — what FallbackToolBody mounts for interrupt results) +│ │ ├── doom-loop-approval.tsx (special-case approval UI + isDoomLoopInterrupt) +│ │ └── index.ts +│ ├── edit-panel/ +│ │ ├── edit-panel.atom.ts (Jotai atoms for the panel state) +│ │ ├── edit-panel.tsx (root: atom wiring + desktop/mobile switch only) +│ │ ├── fields/ +│ │ │ ├── email-tags-field.tsx (EmailsTagField + parse/format helpers) +│ │ │ ├── calendar-field.tsx (DateTimePickerField + parse/format helpers) +│ │ │ ├── extra-fields.tsx (ExtraField switch renderer) +│ │ │ └── index.ts (private barrel) +│ │ └── index.ts +│ └── index.ts +│ +└── (no body slice yet — body just registers `tools={{ fallback: () => null }}`) +``` + +### 5.1 Notable absences + +| Was | Status | Reason | +|---|---|---| +| `tool-cards/` slice | **Folded into `timeline/`** | Tool-call rendering happens in the timeline; the tool-registry is private to timeline. | +| `bundleTool` composer | **Deleted** | Body opts out via `fallback: () => null`. No HOCs to compose. | +| `withDelegationSpanIndent` HOC | **Deleted** | Tree indent is owned by the timeline's group renderer. | +| `withBundleStep` + `HitlBundleProvider` | **Deleted** | Multi-approval is just N inline renderings; no coordination needed. | +| `withHitlInTimeline` + `HitlRenderTargetProvider` | **Deleted** | Tool cards never render in body; no dual-placement to suppress. | +| `pickApprovalCard` central dispatcher | **Deleted** | Each tool component picks its own view via internal discrimination. The fallback has its OWN internal dispatcher (interrupt → generic-approval; doom-loop → doom-loop-approval). 
| +| `getHitlToolComponent` registry | **Deleted** | The tool-registry is just a `Record`; lookup is `TOOLS_BY_NAME[name]`. | +| Centralized `approval-area.tsx` in timeline | **Deleted** | The approval is a view the per-tool component renders, not an area the timeline composes. | +| `ApprovalState` on `ToolCallItem` | **Deleted** | Phase is local UI state inside per-tool approval cards (via `useHitlPhase`). The timeline doesn't track it. | +| `ThinkingStepToolInfoMap` Map join | **Deleted** | The unified `TimelineItem` union eliminates the join. | + +--- + +## 6. Public surfaces + +### `timeline/index.ts` + +```ts +export { TimelineDataUI }; // the assistant-ui registration +export { Timeline }; // exposed for tests +export type { TimelineItem, ReasoningItem, ToolCallItem, TimelineGroup, ItemStatus }; +export type { TimelineToolComponent, TimelineToolProps }; +``` + +### `hitl/index.ts` + +```ts +export type { InterruptResult, InterruptActionRequest, InterruptReviewConfig, HitlDecision, HitlPhase }; +export { isInterruptResult }; + +export { useHitlDecision }; +export { useHitlPhase }; + +export { GenericHitlApprovalToolUI }; // for tool-ui integrations that want to compose on top +export { DoomLoopApprovalToolUI, isDoomLoopInterrupt }; + +export { HitlEditPanel, MobileHitlEditPanel }; +export { openHitlEditPanelAtom, closeHitlEditPanelAtom, hitlEditPanelAtom }; +export type { ExtraField }; +``` + +The 63 `components/tool-ui/*` integrations consume `hitl/`'s public +surface (types, hooks, edit-panel atom, optionally the fallback +approval cards). Nothing else. + +--- + +## 7. Layering & SRP rules + +### 7.1 The "what knows about what" rule + +| Component | Knows about | +|---|---| +| `timeline/` | Itself + `hitl/` (via the fallback) + `components/tool-ui/*` (via the registry) | +| `timeline/tool-registry/` | The `TimelineToolComponent` contract, `components/tool-ui/*`, and `hitl/` (for the fallback's approval views) | +| `hitl/` | Itself only — no knowledge of timeline, tool-call types, registry | +| `components/tool-ui/*` | `hitl/` only (for HITL primitives + optional fallback approval cards); never reaches into `timeline/` | +| Body (`assistant-message.tsx`) | The `BODY_TOOLS` registry and `TimelineDataUI` from `timeline/index.ts` | + +`hitl/` does **NOT** import from `timeline/`. The dependency arrow is one-way. + +### 7.2 Render policy belongs to the surface, not the primitive + +- `hitl/` exposes hooks, types, and the fallback approval cards. +- `timeline/` decides WHEN and WHERE tool components mount (inside + `tool-call-item.tsx`). +- A `hitl/` primitive must never assume it's being rendered in the + timeline, the body, or anywhere else. It receives props, renders + UI, returns. No environment sniffing, no context. +- Per-tool components in `components/tool-ui/*` decide WHICH view to + render based on result-shape discriminators. The timeline does not + know these discriminators exist. + +### 7.3 Single Responsibility + +Rules in priority order: + +1. **One responsibility per file.** Need "and" to describe it? Split it. +2. **One responsibility per function.** Same. +3. **Line count is a smell, not a budget.** ~250 lines = pause and + ask "still one responsibility?"; ~500 lines = strong presumption + of split needed unless explicitly justified at the top of the file. 
Notable splits driven by SRP during the port:

- `hitl-edit-panel.tsx` (current 405 lines, 4 responsibilities) → 5
  files: `edit-panel.tsx` (root + layout switch), `email-tags-field.tsx`,
  `calendar-field.tsx`, `extra-fields.tsx`, `edit-panel.atom.ts`.
- `tool-fallback.tsx` (current 533 lines, 3 responsibilities) → split
  across `fallback-tool-body.tsx`, `default-fallback-card.tsx`,
  `revert-button.tsx`, `use-tool-action.ts`.
- `thinking-steps.tsx` (current 434 lines, 5 responsibilities) →
  folded into the new `timeline/` slice across `types.ts`,
  `build-timeline.ts`, `grouping.ts`, `subagent-rename.ts`,
  `timeline.tsx`, `items/*`, `data-renderer.tsx`.

---

## 8. Tested behaviors

Unit tests live next to the file they cover (`*.test.ts(x)`).

- `timeline/build-timeline.test.ts` — content + thinkingSteps → correct items, correct kind, correct status, correct ordering. `result` preserved verbatim.
- `timeline/grouping.test.ts` — items group correctly by spanId; first item with a spanId is the parent; orphaned children are promoted defensively.
- `timeline/subagent-rename.test.ts` — `task` step's display title resolves to `args.subagent_type` (title-cased); falls back to "Task" when subagent type is missing.
- `timeline/tool-registry/registry.test.ts` — `TOOLS_BY_NAME` includes every named tool; `FallbackToolBody` is returned for unknown names; the fallback dispatches correctly (interrupt → generic, doom-loop → doom-loop, otherwise → default fallback).
- `timeline/tool-registry/adapt-props.test.ts` — `ToolCallItem` → `TimelineToolProps` mapping is lossless; status mapping is correct.
- `hitl/use-hitl-phase.test.ts` — phase transitions through pending → processing → approved/rejected/edited correctly.
- `hitl/approval-cards/doom-loop-approval.test.tsx` — `isDoomLoopInterrupt` matches doom-loop-shape interrupts only.

Smoke test after cutover:

- Assistant message renders; markdown + citations work in body.
- All connector tool calls render in timeline only (none in body).
- Reasoning steps render in timeline.
- Single HITL flow (Notion update): approve, edit, reject — each transitions through the phases correctly.
- Multiple pending HITL cards: each renders inline at its position; deciding one doesn't affect the others.
- Doom-loop approval renders the special card.
- Revert button works on completed default-fallback cards and survives reload.
- Subagent name renaming on `task` parent step.

---

## 9. Migration plan (strangler fig, single atomic cutover)

### Phase A — Build the new slice in parallel

In dependency order: `hitl/` first (leaf), then `timeline/`. The
existing code (`thinking-steps.tsx`, `tool-fallback.tsx`,
`assistant-message.tsx`'s tool registry, etc.) remains fully
functional throughout Phase A.

1. Port `hitl/` primitives. Apply SRP splits (edit panel into 5 files).
   `hitl/approval-cards/{generic,doom-loop}-approval.tsx` are ported
   as standalone components — they're what the fallback mounts and
   what per-tool integrations may compose on top of.
2. Build the `timeline/` slice. Implement `buildTimeline` from scratch
   (do NOT copy thinking-steps logic verbatim — design the pure
   function around the new union); the grouping core is sketched
   after this list. Build the `tool-registry/` with the
   `TimelineToolComponent` contract; the registry imports from
   `components/tool-ui/*` (no file moves yet).
3. Add unit tests as listed in §8.
4. Verify: tsc clean, biome clean, no consumer file touched, no
   linter regressions.
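
Phase A step 2's grouping core, sketched. A minimal, self-contained
illustration under assumed shapes (`id`/`spanId` fields only) — the
real `grouping.ts` operates on the full `TimelineItem` union:

```ts
// grouping.ts sketch — item shape pared down to what grouping reads.
type Item = { id: string; spanId?: string };
type Group = { parent: Item; children: Item[] };

export function groupBySpanId(items: Item[]): Group[] {
	const groups: Group[] = [];
	const bySpan = new Map<string, Group>();

	for (const item of items) {
		if (!item.spanId) {
			// No span: a standalone group of one.
			groups.push({ parent: item, children: [] });
			continue;
		}
		const open = bySpan.get(item.spanId);
		if (open) {
			open.children.push(item);
			continue;
		}
		// First item seen with a spanId becomes the parent; a child that
		// arrives before its parent is thereby promoted defensively (§8).
		const group: Group = { parent: item, children: [] };
		bySpan.set(item.spanId, group);
		groups.push(group);
	}
	return groups;
}
```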
+ +### Phase B — Atomic cutover (single commit) + +| File | Change | +|---|---| +| `components/assistant-ui/assistant-message.tsx` | Replace `TOOLS_BY_NAME`/`TOOLS_FALLBACK` definitions with `BODY_TOOLS` (initially empty) + `tools={{ fallback: () => null }}`. Replace `ThinkingStepsDataUI` registration with `TimelineDataUI`. | +| `components/public-chat/public-thread.tsx` | Same registry + data UI swap. | +| `app/dashboard/.../new-chat/page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. Drop `HitlBundleProvider` (no longer needed). | +| `components/free-chat/free-chat-page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. | +| `components/public-chat/public-chat-view.tsx` | Same. | +| `components/layout/ui/right-panel/RightPanel.tsx` | Switch `HitlEditPanel` import to `@/features/chat-messages/hitl`. | +| The 15 `components/tool-ui/*` HITL-aware integration files | (a) Switch HITL imports from `@/lib/hitl`, `@/hooks/use-hitl-phase`, `@/atoms/chat/hitl-edit-panel.atom` → `@/features/chat-messages/hitl`. (b) Retype from `ToolCallMessagePartComponent` → `TimelineToolComponent` (mechanical type rename). | + +### Phase C — Delete legacy + +After cutover passes smoke tests: + +- `components/assistant-ui/thinking-steps.tsx` +- `components/assistant-ui/tool-fallback.tsx` +- `lib/chat/delegation-span-indent.ts` +- `lib/hitl/` (entire folder) +- `components/hitl-bundle-pager/` (entire folder) +- `components/tool-ui/generic-hitl-approval.tsx` +- `components/tool-ui/doom-loop-approval.tsx` +- `components/hitl-edit-panel/` (entire folder) +- `hooks/use-hitl-phase.ts` +- `atoms/chat/hitl-edit-panel.atom.ts` + +Verify: no orphan files, no dead imports, no test regressions. + +--- + +## 10. Out of scope (and one consumer relationship) + +### 10.1 The 63 `components/tool-ui/*` integrations + +These are **first-class consumers** of `hitl/` and the +`TimelineToolComponent` contract. They are imported by +`timeline/tool-registry/registry.ts` to build `TOOLS_BY_NAME`. They +never reach into `timeline/` themselves. + +They stay where they are. Future option to move them is a separate, +mechanical follow-up refactor. + +### 10.2 Not touched by this refactor + +- The composer (input bar, mention picker, prompt picker, tool toggles). +- The streaming pipeline (`lib/chat/streaming-state.ts`, `stream-pipeline.ts`, `thread-persistence.ts`). +- The chat-comments sidebar. +- The message frame (`assistant-message.tsx`, `user-message.tsx`, `markdown-text.tsx`, `inline-citation.tsx`) beyond swapping the registry imports. + +If any of these become a blocker for the refactor (e.g. the streaming +pipeline needs a metadata field that doesn't exist), surface it +explicitly and decide whether to expand scope before touching it. From d9ad9ca5cbd55e178566387e9435ea1e7ea41539 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 18:31:16 +0200 Subject: [PATCH 46/58] chat-messages: refresh feature module architecture doc. 
---
 .../features/chat-messages/ARCHITECTURE.md   | 120 +++++++++++++++---
 1 file changed, 105 insertions(+), 15 deletions(-)

diff --git a/surfsense_web/features/chat-messages/ARCHITECTURE.md b/surfsense_web/features/chat-messages/ARCHITECTURE.md
index 030374aaf..5b1dedd3b 100644
--- a/surfsense_web/features/chat-messages/ARCHITECTURE.md
+++ b/surfsense_web/features/chat-messages/ARCHITECTURE.md
@@ -184,8 +184,10 @@ type TimelineToolComponent = (props: TimelineToolProps) => ReactNode;
 Notably absent (compared to `ToolCallMessagePartProps`):
 
 - `addResult`, `resume` — runtime-only, not needed; HITL decisions
-  flow through `useHitlDecision` (a hook) which talks to the runtime
-  directly.
+  flow through `useHitlDecision`, which either stages in the active
+  bundle (N≥2) or fires the `hitl-decision` window event the page
+  listens for (N=1). The hook reads `useToolCallIdContext()` to know
+  which call is dispatching.
 - The complex `status: ToolCallMessagePartState["status"]` object —
   replaced by our simple `ItemStatus` enum.
@@ -224,7 +226,81 @@
 renders. Tool-call data IS in the message; the body just chooses not
 to render it.
 
 **Result:** zero dual placement. Zero suppression HOC. Zero
-render-target context. Zero coordination.
+render-target context. Zero pager HOC.
+
+---
+
+## 4a. Multi-approval coordination (the bundle + pager)
+
+When N HITL interrupts are pending in the same assistant turn (e.g. an
+agent fires multiple gated tool calls in parallel), the LangGraph
+runtime expects **one resume call with N decisions in order**. Per-card
+independent submission isn't possible without backend changes.
+
+The slice handles this with a single React state container,
+`HitlBundleProvider`, mounted once at the thread root by the page that
+owns the runtime (currently `app/dashboard/.../new-chat/page.tsx` and
+`components/free-chat/free-chat-page.tsx`):
+
+```tsx
+<HitlBundleProvider>
+  {/* Thread + Timeline + approval cards mount inside */}
+</HitlBundleProvider>
+```
+
+Per-card flow:
+
+1. `tool-call-item.tsx` wraps each mounted tool component in
+   `ToolCallIdProvider` so `useHitlDecision`
+   knows which call is dispatching.
+2. The user clicks approve/edit/reject on a card.
+3. `useHitlDecision().dispatch([decision])` runs:
+   - **Bundle active (N≥2):** stages the decision under this card's
+     `toolCallId` and fires a `hitl-stage` event so the card's local
+     result mirror updates immediately (UX continuity — no re-prompt
+     if user navigates back via the pager).
+   - **No bundle (N=1):** dispatches the `hitl-decision` event
+     directly — single-decision fast path.
+4. When all N decisions are staged, the user clicks "Submit decisions"
+   on the pager chrome. `bundle.submit()` dispatches the `hitl-decision`
+   event with the full ordered array. The page's listener calls
+   `runtime.resume({ resume: orderedDecisions })` once.
+
+### Pager UX (kept, not deleted)
+
+When a bundle is active (N≥2), only ONE approval card is visible at a
+time — the current step. Other bundle members are hidden until the user
+navigates to them. A small pager chrome (prev/next + "Step X / N" +
+"Submit decisions" button) renders once at the end of the timeline.
+
+Where the responsibilities live:
+
+- **`tool-call-item.tsx`** (timeline) — checks `useHitlBundle()`. If
+  the item is in an active bundle but not the current step, returns
+  null. Otherwise wraps the per-tool component in `ToolCallIdProvider`
+  and mounts it.
+- **`timeline.tsx`** — renders `<PagerChrome />` once at the bottom,
+  conditional on `useHitlBundle()` being non-null.
+- **`hitl/bundle/pager-chrome.tsx`** — pure presentational component; + reads bundle state, renders nav + Submit. No knowledge of the timeline. + +This is **the only Provider in the slice.** It's a state container, not +a behavior HOC: nothing wraps individual cards. The hide-if-not-current +decision is made at the single mount point (`tool-call-item.tsx`), not +distributed across N HOC wrappers. + +What was deleted vs kept here: + +- **Deleted:** `withBundleStep` HOC. Its two responsibilities (hide + non-current cards; render pager after current card) split into the + two correct places: `tool-call-item.tsx` and `timeline.tsx` + respectively. No HOC to compose. +- **Kept (ported as-is to the slice):** `HitlBundleProvider`, + `useHitlBundle`, `ToolCallIdProvider`, `useToolCallIdContext`, + `BundleSubmit`, `HitlBundleAPI`, `PagerChrome`. --- @@ -258,10 +334,14 @@ features/chat-messages/ │ ├── data-renderer.tsx (assistant-ui adapter; exports TimelineDataUI) │ └── index.ts │ -├── hitl/ ← pure HITL primitives +├── hitl/ ← HITL primitives + bundle state container │ ├── types.ts (InterruptResult, HitlPhase, HitlDecision, isInterruptResult) -│ ├── use-hitl-decision.ts (hook: dispatch approve/edit/reject — used by every approval card) -│ ├── use-hitl-phase.ts (hook: tracks pending → processing → approved/rejected/edited) +│ ├── bundle/ (the ONLY Provider in the slice — coordinates N→1 submission + pager UX) +│ │ ├── bundle-context.tsx (HitlBundleProvider, useHitlBundle, ToolCallIdProvider, useToolCallIdContext, BundleSubmit, HitlBundleAPI) +│ │ ├── pager-chrome.tsx (prev/next/submit chrome — mounted once by timeline.tsx when bundle active) +│ │ └── index.ts +│ ├── use-hitl-decision.ts (hook: stages in bundle when N≥2, direct-dispatches when N=1; used by every approval card) +│ ├── use-hitl-phase.ts (hook: tracks pending → processing → complete/rejected) │ ├── approval-cards/ (the FALLBACK-mounted approval views; per-tool components import from here OR build their own) │ │ ├── generic-approval.tsx (default approval UI — what FallbackToolBody mounts for interrupt results) │ │ ├── doom-loop-approval.tsx (special-case approval UI + isDoomLoopInterrupt) @@ -287,7 +367,8 @@ features/chat-messages/ | `tool-cards/` slice | **Folded into `timeline/`** | Tool-call rendering happens in the timeline; the tool-registry is private to timeline. | | `bundleTool` composer | **Deleted** | Body opts out via `fallback: () => null`. No HOCs to compose. | | `withDelegationSpanIndent` HOC | **Deleted** | Tree indent is owned by the timeline's group renderer. | -| `withBundleStep` + `HitlBundleProvider` | **Deleted** | Multi-approval is just N inline renderings; no coordination needed. | +| `withBundleStep` HOC | **Deleted** | Two responsibilities split into the right places: hide-if-not-current → `tool-call-item.tsx`; render pager after current card → `timeline.tsx`. No HOC. | +| `HitlBundleProvider` + `useHitlBundle` + `PagerChrome` | **Kept** (state container + presentational chrome, not HOCs) | Backend constraint: parallel interrupts need ONE ordered resume call. Provider collects N decisions, pager is the user's submit affordance. | | `withHitlInTimeline` + `HitlRenderTargetProvider` | **Deleted** | Tool cards never render in body; no dual-placement to suppress. | | `pickApprovalCard` central dispatcher | **Deleted** | Each tool component picks its own view via internal discrimination. The fallback has its OWN internal dispatcher (interrupt → generic-approval; doom-loop → doom-loop-approval). 
| | `getHitlToolComponent` registry | **Deleted** | The tool-registry is just a `Record`; lookup is `TOOLS_BY_NAME[name]`. | @@ -317,6 +398,10 @@ export { isInterruptResult }; export { useHitlDecision }; export { useHitlPhase }; +export { HitlBundleProvider, ToolCallIdProvider, useHitlBundle, useToolCallIdContext }; +export { PagerChrome }; +export type { BundleSubmit, HitlBundleAPI }; + export { GenericHitlApprovalToolUI }; // for tool-ui integrations that want to compose on top export { DoomLoopApprovalToolUI, isDoomLoopInterrupt }; @@ -384,14 +469,19 @@ Notable splits driven by SRP during the port: ## 8. Tested behaviors -Unit tests live next to the file they cover (`*.test.ts(x)`). +> **Status:** No test runner is set up in `surfsense_web` yet. The pure +> functions below are *intended* to be unit-tested but tests are +> deferred to **Phase D** (post-cutover follow-up: install vitest, +> write the suites, update this section). -- `timeline/build-timeline.test.ts` — content + thinkingSteps → correct items, correct kind, correct status, correct ordering. `result` preserved verbatim. -- `timeline/grouping.test.ts` — items group correctly by spanId; first item with a spanId is the parent; orphaned children are promoted defensively. -- `timeline/subagent-rename.test.ts` — `task` step's display title resolves to `args.subagent_type` (title-cased); falls back to "Task" when subagent type is missing. +Planned tests once vitest is in: + +- `timeline/build-timeline.test.ts` — content + thinkingSteps → correct items, correct kind, correct status, correct ordering. `result` preserved verbatim. Orphan tool-calls (no `thinkingStepId`) appended at end. +- `timeline/grouping.test.ts` — items group correctly by spanId; first item with a spanId is the parent; orphaned children become parents defensively. +- `timeline/subagent-rename.test.ts` — `task` tool-call's display title resolves to `args.subagent_type` (title-cased); falls back to `getToolDisplayName("task")` when subagent type is missing. - `timeline/tool-registry/registry.test.ts` — `TOOLS_BY_NAME` includes every named tool; `FallbackToolBody` is returned for unknown names; the fallback dispatches correctly (interrupt → generic, doom-loop → doom-loop, otherwise → default fallback). - `timeline/tool-registry/adapt-props.test.ts` — `ToolCallItem` → `TimelineToolProps` mapping is lossless; status mapping is correct. -- `hitl/use-hitl-phase.test.ts` — phase transitions through pending → processing → approved/rejected/edited correctly. +- `hitl/use-hitl-phase.test.ts` — phase transitions through pending → processing → complete/rejected correctly. - `hitl/approval-cards/doom-loop-approval.test.tsx` — `isDoomLoopInterrupt` matches doom-loop-shape interrupts only. Smoke test after cutover: @@ -434,7 +524,7 @@ functional throughout Phase A. |---|---| | `components/assistant-ui/assistant-message.tsx` | Replace `TOOLS_BY_NAME`/`TOOLS_FALLBACK` definitions with `BODY_TOOLS` (initially empty) + `tools={{ fallback: () => null }}`. Replace `ThinkingStepsDataUI` registration with `TimelineDataUI`. | | `components/public-chat/public-thread.tsx` | Same registry + data UI swap. | -| `app/dashboard/.../new-chat/page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. Drop `HitlBundleProvider` (no longer needed). | +| `app/dashboard/.../new-chat/page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. Switch `HitlBundleProvider` import from `@/lib/hitl` → `@/features/chat-messages/hitl` (keep the wrap; just new path). 
| | `components/free-chat/free-chat-page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. | | `components/public-chat/public-chat-view.tsx` | Same. | | `components/layout/ui/right-panel/RightPanel.tsx` | Switch `HitlEditPanel` import to `@/features/chat-messages/hitl`. | @@ -447,8 +537,8 @@ After cutover passes smoke tests: - `components/assistant-ui/thinking-steps.tsx` - `components/assistant-ui/tool-fallback.tsx` - `lib/chat/delegation-span-indent.ts` -- `lib/hitl/` (entire folder) -- `components/hitl-bundle-pager/` (entire folder) +- `lib/hitl/` (entire folder — replaced by `features/chat-messages/hitl/{types.ts,bundle/,use-hitl-decision.ts}`) +- `components/hitl-bundle-pager/` (entire folder — `PagerChrome` ported to `hitl/bundle/pager-chrome.tsx`; `withBundleStep` deleted, responsibilities split into `tool-call-item.tsx` + `timeline.tsx`) - `components/tool-ui/generic-hitl-approval.tsx` - `components/tool-ui/doom-loop-approval.tsx` - `components/hitl-edit-panel/` (entire folder) From 9e451a59072619ecfee560b4041d8077178ec681 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 18:31:23 +0200 Subject: [PATCH 47/58] chat-messages: add hitl module with types, hooks, bundle, approval cards, and edit panel. --- .../approval-cards/doom-loop-approval.tsx | 191 +++++++++++++ .../hitl/approval-cards/generic-approval.tsx | 261 ++++++++++++++++++ .../hitl/approval-cards/index.ts | 2 + .../hitl/bundle/bundle-context.tsx | 157 +++++++++++ .../chat-messages/hitl/bundle/index.ts | 8 + .../hitl/bundle/pager-chrome.tsx | 65 +++++ .../hitl/edit-panel/edit-panel.atom.ts | 82 ++++++ .../hitl/edit-panel/edit-panel.tsx | 203 ++++++++++++++ .../hitl/edit-panel/fields/calendar-field.tsx | 112 ++++++++ .../edit-panel/fields/email-tags-field.tsx | 86 ++++++ .../hitl/edit-panel/fields/extra-fields.tsx | 74 +++++ .../hitl/edit-panel/fields/index.ts | 3 + .../chat-messages/hitl/edit-panel/index.ts | 7 + .../features/chat-messages/hitl/index.ts | 31 +++ .../features/chat-messages/hitl/types.ts | 51 ++++ .../chat-messages/hitl/use-hitl-decision.ts | 45 +++ .../chat-messages/hitl/use-hitl-phase.ts | 66 +++++ 17 files changed, 1444 insertions(+) create mode 100644 surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/approval-cards/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/bundle/bundle-context.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/bundle/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/bundle/pager-chrome.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/edit-panel.atom.ts create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/edit-panel.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/calendar-field.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/email-tags-field.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/extra-fields.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/types.ts create mode 100644 
surfsense_web/features/chat-messages/hitl/use-hitl-decision.ts
 create mode 100644 surfsense_web/features/chat-messages/hitl/use-hitl-phase.ts

diff --git a/surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx b/surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx
new file mode 100644
index 000000000..5b2b0e385
--- /dev/null
+++ b/surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx
@@ -0,0 +1,191 @@
+"use client";
+
+import { CornerDownLeftIcon, OctagonAlert } from "lucide-react";
+import { useCallback, useEffect, useMemo } from "react";
+import { TextShimmerLoader } from "@/components/prompt-kit/loader";
+import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Separator } from "@/components/ui/separator";
+import type { HitlApprovalCard, HitlDecision, InterruptResult } from "../types";
+import { isInterruptResult } from "../types";
+import { useHitlDecision } from "../use-hitl-decision";
+import { useHitlPhase } from "../use-hitl-phase";
+
+/**
+ * Specialized HITL card for ``DoomLoopMiddleware`` interrupts. The
+ * backend signals these by setting ``context.permission === "doom_loop"``
+ * on the ``permission_ask`` interrupt.
+ *
+ * The card replaces the generic "approve/reject" framing with a
+ * "continue/stop" affordance that better matches the user's mental
+ * model: the agent is stuck repeating itself, not asking permission
+ * for a destructive action.
+ */
+function DoomLoopCardView({
+	toolName,
+	args,
+	interruptData,
+	onDecision,
+}: {
+	toolName: string;
+	args: Record<string, unknown>;
+	interruptData: InterruptResult;
+	onDecision: (decision: HitlDecision) => void;
+}) {
+	const { phase, setProcessing, setRejected } = useHitlPhase(interruptData);
+
+	const context = (interruptData.context ?? {}) as Record<string, unknown>;
+	const threshold = typeof context.threshold === "number" ? context.threshold : 3;
+	const stuckTool = (typeof context.tool === "string" && context.tool) || toolName;
+	const recentSignatures = Array.isArray(context.recent_signatures)
+		? (context.recent_signatures as string[])
+		: [];
+	const displayName = stuckTool.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
+
+	const argPreview = useMemo(() => {
+		if (!args || Object.keys(args).length === 0) return null;
+		try {
+			const json = JSON.stringify(args, null, 2);
+			return json.length > 600 ? `${json.slice(0, 600)}…` : json;
+		} catch {
+			return null;
+		}
+	}, [args]);
+
+	const handleContinue = useCallback(() => {
+		if (phase !== "pending") return;
+		setProcessing();
+		onDecision({ type: "approve" });
+	}, [phase, setProcessing, onDecision]);
+
+	const handleStop = useCallback(() => {
+		if (phase !== "pending") return;
+		setRejected();
+		onDecision({ type: "reject", message: "Doom loop: user requested stop." });
+	}, [phase, setRejected, onDecision]);
+
+	useEffect(() => {
+		const handler = (e: KeyboardEvent) => {
+			if (phase !== "pending") return;
+			if (e.key === "Enter" && !e.shiftKey && !e.ctrlKey && !e.metaKey) {
+				e.preventDefault();
+				handleStop();
+			}
+		};
+		window.addEventListener("keydown", handler);
+		return () => window.removeEventListener("keydown", handler);
+	}, [phase, handleStop]);
+
+	const isResolved = phase === "complete" || phase === "rejected";
+
+	return (
+		<Alert>
+			<OctagonAlert />
+			<AlertTitle>
+				{phase === "rejected"
+					? "Stopped"
+					: phase === "processing"
+						? "Continuing…"
+						: phase === "complete"
+							? "Continued"
+							: "I might be stuck"}
+			</AlertTitle>
+			{!isResolved && (
+				<Badge>doom-loop</Badge>
+			)}
+			<AlertDescription>
+				{phase === "processing" ? (
+					<TextShimmerLoader />
+				) : phase === "rejected" ? (
+					<p>I stopped retrying {displayName} as you asked.</p>
+				) : phase === "complete" ? (
+					<p>Continuing to call {displayName} as you asked.</p>
+				) : (
+					<p>
+						I called {displayName} {threshold} times in a row
+						with similar arguments. Should I keep going or stop and rethink?
+					</p>
+				)}
+
+				{argPreview && phase === "pending" && (
+					<>
+						<Separator />
+						<div>
+							<span>Last arguments</span>
+							<pre>{argPreview}</pre>
+						</div>
+					</>
+				)}
+
+				{recentSignatures.length > 0 && phase === "pending" && (
+					<details>
+						<summary>Show repeated signatures ({recentSignatures.length})</summary>
+						<div>
+							{recentSignatures.map((sig) => (
+								<div key={sig}>• {sig}</div>
+							))}
+						</div>
+					</details>
+				)}
+
+				{phase === "pending" && (
+					<div>
+						<Button onClick={handleStop}>
+							Stop <CornerDownLeftIcon />
+						</Button>
+						<Button onClick={handleContinue}>Keep going</Button>
+					</div>
+				)}
+			</AlertDescription>
+		</Alert>
+	);
+}
+
+/**
+ * Discriminator: returns true iff the result is a ``permission_ask``
+ * interrupt with ``context.permission === "doom_loop"``. The fallback
+ * uses this BEFORE mounting an approval card to choose between
+ * ``DoomLoopApproval`` and ``GenericHitlApproval``.
+ */
+export function isDoomLoopInterrupt(result: unknown): boolean {
+	if (!isInterruptResult(result)) return false;
+	const ctx = (result.context ?? {}) as Record<string, unknown>;
+	return ctx.permission === "doom_loop";
+}
+
+/**
+ * Specialized doom-loop approval mounted by ``FallbackToolBody`` when
+ * ``isDoomLoopInterrupt(result)`` is true. Caller is responsible for
+ * the discrimination; this card receives a known ``InterruptResult``.
+ */
+export const DoomLoopApproval: HitlApprovalCard = ({ toolName, args, result }) => {
+	const { dispatch } = useHitlDecision();
+	return (
+		<DoomLoopCardView
+			toolName={toolName}
+			args={args}
+			interruptData={result}
+			onDecision={(decision) => dispatch([decision])}
+		/>
+	);
+};
diff --git a/surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx b/surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx
new file mode 100644
index 000000000..c8b35dbe0
--- /dev/null
+++ b/surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx
@@ -0,0 +1,261 @@
+"use client";
+
+import { CornerDownLeftIcon, Pencil } from "lucide-react";
+import { useCallback, useEffect, useMemo, useState } from "react";
+import { toast } from "sonner";
+import { TextShimmerLoader } from "@/components/prompt-kit/loader";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Textarea } from "@/components/ui/textarea";
+import { getToolDisplayName } from "@/contracts/enums/toolIcons";
+import { connectorsApiService } from "@/lib/apis/connectors-api.service";
+import type { HitlApprovalCard, HitlDecision, InterruptResult } from "../types";
+import { useHitlDecision } from "../use-hitl-decision";
+import { useHitlPhase } from "../use-hitl-phase";
+
+function ParamEditor({
+	params,
+	onChange,
+	disabled,
+}: {
+	params: Record<string, unknown>;
+	onChange: (updated: Record<string, unknown>) => void;
+	disabled: boolean;
+}) {
+	const entries = Object.entries(params);
+	if (entries.length === 0) return null;
+
+	return (
+		<div>
+			{entries.map(([key, value]) => {
+				const strValue = value == null ? "" : String(value);
+				const isLong = strValue.length > 120;
+				const fieldId = `hitl-param-${key}`;
+
+				return (
+					<div key={key}>
+						<label htmlFor={fieldId}>{key}</label>
+						{isLong ? (