From a9bf7ab7d23d27141da245fbd85d773071203940 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 01/58] Add SSE envelope helpers under app.services.streaming. --- .../services/streaming/envelope/__init__.py | 23 +++++++++++++++++ .../streaming/envelope/identifiers.py | 25 +++++++++++++++++++ .../app/services/streaming/envelope/sse.py | 25 +++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/envelope/__init__.py create mode 100644 surfsense_backend/app/services/streaming/envelope/identifiers.py create mode 100644 surfsense_backend/app/services/streaming/envelope/sse.py diff --git a/surfsense_backend/app/services/streaming/envelope/__init__.py b/surfsense_backend/app/services/streaming/envelope/__init__.py new file mode 100644 index 000000000..862e84c8d --- /dev/null +++ b/surfsense_backend/app/services/streaming/envelope/__init__.py @@ -0,0 +1,23 @@ +"""Wire framing layer.""" + +from __future__ import annotations + +from .identifiers import ( + generate_message_id, + generate_reasoning_id, + generate_subagent_run_id, + generate_text_id, + generate_tool_call_id, +) +from .sse import format_done, format_sse, get_response_headers + +__all__ = [ + "format_done", + "format_sse", + "generate_message_id", + "generate_reasoning_id", + "generate_subagent_run_id", + "generate_text_id", + "generate_tool_call_id", + "get_response_headers", +] diff --git a/surfsense_backend/app/services/streaming/envelope/identifiers.py b/surfsense_backend/app/services/streaming/envelope/identifiers.py new file mode 100644 index 000000000..2fdd6ff09 --- /dev/null +++ b/surfsense_backend/app/services/streaming/envelope/identifiers.py @@ -0,0 +1,25 @@ +"""Prefixed UUID generators for stream parts.""" + +from __future__ import annotations + +import uuid + + +def generate_message_id() -> str: + return f"msg_{uuid.uuid4().hex}" + + +def generate_text_id() -> str: + return f"text_{uuid.uuid4().hex}" + + +def generate_reasoning_id() -> str: + return f"reasoning_{uuid.uuid4().hex}" + + +def generate_tool_call_id() -> str: + return f"call_{uuid.uuid4().hex}" + + +def generate_subagent_run_id() -> str: + return f"subagent_{uuid.uuid4().hex}" diff --git a/surfsense_backend/app/services/streaming/envelope/sse.py b/surfsense_backend/app/services/streaming/envelope/sse.py new file mode 100644 index 000000000..508fc1b1c --- /dev/null +++ b/surfsense_backend/app/services/streaming/envelope/sse.py @@ -0,0 +1,25 @@ +"""Server-Sent-Events wire framing.""" + +from __future__ import annotations + +import json +from typing import Any + + +def format_sse(data: Any) -> str: + if isinstance(data, str): + return f"data: {data}\n\n" + return f"data: {json.dumps(data)}\n\n" + + +def format_done() -> str: + return "data: [DONE]\n\n" + + +def get_response_headers() -> dict[str, str]: + return { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "x-vercel-ai-ui-message-stream": "v1", + } From 5510c6c3147e80a4de1b6a25e58bc6479032482e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 02/58] Add typed event payload modules for the streaming service. 
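Each module owns one event family and returns a ready-to-send SSE frame.
A quick sketch of the resulting wire shape (illustrative values; with no
emitter passed, no ``emitted_by`` field is attached; note these modules
import from ``..emitter``, which lands later in this series, so the sketch
assumes the full series is applied):

    from app.services.streaming.events.data import format_terminal_info

    frame = format_terminal_info("Indexing 12 files")
    # frame == 'data: {"type": "data-terminal-info", "data": {"text": "Indexing 12 files", "type": "info"}}\n\n'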
--- .../app/services/streaming/events/__init__.py | 29 +++++ .../services/streaming/events/action_log.py | 24 ++++ .../app/services/streaming/events/data.py | 118 ++++++++++++++++++ .../app/services/streaming/events/error.py | 23 ++++ .../services/streaming/events/interrupt.py | 56 +++++++++ .../services/streaming/events/lifecycle.py | 29 +++++ .../services/streaming/events/reasoning.py | 36 ++++++ .../app/services/streaming/events/source.py | 59 +++++++++ .../streaming/events/subagent_lifecycle.py | 86 +++++++++++++ .../app/services/streaming/events/text.py | 31 +++++ .../app/services/streaming/events/tool.py | 80 ++++++++++++ 11 files changed, 571 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/events/__init__.py create mode 100644 surfsense_backend/app/services/streaming/events/action_log.py create mode 100644 surfsense_backend/app/services/streaming/events/data.py create mode 100644 surfsense_backend/app/services/streaming/events/error.py create mode 100644 surfsense_backend/app/services/streaming/events/interrupt.py create mode 100644 surfsense_backend/app/services/streaming/events/lifecycle.py create mode 100644 surfsense_backend/app/services/streaming/events/reasoning.py create mode 100644 surfsense_backend/app/services/streaming/events/source.py create mode 100644 surfsense_backend/app/services/streaming/events/subagent_lifecycle.py create mode 100644 surfsense_backend/app/services/streaming/events/text.py create mode 100644 surfsense_backend/app/services/streaming/events/tool.py diff --git a/surfsense_backend/app/services/streaming/events/__init__.py b/surfsense_backend/app/services/streaming/events/__init__.py new file mode 100644 index 000000000..91a8ff854 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/__init__.py @@ -0,0 +1,29 @@ +"""SSE event payload formatters, one module per event family.""" + +from __future__ import annotations + +from . import ( + action_log, + data, + error, + interrupt, + lifecycle, + reasoning, + source, + subagent_lifecycle, + text, + tool, +) + +__all__ = [ + "action_log", + "data", + "error", + "interrupt", + "lifecycle", + "reasoning", + "source", + "subagent_lifecycle", + "text", + "tool", +] diff --git a/surfsense_backend/app/services/streaming/events/action_log.py b/surfsense_backend/app/services/streaming/events/action_log.py new file mode 100644 index 000000000..0a8e46f0a --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/action_log.py @@ -0,0 +1,24 @@ +"""Action-log events relayed from ``ActionLogMiddleware`` custom dispatches.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter +from .data import format_data + + +def format_action_log( + payload: dict[str, Any], + *, + emitter: Emitter | None = None, +) -> str: + return format_data("action-log", payload, emitter=emitter) + + +def format_action_log_updated( + payload: dict[str, Any], + *, + emitter: Emitter | None = None, +) -> str: + return format_data("action-log-updated", payload, emitter=emitter) diff --git a/surfsense_backend/app/services/streaming/events/data.py b/surfsense_backend/app/services/streaming/events/data.py new file mode 100644 index 000000000..f6e190578 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/data.py @@ -0,0 +1,118 @@ +"""Generic ``data-*`` envelopes and SurfSense-specific data parts. + +Inner ``data`` dict fields use snake_case. Legacy ``threadId`` / +``messageId`` keys are preserved where they cross the AI SDK boundary. 
+""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_data( + data_type: str, + data: Any, + *, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = {"type": f"data-{data_type}", "data": data} + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_terminal_info( + text: str, + *, + message_type: str = "info", + emitter: Emitter | None = None, +) -> str: + return format_data( + "terminal-info", + {"text": text, "type": message_type}, + emitter=emitter, + ) + + +def format_further_questions( + questions: list[str], + *, + emitter: Emitter | None = None, +) -> str: + return format_data("further-questions", {"questions": questions}, emitter=emitter) + + +def format_thinking_step( + *, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, + emitter: Emitter | None = None, +) -> str: + return format_data( + "thinking-step", + { + "id": step_id, + "title": title, + "status": status, + "items": items or [], + }, + emitter=emitter, + ) + + +def format_thread_title_update( + *, + thread_id: int, + title: str, + emitter: Emitter | None = None, +) -> str: + return format_data( + "thread-title-update", + {"threadId": thread_id, "title": title}, + emitter=emitter, + ) + + +def format_turn_info( + *, + chat_turn_id: str, + emitter: Emitter | None = None, +) -> str: + return format_data("turn-info", {"chat_turn_id": chat_turn_id}, emitter=emitter) + + +def format_turn_status( + *, + status: str, + emitter: Emitter | None = None, +) -> str: + return format_data("turn-status", {"status": status}, emitter=emitter) + + +def format_user_message_id( + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, +) -> str: + return format_data( + "user-message-id", + {"message_id": message_id, "turn_id": turn_id}, + emitter=emitter, + ) + + +def format_assistant_message_id( + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, +) -> str: + return format_data( + "assistant-message-id", + {"message_id": message_id, "turn_id": turn_id}, + emitter=emitter, + ) diff --git a/surfsense_backend/app/services/streaming/events/error.py b/surfsense_backend/app/services/streaming/events/error.py new file mode 100644 index 000000000..cd190d1f4 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/error.py @@ -0,0 +1,23 @@ +"""Single terminal error path the orchestrator must route through.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_error( + error_text: str, + *, + error_code: str | None = None, + extra: dict[str, Any] | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = {"type": "error", "errorText": error_text} + if error_code: + payload["errorCode"] = error_code + if extra: + payload.update(extra) + return format_sse(attach_emitted_by(payload, emitter)) diff --git a/surfsense_backend/app/services/streaming/events/interrupt.py b/surfsense_backend/app/services/streaming/events/interrupt.py new file mode 100644 index 000000000..0334b10b3 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/interrupt.py @@ -0,0 +1,56 @@ +"""Interrupt-request events with a single canonical payload shape.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter +from .data import format_data + 
+
+def normalize_interrupt_payload(interrupt_value: dict[str, Any]) -> dict[str, Any]:
+    if "action_requests" in interrupt_value and "review_configs" in interrupt_value:
+        return interrupt_value
+
+    interrupt_type = interrupt_value.get("type", "unknown")
+    message = interrupt_value.get("message")
+    action = interrupt_value.get("action", {}) or {}
+    context = interrupt_value.get("context", {}) or {}
+
+    normalized: dict[str, Any] = {
+        "action_requests": [
+            {
+                "name": action.get("tool", "unknown_tool"),
+                "args": action.get("params", {}),
+            }
+        ],
+        "review_configs": [
+            {
+                "action_name": action.get("tool", "unknown_tool"),
+                "allowed_decisions": ["approve", "edit", "reject"],
+            }
+        ],
+        "interrupt_type": interrupt_type,
+        "context": context,
+    }
+    if message:
+        normalized["message"] = message
+    return normalized
+
+
+def format_interrupt_request(
+    interrupt_value: dict[str, Any],
+    *,
+    interrupt_id: str | None = None,
+    pending_interrupt_count: int | None = None,
+    chat_turn_id: str | None = None,
+    emitter: Emitter | None = None,
+) -> str:
+    payload = normalize_interrupt_payload(interrupt_value)
+    if interrupt_id is not None:
+        payload["interrupt_id"] = interrupt_id
+    if pending_interrupt_count is not None:
+        payload["pending_interrupt_count"] = pending_interrupt_count
+    if chat_turn_id is not None:
+        payload["chat_turn_id"] = chat_turn_id
+    return format_data("interrupt-request", payload, emitter=emitter)
diff --git a/surfsense_backend/app/services/streaming/events/lifecycle.py b/surfsense_backend/app/services/streaming/events/lifecycle.py
new file mode 100644
index 000000000..019718b67
--- /dev/null
+++ b/surfsense_backend/app/services/streaming/events/lifecycle.py
@@ -0,0 +1,29 @@
+"""High-level message and step lifecycle events.
+
+Wire verbs are fixed by the AI SDK protocol (``start`` / ``finish`` for
+the whole message, ``start-step`` / ``finish-step`` for each step).
+Python helpers always read ``format_<subject>_<verb>`` so pairs are
+visible at the call site.
+""" + +from __future__ import annotations + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_message_start(message_id: str, *, emitter: Emitter | None = None) -> str: + payload = {"type": "start", "messageId": message_id} + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_message_finish(*, emitter: Emitter | None = None) -> str: + return format_sse(attach_emitted_by({"type": "finish"}, emitter)) + + +def format_step_start(*, emitter: Emitter | None = None) -> str: + return format_sse(attach_emitted_by({"type": "start-step"}, emitter)) + + +def format_step_finish(*, emitter: Emitter | None = None) -> str: + return format_sse(attach_emitted_by({"type": "finish-step"}, emitter)) diff --git a/surfsense_backend/app/services/streaming/events/reasoning.py b/surfsense_backend/app/services/streaming/events/reasoning.py new file mode 100644 index 000000000..5b912d43a --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/reasoning.py @@ -0,0 +1,36 @@ +"""Reasoning-block streaming events.""" + +from __future__ import annotations + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_reasoning_start( + reasoning_id: str, *, emitter: Emitter | None = None +) -> str: + return format_sse( + attach_emitted_by({"type": "reasoning-start", "id": reasoning_id}, emitter) + ) + + +def format_reasoning_delta( + reasoning_id: str, + delta: str, + *, + emitter: Emitter | None = None, +) -> str: + return format_sse( + attach_emitted_by( + {"type": "reasoning-delta", "id": reasoning_id, "delta": delta}, + emitter, + ) + ) + + +def format_reasoning_end( + reasoning_id: str, *, emitter: Emitter | None = None +) -> str: + return format_sse( + attach_emitted_by({"type": "reasoning-end", "id": reasoning_id}, emitter) + ) diff --git a/surfsense_backend/app/services/streaming/events/source.py b/surfsense_backend/app/services/streaming/events/source.py new file mode 100644 index 000000000..54541e8d2 --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/source.py @@ -0,0 +1,59 @@ +"""Source and file reference events.""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_source_url( + url: str, + *, + source_id: str | None = None, + title: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "source-url", + "sourceId": source_id or url, + "url": url, + } + if title: + payload["title"] = title + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_source_document( + source_id: str, + *, + media_type: str = "file", + title: str | None = None, + description: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "source-document", + "sourceId": source_id, + "mediaType": media_type, + } + if title: + payload["title"] = title + if description: + payload["description"] = description + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_file( + url: str, + media_type: str, + *, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "file", + "url": url, + "mediaType": media_type, + } + return format_sse(attach_emitted_by(payload, emitter)) diff --git a/surfsense_backend/app/services/streaming/events/subagent_lifecycle.py b/surfsense_backend/app/services/streaming/events/subagent_lifecycle.py new file mode 
100644 index 000000000..6dd2d4eab --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/subagent_lifecycle.py @@ -0,0 +1,86 @@ +"""Sub-agent lifecycle events the FE pairs into one timeline lane. + +A sub-agent run is a high-level boundary (a whole agent invocation), +so we use the ``start`` / ``finish`` verb pair, matching how the AI SDK +spells message- and step-level lifecycles. +""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter +from .data import format_data + + +def format_subagent_start( + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + chat_turn_id: str | None = None, + description: str | None = None, + started_at: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "subagent_run_id": subagent_run_id, + "subagent_type": subagent_type, + "parent_tool_call_id": parent_tool_call_id, + } + if chat_turn_id is not None: + payload["chat_turn_id"] = chat_turn_id + if description is not None: + payload["description"] = description + if started_at is not None: + payload["started_at"] = started_at + return format_data("subagent-start", payload, emitter=emitter) + + +def format_subagent_finish( + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + status: str = "completed", + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "subagent_run_id": subagent_run_id, + "subagent_type": subagent_type, + "parent_tool_call_id": parent_tool_call_id, + "status": status, + } + if ended_at is not None: + payload["ended_at"] = ended_at + if duration_ms is not None: + payload["duration_ms"] = duration_ms + return format_data("subagent-finish", payload, emitter=emitter) + + +def format_subagent_error( + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + error_text: str, + error_type: str | None = None, + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "subagent_run_id": subagent_run_id, + "subagent_type": subagent_type, + "parent_tool_call_id": parent_tool_call_id, + "error_text": error_text, + } + if error_type is not None: + payload["error_type"] = error_type + if ended_at is not None: + payload["ended_at"] = ended_at + if duration_ms is not None: + payload["duration_ms"] = duration_ms + return format_data("subagent-error", payload, emitter=emitter) diff --git a/surfsense_backend/app/services/streaming/events/text.py b/surfsense_backend/app/services/streaming/events/text.py new file mode 100644 index 000000000..3baebdebb --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/text.py @@ -0,0 +1,31 @@ +"""Text-block streaming events.""" + +from __future__ import annotations + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_text_start(text_id: str, *, emitter: Emitter | None = None) -> str: + return format_sse( + attach_emitted_by({"type": "text-start", "id": text_id}, emitter) + ) + + +def format_text_delta( + text_id: str, + delta: str, + *, + emitter: Emitter | None = None, +) -> str: + return format_sse( + attach_emitted_by( + {"type": "text-delta", "id": text_id, "delta": delta}, emitter + ) + ) + + +def format_text_end(text_id: str, *, emitter: Emitter | None = None) -> str: + return format_sse( + attach_emitted_by({"type": "text-end", "id": text_id}, emitter) + ) diff 
--git a/surfsense_backend/app/services/streaming/events/tool.py b/surfsense_backend/app/services/streaming/events/tool.py new file mode 100644 index 000000000..c85dc061b --- /dev/null +++ b/surfsense_backend/app/services/streaming/events/tool.py @@ -0,0 +1,80 @@ +"""Tool-call streaming events. + +``toolCallId`` and ``langchainToolCallId`` are AI SDK protocol fields +and stay camelCase. Sub-agent provenance rides on the snake_case +top-level ``emitted_by`` envelope added by :func:`attach_emitted_by`. +""" + +from __future__ import annotations + +from typing import Any + +from ..emitter import Emitter, attach_emitted_by +from ..envelope import format_sse + + +def format_tool_input_start( + tool_call_id: str, + tool_name: str, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-input-start", + "toolCallId": tool_call_id, + "toolName": tool_name, + } + if langchain_tool_call_id: + payload["langchainToolCallId"] = langchain_tool_call_id + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_tool_input_delta( + tool_call_id: str, + input_text_delta: str, + *, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-input-delta", + "toolCallId": tool_call_id, + "inputTextDelta": input_text_delta, + } + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_tool_input_available( + tool_call_id: str, + tool_name: str, + input_data: dict[str, Any], + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-input-available", + "toolCallId": tool_call_id, + "toolName": tool_name, + "input": input_data, + } + if langchain_tool_call_id: + payload["langchainToolCallId"] = langchain_tool_call_id + return format_sse(attach_emitted_by(payload, emitter)) + + +def format_tool_output_available( + tool_call_id: str, + output: Any, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, +) -> str: + payload: dict[str, Any] = { + "type": "tool-output-available", + "toolCallId": tool_call_id, + "output": output, + } + if langchain_tool_call_id: + payload["langchainToolCallId"] = langchain_tool_call_id + return format_sse(attach_emitted_by(payload, emitter)) From fc429d87024a6a0a36d520f23dd584bcf7bd8262 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 03/58] Add streaming emitter and registry for scoped SSE writes. --- .../services/streaming/emitter/__init__.py | 29 +++++++++ .../app/services/streaming/emitter/emitter.py | 61 +++++++++++++++++++ .../services/streaming/emitter/registry.py | 51 ++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/emitter/__init__.py create mode 100644 surfsense_backend/app/services/streaming/emitter/emitter.py create mode 100644 surfsense_backend/app/services/streaming/emitter/registry.py diff --git a/surfsense_backend/app/services/streaming/emitter/__init__.py b/surfsense_backend/app/services/streaming/emitter/__init__.py new file mode 100644 index 000000000..7814894f3 --- /dev/null +++ b/surfsense_backend/app/services/streaming/emitter/__init__.py @@ -0,0 +1,29 @@ +"""Identity of the agent that emitted a streamed event. + +The wire field is ``emitted_by``; the Python identity is :class:`Emitter`. 
+``EmitterRegistry`` resolves which emitter owns a LangGraph event, with +LangGraph's own namespace metadata as the primary key and a parent_ids +walk as a fallback for cases where context vars don't propagate. +""" + +from __future__ import annotations + +from .emitter import ( + MAIN_EMITTER, + Emitter, + EmitterLevel, + attach_emitted_by, + main_emitter, + subagent_emitter, +) +from .registry import EmitterRegistry + +__all__ = [ + "MAIN_EMITTER", + "Emitter", + "EmitterLevel", + "EmitterRegistry", + "attach_emitted_by", + "main_emitter", + "subagent_emitter", +] diff --git a/surfsense_backend/app/services/streaming/emitter/emitter.py b/surfsense_backend/app/services/streaming/emitter/emitter.py new file mode 100644 index 000000000..08f625a69 --- /dev/null +++ b/surfsense_backend/app/services/streaming/emitter/emitter.py @@ -0,0 +1,61 @@ +"""Identity payload describing which agent produced a stream event.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal + +EmitterLevel = Literal["main", "subagent"] + + +@dataclass(frozen=True) +class Emitter: + level: EmitterLevel + subagent_type: str | None = None + subagent_run_id: str | None = None + parent_tool_call_id: str | None = None + extra: dict[str, Any] = field(default_factory=dict) + + def to_payload(self) -> dict[str, Any]: + payload: dict[str, Any] = {"level": self.level} + if self.subagent_type is not None: + payload["subagent_type"] = self.subagent_type + if self.subagent_run_id is not None: + payload["subagent_run_id"] = self.subagent_run_id + if self.parent_tool_call_id is not None: + payload["parent_tool_call_id"] = self.parent_tool_call_id + if self.extra: + payload.update(self.extra) + return payload + + +MAIN_EMITTER = Emitter(level="main") + + +def main_emitter() -> Emitter: + return MAIN_EMITTER + + +def subagent_emitter( + *, + subagent_type: str, + subagent_run_id: str, + parent_tool_call_id: str | None = None, + extra: dict[str, Any] | None = None, +) -> Emitter: + return Emitter( + level="subagent", + subagent_type=subagent_type, + subagent_run_id=subagent_run_id, + parent_tool_call_id=parent_tool_call_id, + extra=dict(extra or {}), + ) + + +def attach_emitted_by( + payload: dict[str, Any], emitter: Emitter | None +) -> dict[str, Any]: + if emitter is None: + return payload + payload["emitted_by"] = emitter.to_payload() + return payload diff --git a/surfsense_backend/app/services/streaming/emitter/registry.py b/surfsense_backend/app/services/streaming/emitter/registry.py new file mode 100644 index 000000000..cd3e10cdd --- /dev/null +++ b/surfsense_backend/app/services/streaming/emitter/registry.py @@ -0,0 +1,51 @@ +"""Resolve which agent owns a streamed event from its LangGraph run lineage.""" + +from __future__ import annotations + +from collections.abc import Iterable + +from .emitter import Emitter, main_emitter + + +class EmitterRegistry: + def __init__(self) -> None: + self._by_run_id: dict[str, Emitter] = {} + + def register(self, run_id: str, emitter: Emitter) -> None: + if not run_id: + return + self._by_run_id[run_id] = emitter + + def unregister(self, run_id: str) -> Emitter | None: + if not run_id: + return None + return self._by_run_id.pop(run_id, None) + + def get(self, run_id: str | None) -> Emitter | None: + if not run_id: + return None + return self._by_run_id.get(run_id) + + def resolve( + self, + *, + run_id: str | None, + parent_ids: Iterable[str] | None, + ) -> Emitter: + own = self.get(run_id) + if own is not None: + return own + if 
parent_ids: + for ancestor in reversed(list(parent_ids)): + emitter = self.get(ancestor) + if emitter is not None: + return emitter + return main_emitter() + + def has_active_subagents(self) -> bool: + return any( + emitter.level == "subagent" for emitter in self._by_run_id.values() + ) + + def clear(self) -> None: + self._by_run_id.clear() From fef7621d96c63986e723228c1dcff4a918b443e9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:47 +0200 Subject: [PATCH 04/58] Add StreamingService and interrupt correlation for chat streams. --- .../app/services/streaming/__init__.py | 20 + .../streaming/interrupt_correlation.py | 84 ++++ .../app/services/streaming/service.py | 414 ++++++++++++++++++ 3 files changed, 518 insertions(+) create mode 100644 surfsense_backend/app/services/streaming/__init__.py create mode 100644 surfsense_backend/app/services/streaming/interrupt_correlation.py create mode 100644 surfsense_backend/app/services/streaming/service.py diff --git a/surfsense_backend/app/services/streaming/__init__.py b/surfsense_backend/app/services/streaming/__init__.py new file mode 100644 index 000000000..287d48a7a --- /dev/null +++ b/surfsense_backend/app/services/streaming/__init__.py @@ -0,0 +1,20 @@ +"""Single-responsibility split of the streaming SSE protocol. + +Layout: +* ``envelope/`` - SSE wire framing + ID generators +* ``emitter/`` - identity of the agent that emitted an event + runtime registry +* ``events/`` - one module per SSE event family +* ``service.py`` - composition root used by the orchestrator +* ``interrupt_correlation.py`` - id-aware lookup over LangGraph state + +Naming on the wire: +* AI SDK protocol fields keep their existing camelCase + (``toolCallId``, ``messageId``, ``inputTextDelta``, ``langchainToolCallId``). +* Every SurfSense-added field uses ``snake_case``, including the + top-level ``emitted_by`` envelope and all inner ``data`` payloads. + +Production keeps using ``app.services.new_streaming_service`` and +``app.tasks.chat.stream_new_chat`` until the cutover phase. 
+""" + +from __future__ import annotations diff --git a/surfsense_backend/app/services/streaming/interrupt_correlation.py b/surfsense_backend/app/services/streaming/interrupt_correlation.py new file mode 100644 index 000000000..3045dfb6a --- /dev/null +++ b/surfsense_backend/app/services/streaming/interrupt_correlation.py @@ -0,0 +1,84 @@ +"""Id-aware lookup of pending LangGraph interrupts (replaces first-wins).""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class PendingInterrupt: + interrupt_id: str | None + value: dict[str, Any] + source_task_id: str | None = None + + +def list_pending_interrupts(state: Any) -> list[PendingInterrupt]: + out: list[PendingInterrupt] = [] + + for task in getattr(state, "tasks", None) or (): + task_id = _safe_str(getattr(task, "id", None)) + for it in getattr(task, "interrupts", None) or (): + value = _coerce_interrupt_value(it) + if value is None: + continue + interrupt_id = _safe_str(getattr(it, "id", None)) + out.append( + PendingInterrupt( + interrupt_id=interrupt_id, + value=value, + source_task_id=task_id, + ) + ) + + for it in getattr(state, "interrupts", None) or (): + value = _coerce_interrupt_value(it) + if value is None: + continue + interrupt_id = _safe_str(getattr(it, "id", None)) + out.append(PendingInterrupt(interrupt_id=interrupt_id, value=value)) + + return out + + +def get_pending_interrupt_by_id( + state: Any, interrupt_id: str +) -> PendingInterrupt | None: + for pending in list_pending_interrupts(state): + if pending.interrupt_id == interrupt_id: + return pending + return None + + +def get_pending_interrupt_for_tool_call( + state: Any, tool_call_id: str +) -> PendingInterrupt | None: + for pending in list_pending_interrupts(state): + actions = pending.value.get("action_requests") + if not isinstance(actions, list): + continue + for action in actions: + if not isinstance(action, dict): + continue + if action.get("tool_call_id") == tool_call_id: + return pending + return None + + +def first_pending_interrupt(state: Any) -> PendingInterrupt | None: + """Explicit opt-in to legacy first-wins; prefer the id-aware helpers above.""" + pending = list_pending_interrupts(state) + return pending[0] if pending else None + + +def _coerce_interrupt_value(item: Any) -> dict[str, Any] | None: + if isinstance(item, dict): + return item if item else None + value = getattr(item, "value", None) + if isinstance(value, dict): + return value if value else None + return None + + +def _safe_str(value: Any) -> str | None: + return value if isinstance(value, str) and value else None diff --git a/surfsense_backend/app/services/streaming/service.py b/surfsense_backend/app/services/streaming/service.py new file mode 100644 index 000000000..5a75a1b2d --- /dev/null +++ b/surfsense_backend/app/services/streaming/service.py @@ -0,0 +1,414 @@ +"""Composition root: bundles every formatter + a per-invocation emitter registry.""" + +from __future__ import annotations + +from collections.abc import Iterable +from typing import Any + +from . 
import envelope +from .emitter import Emitter, EmitterRegistry +from .events import ( + action_log, + data, + error, + interrupt, + lifecycle, + reasoning, + source, + subagent_lifecycle, + text, + tool, +) + + +class StreamingService: + def __init__(self) -> None: + self._message_id: str | None = None + self.emitter_registry = EmitterRegistry() + + @property + def message_id(self) -> str | None: + return self._message_id + + def begin_message(self, message_id: str | None = None) -> str: + self._message_id = message_id or envelope.generate_message_id() + return self._message_id + + @staticmethod + def generate_text_id() -> str: + return envelope.generate_text_id() + + @staticmethod + def generate_reasoning_id() -> str: + return envelope.generate_reasoning_id() + + @staticmethod + def generate_tool_call_id() -> str: + return envelope.generate_tool_call_id() + + @staticmethod + def generate_subagent_run_id() -> str: + return envelope.generate_subagent_run_id() + + @staticmethod + def get_response_headers() -> dict[str, str]: + return envelope.get_response_headers() + + @staticmethod + def format_done() -> str: + return envelope.format_done() + + def resolve_emitter( + self, + *, + run_id: str | None, + parent_ids: Iterable[str] | None, + ) -> Emitter: + return self.emitter_registry.resolve(run_id=run_id, parent_ids=parent_ids) + + def format_message_start( + self, + message_id: str | None = None, + *, + emitter: Emitter | None = None, + ) -> str: + chosen = self.begin_message(message_id) + return lifecycle.format_message_start(chosen, emitter=emitter) + + def format_message_finish(self, *, emitter: Emitter | None = None) -> str: + return lifecycle.format_message_finish(emitter=emitter) + + def format_step_start(self, *, emitter: Emitter | None = None) -> str: + return lifecycle.format_step_start(emitter=emitter) + + def format_step_finish(self, *, emitter: Emitter | None = None) -> str: + return lifecycle.format_step_finish(emitter=emitter) + + def format_text_start( + self, text_id: str, *, emitter: Emitter | None = None + ) -> str: + return text.format_text_start(text_id, emitter=emitter) + + def format_text_delta( + self, text_id: str, delta: str, *, emitter: Emitter | None = None + ) -> str: + return text.format_text_delta(text_id, delta, emitter=emitter) + + def format_text_end( + self, text_id: str, *, emitter: Emitter | None = None + ) -> str: + return text.format_text_end(text_id, emitter=emitter) + + def format_reasoning_start( + self, reasoning_id: str, *, emitter: Emitter | None = None + ) -> str: + return reasoning.format_reasoning_start(reasoning_id, emitter=emitter) + + def format_reasoning_delta( + self, + reasoning_id: str, + delta: str, + *, + emitter: Emitter | None = None, + ) -> str: + return reasoning.format_reasoning_delta(reasoning_id, delta, emitter=emitter) + + def format_reasoning_end( + self, reasoning_id: str, *, emitter: Emitter | None = None + ) -> str: + return reasoning.format_reasoning_end(reasoning_id, emitter=emitter) + + def format_tool_input_start( + self, + tool_call_id: str, + tool_name: str, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_input_start( + tool_call_id, + tool_name, + langchain_tool_call_id=langchain_tool_call_id, + emitter=emitter, + ) + + def format_tool_input_delta( + self, + tool_call_id: str, + input_text_delta: str, + *, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_input_delta( + tool_call_id, input_text_delta, emitter=emitter + ) + 
+ def format_tool_input_available( + self, + tool_call_id: str, + tool_name: str, + input_data: dict[str, Any], + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_input_available( + tool_call_id, + tool_name, + input_data, + langchain_tool_call_id=langchain_tool_call_id, + emitter=emitter, + ) + + def format_tool_output_available( + self, + tool_call_id: str, + output: Any, + *, + langchain_tool_call_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return tool.format_tool_output_available( + tool_call_id, + output, + langchain_tool_call_id=langchain_tool_call_id, + emitter=emitter, + ) + + def format_source_url( + self, + url: str, + *, + source_id: str | None = None, + title: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return source.format_source_url( + url, source_id=source_id, title=title, emitter=emitter + ) + + def format_source_document( + self, + source_id: str, + *, + media_type: str = "file", + title: str | None = None, + description: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return source.format_source_document( + source_id, + media_type=media_type, + title=title, + description=description, + emitter=emitter, + ) + + def format_file( + self, url: str, media_type: str, *, emitter: Emitter | None = None + ) -> str: + return source.format_file(url, media_type, emitter=emitter) + + def format_data( + self, data_type: str, payload: Any, *, emitter: Emitter | None = None + ) -> str: + return data.format_data(data_type, payload, emitter=emitter) + + def format_terminal_info( + self, + text_value: str, + *, + message_type: str = "info", + emitter: Emitter | None = None, + ) -> str: + return data.format_terminal_info( + text_value, message_type=message_type, emitter=emitter + ) + + def format_further_questions( + self, + questions: list[str], + *, + emitter: Emitter | None = None, + ) -> str: + return data.format_further_questions(questions, emitter=emitter) + + def format_thinking_step( + self, + *, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, + emitter: Emitter | None = None, + ) -> str: + return data.format_thinking_step( + step_id=step_id, + title=title, + status=status, + items=items, + emitter=emitter, + ) + + def format_thread_title_update( + self, + *, + thread_id: int, + title: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_thread_title_update( + thread_id=thread_id, title=title, emitter=emitter + ) + + def format_turn_info( + self, + *, + chat_turn_id: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_turn_info(chat_turn_id=chat_turn_id, emitter=emitter) + + def format_turn_status( + self, + *, + status: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_turn_status(status=status, emitter=emitter) + + def format_user_message_id( + self, + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_user_message_id( + message_id=message_id, turn_id=turn_id, emitter=emitter + ) + + def format_assistant_message_id( + self, + *, + message_id: str, + turn_id: str, + emitter: Emitter | None = None, + ) -> str: + return data.format_assistant_message_id( + message_id=message_id, turn_id=turn_id, emitter=emitter + ) + + def format_error( + self, + error_text: str, + *, + error_code: str | None = None, + extra: dict[str, Any] | None = None, + emitter: Emitter | None = None, + ) -> str: + 
return error.format_error( + error_text, + error_code=error_code, + extra=extra, + emitter=emitter, + ) + + def format_interrupt_request( + self, + interrupt_value: dict[str, Any], + *, + interrupt_id: str | None = None, + pending_interrupt_count: int | None = None, + chat_turn_id: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return interrupt.format_interrupt_request( + interrupt_value, + interrupt_id=interrupt_id, + pending_interrupt_count=pending_interrupt_count, + chat_turn_id=chat_turn_id, + emitter=emitter, + ) + + def format_subagent_start( + self, + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + chat_turn_id: str | None = None, + description: str | None = None, + started_at: str | None = None, + emitter: Emitter | None = None, + ) -> str: + return subagent_lifecycle.format_subagent_start( + subagent_run_id=subagent_run_id, + subagent_type=subagent_type, + parent_tool_call_id=parent_tool_call_id, + chat_turn_id=chat_turn_id, + description=description, + started_at=started_at, + emitter=emitter, + ) + + def format_subagent_finish( + self, + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + status: str = "completed", + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, + ) -> str: + return subagent_lifecycle.format_subagent_finish( + subagent_run_id=subagent_run_id, + subagent_type=subagent_type, + parent_tool_call_id=parent_tool_call_id, + status=status, + ended_at=ended_at, + duration_ms=duration_ms, + emitter=emitter, + ) + + def format_subagent_error( + self, + *, + subagent_run_id: str, + subagent_type: str, + parent_tool_call_id: str, + error_text: str, + error_type: str | None = None, + ended_at: str | None = None, + duration_ms: int | None = None, + emitter: Emitter | None = None, + ) -> str: + return subagent_lifecycle.format_subagent_error( + subagent_run_id=subagent_run_id, + subagent_type=subagent_type, + parent_tool_call_id=parent_tool_call_id, + error_text=error_text, + error_type=error_type, + ended_at=ended_at, + duration_ms=duration_ms, + emitter=emitter, + ) + + def format_action_log( + self, + payload: dict[str, Any], + *, + emitter: Emitter | None = None, + ) -> str: + return action_log.format_action_log(payload, emitter=emitter) + + def format_action_log_updated( + self, + payload: dict[str, Any], + *, + emitter: Emitter | None = None, + ) -> str: + return action_log.format_action_log_updated(payload, emitter=emitter) From 3d8c4be369bc887695522e3a18a553cd04954a68 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 05/58] Add unit tests for streaming SSE envelope behavior. 
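The contract under test, from the consumer side: every frame is
``data: <payload>\n\n`` and the literal ``data: [DONE]\n\n`` closes the
stream. A minimal parsing sketch of that contract (hypothetical helper,
not part of this patch):

    import json

    def parse_frame(frame: str) -> dict | None:
        """Return the decoded payload, or None for the [DONE] marker."""
        body = frame.removeprefix("data: ").removesuffix("\n\n")
        if body == "[DONE]":
            return None
        return json.loads(body)

    assert parse_frame("data: [DONE]\n\n") is None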
--- .../services/streaming/test_sse_envelope.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py diff --git a/surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py b/surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py new file mode 100644 index 000000000..511e4575a --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_sse_envelope.py @@ -0,0 +1,51 @@ +"""Pin the exact SSE wire bytes the FE parser depends on.""" + +from __future__ import annotations + +import json + +import pytest + +from app.services.streaming.envelope import ( + format_done, + format_sse, + get_response_headers, +) + +pytestmark = pytest.mark.unit + + +class TestFormatSse: + def test_dict_payload_is_json_serialised(self) -> None: + frame = format_sse({"type": "start", "messageId": "msg_1"}) + assert frame.startswith("data: ") + assert frame.endswith("\n\n") + body = frame[len("data: ") : -2] + assert json.loads(body) == {"type": "start", "messageId": "msg_1"} + + def test_string_payload_is_emitted_verbatim(self) -> None: + frame = format_sse('{"already":"json"}') + assert frame == 'data: {"already":"json"}\n\n' + + def test_nested_payload_round_trips(self) -> None: + payload = { + "type": "data-action-log", + "data": {"id": 7, "tool_name": "ls", "reversible": False}, + } + frame = format_sse(payload) + body = frame.removeprefix("data: ").removesuffix("\n\n") + assert json.loads(body) == payload + + +class TestFormatDone: + def test_done_marker_is_literal(self) -> None: + assert format_done() == "data: [DONE]\n\n" + + +class TestResponseHeaders: + def test_headers_pin_ai_sdk_v1_protocol(self) -> None: + headers = get_response_headers() + assert headers["Content-Type"] == "text/event-stream" + assert headers["Cache-Control"] == "no-cache" + assert headers["Connection"] == "keep-alive" + assert headers["x-vercel-ai-ui-message-stream"] == "v1" From 619a8362b7b45034601f35cda15ea349b0f7c701 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 06/58] Add unit tests for streaming emitters and registry wiring. 
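The attribution rule these tests pin, sketched end to end (run ids are
illustrative):

    from app.services.streaming.emitter import EmitterRegistry, subagent_emitter

    registry = EmitterRegistry()
    registry.register(
        "run_task_a",
        subagent_emitter(subagent_type="search", subagent_run_id="sub_a"),
    )

    # A model call nested under the task tool inherits its sub-agent
    # emitter via the parent_ids walk (nearest registered ancestor wins)...
    resolved = registry.resolve(
        run_id="run_chat_model", parent_ids=["root", "run_task_a"]
    )
    assert resolved.subagent_run_id == "sub_a"

    # ...and unknown lineage always falls back to the main lane.
    assert registry.resolve(run_id="run_x", parent_ids=["root"]).level == "main"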
--- .../unit/services/streaming/test_emitter.py | 79 +++++++++++++ .../streaming/test_emitter_registry.py | 111 ++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 surfsense_backend/tests/unit/services/streaming/test_emitter.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py diff --git a/surfsense_backend/tests/unit/services/streaming/test_emitter.py b/surfsense_backend/tests/unit/services/streaming/test_emitter.py new file mode 100644 index 000000000..6c4e1ff58 --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_emitter.py @@ -0,0 +1,79 @@ +"""Pin the wire compactness rule and the top-level ``emitted_by`` field name.""" + +from __future__ import annotations + +import pytest + +from app.services.streaming.emitter import ( + Emitter, + attach_emitted_by, + main_emitter, + subagent_emitter, +) + +pytestmark = pytest.mark.unit + + +def test_main_emitter_payload_contains_only_level() -> None: + payload = main_emitter().to_payload() + assert payload == {"level": "main"} + + +def test_subagent_emitter_payload_includes_all_set_fields() -> None: + payload = subagent_emitter( + subagent_type="deliverables", + subagent_run_id="subagent_abc", + parent_tool_call_id="call_xyz", + ).to_payload() + assert payload == { + "level": "subagent", + "subagent_type": "deliverables", + "subagent_run_id": "subagent_abc", + "parent_tool_call_id": "call_xyz", + } + + +def test_subagent_emitter_payload_omits_unset_optional_fields() -> None: + """parent_tool_call_id is None when the run is started outside a tool boundary.""" + payload = Emitter( + level="subagent", + subagent_type="email", + subagent_run_id="subagent_1", + ).to_payload() + assert "parent_tool_call_id" not in payload + assert payload["subagent_type"] == "email" + + +def test_extra_fields_merge_into_payload() -> None: + """Future extension fields (e.g. 
lane colour, label) flow through ``extra``.""" + emitter = subagent_emitter( + subagent_type="search", + subagent_run_id="r1", + extra={"label": "Web Search"}, + ) + assert emitter.to_payload()["label"] == "Web Search" + + +def test_attach_emitted_by_with_none_is_noop() -> None: + payload = {"type": "text-delta", "delta": "hi"} + result = attach_emitted_by(payload, None) + assert "emitted_by" not in result + assert result is payload + + +def test_attach_emitted_by_adds_payload_under_snake_case_top_level_key() -> None: + payload = {"type": "text-delta", "delta": "hi"} + attach_emitted_by( + payload, + subagent_emitter( + subagent_type="x", + subagent_run_id="y", + parent_tool_call_id="z", + ), + ) + assert payload["emitted_by"] == { + "level": "subagent", + "subagent_type": "x", + "subagent_run_id": "y", + "parent_tool_call_id": "z", + } diff --git a/surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py b/surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py new file mode 100644 index 000000000..e459c946a --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_emitter_registry.py @@ -0,0 +1,111 @@ +"""Pin the parent_ids walk + parallel sub-agent isolation that drives lane attribution.""" + +from __future__ import annotations + +import pytest + +from app.services.streaming.emitter import ( + Emitter, + EmitterRegistry, + main_emitter, + subagent_emitter, +) + +pytestmark = pytest.mark.unit + + +def _sub(run_id: str, kind: str = "deliverables") -> Emitter: + return subagent_emitter( + subagent_type=kind, + subagent_run_id=f"sub_{run_id}", + parent_tool_call_id=f"call_{run_id}", + ) + + +def test_unregistered_event_resolves_to_main_emitter() -> None: + registry = EmitterRegistry() + resolved = registry.resolve(run_id="run_1", parent_ids=["root"]) + assert resolved is main_emitter() + + +def test_event_owned_by_registered_run_id_returns_that_emitter() -> None: + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + assert registry.resolve(run_id="run_task_a", parent_ids=[]) is emitter + + +def test_descendant_resolves_via_parent_ids_chain() -> None: + """A model-call event nested under the task tool inherits its sub-agent emitter.""" + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + descendant = registry.resolve( + run_id="run_chat_model", + parent_ids=["root", "run_agent", "run_task_a"], + ) + assert descendant is emitter + + +def test_nearest_registered_ancestor_wins_over_distant_ones() -> None: + """Inner sub-agents owe their emitter to the nearest task tool, not the outer one.""" + registry = EmitterRegistry() + outer = _sub("outer", kind="planner") + inner = _sub("inner", kind="email") + registry.register("run_outer", outer) + registry.register("run_inner", inner) + resolved = registry.resolve( + run_id="run_inner_tool", + parent_ids=["root", "run_outer", "run_inner"], + ) + assert resolved is inner + + +def test_parallel_subagents_do_not_bleed_into_each_other() -> None: + """Two concurrent task tools each own their own descendant events.""" + registry = EmitterRegistry() + a = _sub("a", kind="search") + b = _sub("b", kind="email") + registry.register("run_task_a", a) + registry.register("run_task_b", b) + + from_a = registry.resolve(run_id="x", parent_ids=["root", "run_task_a"]) + from_b = registry.resolve(run_id="y", parent_ids=["root", "run_task_b"]) + from_main = registry.resolve(run_id="z", parent_ids=["root"]) + + assert from_a is a + assert 
from_b is b + assert from_main is main_emitter() + + +def test_unregister_releases_run_id_so_descendants_fall_back_to_main() -> None: + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + registry.unregister("run_task_a") + assert registry.resolve(run_id="x", parent_ids=["run_task_a"]) is main_emitter() + + +def test_unregister_returns_the_previously_registered_emitter() -> None: + """Lets callers emit ``data-subagent-finish`` carrying the same emitter they opened with.""" + registry = EmitterRegistry() + emitter = _sub("a") + registry.register("run_task_a", emitter) + assert registry.unregister("run_task_a") is emitter + + +def test_has_active_subagents_tracks_open_lanes() -> None: + registry = EmitterRegistry() + assert not registry.has_active_subagents() + registry.register("run_task_a", _sub("a")) + assert registry.has_active_subagents() + registry.unregister("run_task_a") + assert not registry.has_active_subagents() + + +def test_empty_run_id_and_parent_ids_resolves_to_main() -> None: + """Defensive: events without identifiers always belong to the main lane.""" + registry = EmitterRegistry() + registry.register("run_task_a", _sub("a")) + assert registry.resolve(run_id=None, parent_ids=None) is main_emitter() + assert registry.resolve(run_id="", parent_ids=[]) is main_emitter() From 366122da6e4568289e131e2ac20be63e8bb5bd90 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 07/58] Add unit tests for streaming interrupts and service propagation. --- .../tests/unit/services/streaming/__init__.py | 0 .../streaming/test_interrupt_correlation.py | 164 ++++++++++++++++++ .../streaming/test_interrupt_events.py | 91 ++++++++++ .../test_service_emitter_propagation.py | 142 +++++++++++++++ 4 files changed, 397 insertions(+) create mode 100644 surfsense_backend/tests/unit/services/streaming/__init__.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py create mode 100644 surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py diff --git a/surfsense_backend/tests/unit/services/streaming/__init__.py b/surfsense_backend/tests/unit/services/streaming/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py b/surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py new file mode 100644 index 000000000..edf4ecb9a --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_interrupt_correlation.py @@ -0,0 +1,164 @@ +"""Pin id-aware pending-interrupt lookup that replaces the buggy first-wins.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest + +from app.services.streaming.interrupt_correlation import ( + PendingInterrupt, + first_pending_interrupt, + get_pending_interrupt_by_id, + get_pending_interrupt_for_tool_call, + list_pending_interrupts, +) + +pytestmark = pytest.mark.unit + + +@dataclass +class _Interrupt: + value: dict[str, Any] + id: str | None = None + + +@dataclass +class _Task: + interrupts: tuple[_Interrupt, ...] = () + id: str | None = None + + +@dataclass +class _State: + tasks: tuple[_Task, ...] = () + interrupts: tuple[_Interrupt, ...] 
= () + + +def _hitl(name: str, tool_call_id: str | None = None) -> dict[str, Any]: + """Minimal LangChain HITLRequest payload for one action.""" + action: dict[str, Any] = {"name": name, "args": {}} + if tool_call_id is not None: + action["tool_call_id"] = tool_call_id + return { + "action_requests": [action], + "review_configs": [{"action_name": name, "allowed_decisions": ["approve"]}], + } + + +def test_empty_state_has_no_pending_interrupts() -> None: + state = _State() + assert list_pending_interrupts(state) == [] + assert first_pending_interrupt(state) is None + + +def test_single_pending_interrupt_in_task_is_returned() -> None: + state = _State( + tasks=( + _Task( + id="task_1", + interrupts=(_Interrupt(value=_hitl("send_email"), id="int_1"),), + ), + ) + ) + pending = list_pending_interrupts(state) + assert len(pending) == 1 + assert pending[0] == PendingInterrupt( + interrupt_id="int_1", + value=_hitl("send_email"), + source_task_id="task_1", + ) + + +def test_pending_interrupts_returned_in_task_then_root_order() -> None: + """Determinism matters: callers iterate in this order to render the UI.""" + state = _State( + tasks=( + _Task( + id="task_a", + interrupts=(_Interrupt(value=_hitl("a"), id="int_a"),), + ), + _Task( + id="task_b", + interrupts=(_Interrupt(value=_hitl("b"), id="int_b"),), + ), + ), + interrupts=(_Interrupt(value=_hitl("c"), id="int_c"),), + ) + pending = list_pending_interrupts(state) + ids = [p.interrupt_id for p in pending] + assert ids == ["int_a", "int_b", "int_c"] + + +def test_get_by_id_finds_the_right_interrupt_under_parallel_load() -> None: + """Replacing first-wins: id-aware lookup MUST pick the requested one.""" + state = _State( + tasks=( + _Task(interrupts=(_Interrupt(value=_hitl("a"), id="int_a"),)), + _Task(interrupts=(_Interrupt(value=_hitl("b"), id="int_b"),)), + _Task(interrupts=(_Interrupt(value=_hitl("c"), id="int_c"),)), + ) + ) + found = get_pending_interrupt_by_id(state, "int_b") + assert found is not None + assert found.value["action_requests"][0]["name"] == "b" + + +def test_get_by_id_returns_none_when_id_is_not_pending() -> None: + state = _State( + tasks=(_Task(interrupts=(_Interrupt(value=_hitl("a"), id="int_a"),)),) + ) + assert get_pending_interrupt_by_id(state, "missing") is None + + +def test_get_by_tool_call_id_matches_action_request_payload() -> None: + """HITLRequest carries ``tool_call_id`` per action; lookup uses that.""" + state = _State( + tasks=( + _Task( + interrupts=( + _Interrupt( + value=_hitl("a", tool_call_id="call_xxx"), id="int_a" + ), + _Interrupt( + value=_hitl("b", tool_call_id="call_yyy"), id="int_b" + ), + ) + ), + ) + ) + found = get_pending_interrupt_for_tool_call(state, "call_yyy") + assert found is not None + assert found.interrupt_id == "int_b" + + +def test_first_pending_interrupt_matches_legacy_first_wins_behaviour() -> None: + """Sequential-turn safety: the explicit shortcut still returns the first.""" + state = _State( + tasks=(_Task(interrupts=(_Interrupt(value=_hitl("first"), id="int_1"),)),), + interrupts=(_Interrupt(value=_hitl("second"), id="int_2"),), + ) + first = first_pending_interrupt(state) + assert first is not None + assert first.interrupt_id == "int_1" + + +def test_interrupt_without_id_falls_back_to_none() -> None: + """Snapshots from older LangGraph versions may omit ``id`` — preserve that.""" + state = _State( + tasks=(_Task(interrupts=(_Interrupt(value=_hitl("a"), id=None),)),) + ) + pending = list_pending_interrupts(state) + assert len(pending) == 1 + assert pending[0].interrupt_id is 
None + + +def test_non_dict_interrupt_values_are_ignored() -> None: + """Defensive: a non-dict value should not crash the iteration.""" + + class _Raw: + value = "not a dict" + + state = _State(tasks=(_Task(interrupts=(_Raw(),)),)) # type: ignore[arg-type] + assert list_pending_interrupts(state) == [] diff --git a/surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py b/surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py new file mode 100644 index 000000000..dbdd607bf --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_interrupt_events.py @@ -0,0 +1,91 @@ +"""Pin interrupt-payload normalisation and the optional correlation fields on the wire.""" + +from __future__ import annotations + +import json + +import pytest + +from app.services.streaming.events.interrupt import ( + format_interrupt_request, + normalize_interrupt_payload, +) + +pytestmark = pytest.mark.unit + + +def _decode(frame: str) -> dict: + body = frame.removeprefix("data: ").removesuffix("\n\n") + return json.loads(body) + + +def test_hitlrequest_shape_is_passed_through_unchanged() -> None: + raw = { + "action_requests": [{"name": "send_email", "args": {"to": "a@b"}}], + "review_configs": [ + {"action_name": "send_email", "allowed_decisions": ["approve"]} + ], + } + assert normalize_interrupt_payload(raw) == raw + + +def test_custom_interrupt_primitive_is_converted_to_canonical_shape() -> None: + raw = { + "type": "permission", + "message": "Allow send?", + "action": {"tool": "send_email", "params": {"to": "a@b"}}, + "context": {"reason": "destructive"}, + } + out = normalize_interrupt_payload(raw) + assert out["action_requests"] == [ + {"name": "send_email", "args": {"to": "a@b"}} + ] + assert out["review_configs"] == [ + { + "action_name": "send_email", + "allowed_decisions": ["approve", "edit", "reject"], + } + ] + assert out["interrupt_type"] == "permission" + assert out["message"] == "Allow send?" 
+ assert out["context"] == {"reason": "destructive"} + + +def test_custom_interrupt_without_message_omits_message_key() -> None: + """Optional fields stay optional on the wire; FE does not see ``"message": None``.""" + raw = {"action": {"tool": "send_email"}} + out = normalize_interrupt_payload(raw) + assert "message" not in out + + +def test_custom_interrupt_without_tool_falls_back_to_unknown_tool() -> None: + """Defensive: a malformed ``action`` block must not crash the relay.""" + out = normalize_interrupt_payload({"type": "x", "action": {}}) + assert out["action_requests"][0]["name"] == "unknown_tool" + assert out["review_configs"][0]["action_name"] == "unknown_tool" + + +def test_format_interrupt_request_carries_correlation_fields_on_the_wire() -> None: + frame = format_interrupt_request( + {"action_requests": [], "review_configs": []}, + interrupt_id="int_42", + pending_interrupt_count=3, + chat_turn_id="turn_99", + ) + payload = _decode(frame) + assert payload["type"] == "data-interrupt-request" + inner = payload["data"] + assert inner["interrupt_id"] == "int_42" + assert inner["pending_interrupt_count"] == 3 + assert inner["chat_turn_id"] == "turn_99" + + +def test_format_interrupt_request_omits_correlation_fields_when_unset() -> None: + """Backward compat: legacy single-interrupt callers don't have to supply ids.""" + frame = format_interrupt_request( + {"action_requests": [], "review_configs": []}, + ) + inner = _decode(frame)["data"] + assert "interrupt_id" not in inner + assert "pending_interrupt_count" not in inner + assert "chat_turn_id" not in inner diff --git a/surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py b/surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py new file mode 100644 index 000000000..b381f13bc --- /dev/null +++ b/surfsense_backend/tests/unit/services/streaming/test_service_emitter_propagation.py @@ -0,0 +1,142 @@ +"""Pin that sub-agent emitter reaches every wire event the relay emits.""" + +from __future__ import annotations + +import json + +import pytest + +from app.services.streaming.emitter import subagent_emitter +from app.services.streaming.service import StreamingService + +pytestmark = pytest.mark.unit + + +def _decode(frame: str) -> dict: + body = frame.removeprefix("data: ").removesuffix("\n\n") + return json.loads(body) + + +@pytest.fixture +def service() -> StreamingService: + return StreamingService() + + +@pytest.fixture +def sub_emitter(): + return subagent_emitter( + subagent_type="deliverables", + subagent_run_id="sub_xyz", + parent_tool_call_id="call_parent", + ) + + +def test_text_delta_carries_subagent_emitter_on_the_wire(service, sub_emitter) -> None: + payload = _decode(service.format_text_delta("text_1", "hi", emitter=sub_emitter)) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + assert payload["delta"] == "hi" + + +def test_reasoning_delta_carries_subagent_emitter_on_the_wire( + service, sub_emitter +) -> None: + payload = _decode( + service.format_reasoning_delta("r_1", "thinking", emitter=sub_emitter) + ) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + + +def test_tool_input_start_carries_subagent_emitter_and_lc_id( + service, sub_emitter +) -> None: + payload = _decode( + service.format_tool_input_start( + "call_1", + "send_email", + langchain_tool_call_id="lc_1", + emitter=sub_emitter, + ) + ) + assert payload["emitted_by"]["subagent_type"] == "deliverables" + assert payload["langchainToolCallId"] == "lc_1" + assert 
payload["toolName"] == "send_email" + + +def test_tool_output_available_carries_subagent_emitter(service, sub_emitter) -> None: + payload = _decode( + service.format_tool_output_available( + "call_1", {"ok": True}, emitter=sub_emitter + ) + ) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + assert payload["output"] == {"ok": True} + + +def test_thinking_step_carries_subagent_emitter(service, sub_emitter) -> None: + payload = _decode( + service.format_thinking_step( + step_id="s1", + title="Sending email", + status="in_progress", + emitter=sub_emitter, + ) + ) + assert payload["type"] == "data-thinking-step" + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + + +def test_action_log_carries_subagent_emitter(service, sub_emitter) -> None: + payload = _decode( + service.format_action_log( + {"id": 1, "tool_name": "send_email", "reversible": False}, + emitter=sub_emitter, + ) + ) + assert payload["emitted_by"]["subagent_run_id"] == "sub_xyz" + assert payload["data"]["tool_name"] == "send_email" + + +def test_subagent_lifecycle_events_share_run_id_for_pairing( + service, sub_emitter +) -> None: + start = _decode( + service.format_subagent_start( + subagent_run_id="sub_xyz", + subagent_type="deliverables", + parent_tool_call_id="call_parent", + emitter=sub_emitter, + ) + ) + finish = _decode( + service.format_subagent_finish( + subagent_run_id="sub_xyz", + subagent_type="deliverables", + parent_tool_call_id="call_parent", + emitter=sub_emitter, + ) + ) + assert start["data"]["subagent_run_id"] == finish["data"]["subagent_run_id"] + assert start["type"] == "data-subagent-start" + assert finish["type"] == "data-subagent-finish" + + +def test_main_emitter_events_omit_emitted_by_field(service) -> None: + payload = _decode(service.format_text_delta("text_1", "hi")) + assert "emitted_by" not in payload + + +def test_resolve_emitter_through_service_uses_registry(service, sub_emitter) -> None: + service.emitter_registry.register("run_task_1", sub_emitter) + resolved = service.resolve_emitter( + run_id="run_chat_model", + parent_ids=["root", "run_task_1"], + ) + assert resolved is sub_emitter + + +def test_message_id_is_assigned_on_message_start_and_reused(service) -> None: + frame = service.format_message_start() + payload = _decode(frame) + assigned = payload["messageId"] + assert assigned.startswith("msg_") + assert service.message_id == assigned From c25b78c30492e53e5f1b69f831f15dda029c9d54 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 08/58] Add chat streaming error classification, helpers, and StreamResult. 
--- .../app/tasks/chat/streaming/__init__.py | 3 + .../tasks/chat/streaming/errors/__init__.py | 3 + .../tasks/chat/streaming/errors/classifier.py | 187 ++++++++++++++++++ .../tasks/chat/streaming/errors/emitter.py | 38 ++++ .../tasks/chat/streaming/helpers/__init__.py | 3 + .../chat/streaming/helpers/chunk_parts.py | 60 ++++++ .../streaming/helpers/interrupt_inspector.py | 47 +++++ .../streaming/helpers/tool_call_matching.py | 32 +++ .../chat/streaming/helpers/tool_output.py | 43 ++++ .../app/tasks/chat/streaming/stream_result.py | 28 +++ 10 files changed, 444 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/errors/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/errors/classifier.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/errors/emitter.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/stream_result.py diff --git a/surfsense_backend/app/tasks/chat/streaming/__init__.py b/surfsense_backend/app/tasks/chat/streaming/__init__.py new file mode 100644 index 000000000..bb06cc021 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/__init__.py @@ -0,0 +1,3 @@ +"""Chat streaming orchestrator and event relay.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/__init__.py b/surfsense_backend/app/tasks/chat/streaming/errors/__init__.py new file mode 100644 index 000000000..02284d4b0 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/errors/__init__.py @@ -0,0 +1,3 @@ +"""Error classification, structured logging, and terminal-error SSE emission.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py new file mode 100644 index 000000000..3af2b9f9f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py @@ -0,0 +1,187 @@ +"""Classify stream exceptions for logging and client error payloads.""" + +from __future__ import annotations + +import json +import logging +import time +from typing import Any, Literal + +from app.agents.new_chat.errors import BusyError +from app.agents.new_chat.middleware.busy_mutex import ( + get_cancel_state, + is_cancel_requested, +) + +TURN_CANCELLING_INITIAL_DELAY_MS = 200 +TURN_CANCELLING_BACKOFF_FACTOR = 2 +TURN_CANCELLING_MAX_DELAY_MS = 1500 + + +def compute_turn_cancelling_retry_delay(attempt: int) -> int: + if attempt < 1: + attempt = 1 + delay = TURN_CANCELLING_INITIAL_DELAY_MS * ( + TURN_CANCELLING_BACKOFF_FACTOR ** (attempt - 1) + ) + return min(delay, TURN_CANCELLING_MAX_DELAY_MS) + + +def log_chat_stream_error( + *, + flow: Literal["new", "resume", "regenerate"], + error_kind: str, + error_code: str | None, + severity: Literal["info", "warn", "error"], + is_expected: bool, + request_id: str | None, + thread_id: int | None, + search_space_id: int | None, + user_id: str | None, + message: str, + extra: dict[str, Any] | None = None, +) -> None: + 
payload: dict[str, Any] = { + "event": "chat_stream_error", + "flow": flow, + "error_kind": error_kind, + "error_code": error_code, + "severity": severity, + "is_expected": is_expected, + "request_id": request_id or "unknown", + "thread_id": thread_id, + "search_space_id": search_space_id, + "user_id": user_id, + "message": message, + } + if extra: + payload.update(extra) + + logger = logging.getLogger(__name__) + rendered = json.dumps(payload, ensure_ascii=False) + if severity == "error": + logger.error("[chat_stream_error] %s", rendered) + elif severity == "warn": + logger.warning("[chat_stream_error] %s", rendered) + else: + logger.info("[chat_stream_error] %s", rendered) + + +def _parse_error_payload(message: str) -> dict[str, Any] | None: + candidates = [message] + first_brace_idx = message.find("{") + if first_brace_idx >= 0: + candidates.append(message[first_brace_idx:]) + + for candidate in candidates: + try: + parsed = json.loads(candidate) + if isinstance(parsed, dict): + return parsed + except Exception: + continue + return None + + +def _extract_provider_error_code(parsed: dict[str, Any] | None) -> int | None: + if not isinstance(parsed, dict): + return None + candidates: list[Any] = [parsed.get("code")] + nested = parsed.get("error") + if isinstance(nested, dict): + candidates.append(nested.get("code")) + for value in candidates: + try: + if value is None: + continue + return int(value) + except Exception: + continue + return None + + +def is_provider_rate_limited(exc: BaseException) -> bool: + """Return True if the exception looks like an upstream HTTP 429 / rate limit.""" + raw = str(exc) + lowered = raw.lower() + if "ratelimit" in type(exc).__name__.lower(): + return True + parsed = _parse_error_payload(raw) + provider_code = _extract_provider_error_code(parsed) + if provider_code == 429: + return True + + provider_error_type = "" + if parsed: + top_type = parsed.get("type") + if isinstance(top_type, str): + provider_error_type = top_type.lower() + nested = parsed.get("error") + if isinstance(nested, dict): + nested_type = nested.get("type") + if isinstance(nested_type, str): + provider_error_type = nested_type.lower() + if provider_error_type == "rate_limit_error": + return True + + return ( + "rate limited" in lowered + or "rate-limited" in lowered + or "temporarily rate-limited upstream" in lowered + ) + + +def classify_stream_exception( + exc: Exception, + *, + flow_label: str, +) -> tuple[ + str, str, Literal["info", "warn", "error"], bool, str, dict[str, Any] | None +]: + """Return kind, code, severity, expected flag, message, and optional extra dict.""" + raw = str(exc) + if isinstance(exc, BusyError) or "Thread is busy with another request" in raw: + busy_thread_id = str(exc.request_id) if isinstance(exc, BusyError) else None + if busy_thread_id and is_cancel_requested(busy_thread_id): + cancel_state = get_cancel_state(busy_thread_id) + attempt = cancel_state[0] if cancel_state else 1 + retry_after_ms = compute_turn_cancelling_retry_delay(attempt) + retry_after_at = int(time.time() * 1000) + retry_after_ms + return ( + "thread_busy", + "TURN_CANCELLING", + "info", + True, + "A previous response is still stopping. Please try again in a moment.", + { + "retry_after_ms": retry_after_ms, + "retry_after_at": retry_after_at, + }, + ) + return ( + "thread_busy", + "THREAD_BUSY", + "warn", + True, + "Another response is still finishing for this thread. 
Please try again in a moment.", + None, + ) + + if is_provider_rate_limited(exc): + return ( + "rate_limited", + "RATE_LIMITED", + "warn", + True, + "This model is temporarily rate-limited. Please try again in a few seconds or switch models.", + None, + ) + + return ( + "server_error", + "SERVER_ERROR", + "error", + False, + f"Error during {flow_label}: {raw}", + None, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/emitter.py b/surfsense_backend/app/tasks/chat/streaming/errors/emitter.py new file mode 100644 index 000000000..95806ab87 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/errors/emitter.py @@ -0,0 +1,38 @@ +"""Emit one terminal error SSE frame and log via the stream error classifier.""" + +from __future__ import annotations + +from typing import Any, Literal + +from .classifier import log_chat_stream_error + + +def emit_stream_terminal_error( + *, + streaming_service: Any, + flow: Literal["new", "resume", "regenerate"], + request_id: str | None, + thread_id: int, + search_space_id: int, + user_id: str | None, + message: str, + error_kind: str = "server_error", + error_code: str = "SERVER_ERROR", + severity: Literal["info", "warn", "error"] = "error", + is_expected: bool = False, + extra: dict[str, Any] | None = None, +) -> str: + log_chat_stream_error( + flow=flow, + error_kind=error_kind, + error_code=error_code, + severity=severity, + is_expected=is_expected, + request_id=request_id, + thread_id=thread_id, + search_space_id=search_space_id, + user_id=user_id, + message=message, + extra=extra, + ) + return streaming_service.format_error(message, error_code=error_code, extra=extra) diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py b/surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py new file mode 100644 index 000000000..151dfdaac --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/__init__.py @@ -0,0 +1,3 @@ +"""Pure helpers for chat streaming.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py b/surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py new file mode 100644 index 000000000..48b44fc1d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/chunk_parts.py @@ -0,0 +1,60 @@ +"""Split a model chunk into text, reasoning, and tool-call fragment lists.""" + +from __future__ import annotations + +from typing import Any + + +def extract_chunk_parts(chunk: Any) -> dict[str, Any]: + """Return dict with keys text, reasoning, and tool_call_chunks (merged from chunk fields).""" + out: dict[str, Any] = {"text": "", "reasoning": "", "tool_call_chunks": []} + if chunk is None: + return out + + content = getattr(chunk, "content", None) + if isinstance(content, str): + if content: + out["text"] = content + elif isinstance(content, list): + text_parts: list[str] = [] + reasoning_parts: list[str] = [] + for block in content: + if not isinstance(block, dict): + continue + block_type = block.get("type") + if block_type == "text": + value = block.get("text") or block.get("content") or "" + if isinstance(value, str) and value: + text_parts.append(value) + elif block_type == "reasoning": + value = ( + block.get("reasoning") + or block.get("text") + or block.get("content") + or "" + ) + if isinstance(value, str) and value: + reasoning_parts.append(value) + elif block_type in ("tool_call_chunk", "tool_use"): + out["tool_call_chunks"].append(block) + if text_parts: + out["text"] = "".join(text_parts) + if 
reasoning_parts: + out["reasoning"] = "".join(reasoning_parts) + + additional = getattr(chunk, "additional_kwargs", None) or {} + if isinstance(additional, dict): + extra_reasoning = additional.get("reasoning_content") + if isinstance(extra_reasoning, str) and extra_reasoning: + existing = out["reasoning"] + out["reasoning"] = ( + (existing + extra_reasoning) if existing else extra_reasoning + ) + + extra_tool_chunks = getattr(chunk, "tool_call_chunks", None) + if isinstance(extra_tool_chunks, list): + for tcc in extra_tool_chunks: + if isinstance(tcc, dict): + out["tool_call_chunks"].append(tcc) + + return out diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py b/surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py new file mode 100644 index 000000000..dca099b3f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/interrupt_inspector.py @@ -0,0 +1,47 @@ +"""Read the first interrupt payload from a LangGraph state snapshot.""" + +from __future__ import annotations + +from typing import Any + + +def first_interrupt_value(state: Any) -> dict[str, Any] | None: + """Return the first interrupt payload across all snapshot tasks.""" + + def _extract(candidate: Any) -> dict[str, Any] | None: + if isinstance(candidate, dict): + value = candidate.get("value", candidate) + return value if isinstance(value, dict) else None + value = getattr(candidate, "value", None) + if isinstance(value, dict): + return value + if isinstance(candidate, list | tuple): + for item in candidate: + extracted = _extract(item) + if extracted is not None: + return extracted + return None + + for task in getattr(state, "tasks", ()) or (): + try: + interrupts = getattr(task, "interrupts", ()) or () + except (AttributeError, IndexError, TypeError): + interrupts = () + if not interrupts: + extracted = _extract(task) + if extracted is not None: + return extracted + continue + for interrupt_item in interrupts: + extracted = _extract(interrupt_item) + if extracted is not None: + return extracted + + try: + state_interrupts = getattr(state, "interrupts", ()) or () + except (AttributeError, IndexError, TypeError): + state_interrupts = () + extracted = _extract(state_interrupts) + if extracted is not None: + return extracted + return None diff --git a/surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py new file mode 100644 index 000000000..fbe4c94b7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_call_matching.py @@ -0,0 +1,32 @@ +"""Match buffered model tool-call chunks to a tool start when ids were missing.""" + +from __future__ import annotations + +from typing import Any + + +def match_buffered_langchain_tool_call_id( + pending_tool_call_chunks: list[dict[str, Any]], + tool_name: str, + run_id: str, + lc_tool_call_id_by_run: dict[str, str], +) -> str | None: + matched_idx: int | None = None + for idx, tcc in enumerate(pending_tool_call_chunks): + if tcc.get("name") == tool_name and tcc.get("id"): + matched_idx = idx + break + if matched_idx is None: + for idx, tcc in enumerate(pending_tool_call_chunks): + if tcc.get("id"): + matched_idx = idx + break + if matched_idx is None: + return None + matched = pending_tool_call_chunks.pop(matched_idx) + candidate = matched.get("id") + if isinstance(candidate, str) and candidate: + if run_id: + lc_tool_call_id_by_run[run_id] = candidate + return candidate + return None diff --git 
a/surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py new file mode 100644 index 000000000..a7c401dee --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/helpers/tool_output.py @@ -0,0 +1,43 @@ +"""Normalize filesystem tool payloads for SSE cards and messages.""" + +from __future__ import annotations + +import json +from typing import Any + + +def tool_output_to_text(tool_output: Any) -> str: + if isinstance(tool_output, dict): + if isinstance(tool_output.get("result"), str): + return tool_output["result"] + if isinstance(tool_output.get("error"), str): + return tool_output["error"] + return json.dumps(tool_output, ensure_ascii=False) + return str(tool_output) + + +def tool_output_has_error(tool_output: Any) -> bool: + if isinstance(tool_output, dict): + if tool_output.get("error"): + return True + result = tool_output.get("result") + return bool( + isinstance(result, str) and result.strip().lower().startswith("error:") + ) + if isinstance(tool_output, str): + return tool_output.strip().lower().startswith("error:") + return False + + +def extract_resolved_file_path( + *, tool_name: str, tool_output: Any, tool_input: Any | None = None +) -> str | None: + if isinstance(tool_output, dict): + path_value = tool_output.get("path") + if isinstance(path_value, str) and path_value.strip(): + return path_value.strip() + if tool_name in ("write_file", "edit_file") and isinstance(tool_input, dict): + file_path = tool_input.get("file_path") + if isinstance(file_path, str) and file_path.strip(): + return file_path.strip() + return None diff --git a/surfsense_backend/app/tasks/chat/streaming/stream_result.py b/surfsense_backend/app/tasks/chat/streaming/stream_result.py new file mode 100644 index 000000000..8ea3bd295 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/stream_result.py @@ -0,0 +1,28 @@ +"""Mutable facts collected while streaming one agent turn.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class StreamResult: + accumulated_text: str = "" + is_interrupted: bool = False + interrupt_value: dict[str, Any] | None = None + sandbox_files: list[str] = field(default_factory=list) + agent_called_update_memory: bool = False + request_id: str | None = None + turn_id: str = "" + filesystem_mode: str = "cloud" + client_platform: str = "web" + intent_detected: str = "chat_only" + intent_confidence: float = 0.0 + write_attempted: bool = False + write_succeeded: bool = False + verification_succeeded: bool = False + commit_gate_passed: bool = True + commit_gate_reason: str = "" + assistant_message_id: int | None = None + content_builder: Any | None = field(default=None, repr=False) From 7581a7c9c3247bc977ae556daddd25cc185ef2eb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 09/58] Add chat streaming relay state and thinking-step SSE helpers. 
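
Thinking-step ids are derived from a per-turn counter so successive frames get
stable, ordered ids. A small usage sketch of the constructor and the id helper
added below in relay/state.py (this assumes the surfsense_backend package is on
the import path; the "step" prefix is an arbitrary example value):

    from app.tasks.chat.streaming.relay.state import AgentEventRelayState

    # The counter starts at 1 when the caller has already emitted an initial
    # thinking step, so the next generated id does not collide with it.
    state = AgentEventRelayState.for_invocation(
        initial_step_id="step-1",
        initial_step_title="Reading the question",
        parity_v2=True,
    )
    assert state.thinking_step_counter == 1
    assert state.next_thinking_step_id("step") == "step-2"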
--- .../tasks/chat/streaming/relay/__init__.py | 3 + .../app/tasks/chat/streaming/relay/state.py | 55 +++++++++++++++++++ .../relay/thinking_step_completion.py | 31 +++++++++++ .../chat/streaming/relay/thinking_step_sse.py | 24 ++++++++ 4 files changed, 113 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/state.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py new file mode 100644 index 000000000..c1a5e7175 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py @@ -0,0 +1,3 @@ +"""Relay state: thinking steps, tool bookkeeping, and stream helpers.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py new file mode 100644 index 000000000..e8e35d0b2 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -0,0 +1,55 @@ +"""Mutable counters and maps for one agent stream.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class AgentEventRelayState: + """Tracks text, thinking steps, tool depth, and pending tool-call metadata.""" + + accumulated_text: str = "" + current_text_id: str | None = None + thinking_step_counter: int = 0 + tool_step_ids: dict[str, str] = field(default_factory=dict) + completed_step_ids: set[str] = field(default_factory=set) + last_active_step_id: str | None = None + last_active_step_title: str = "" + last_active_step_items: list[str] = field(default_factory=list) + just_finished_tool: bool = False + active_tool_depth: int = 0 + called_update_memory: bool = False + current_reasoning_id: str | None = None + parity_v2: bool = False + pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) + file_path_by_run: dict[str, str] = field(default_factory=dict) + index_to_meta: dict[int, dict[str, str]] = field(default_factory=dict) + ui_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) + current_lc_tool_call_id: dict[str, str | None] = field( + default_factory=lambda: {"value": None} + ) + + @classmethod + def for_invocation( + cls, + *, + initial_step_id: str | None = None, + initial_step_title: str = "", + initial_step_items: list[str] | None = None, + parity_v2: bool, + ) -> AgentEventRelayState: + counter = 1 if initial_step_id else 0 + return cls( + thinking_step_counter=counter, + last_active_step_id=initial_step_id, + last_active_step_title=initial_step_title, + last_active_step_items=list(initial_step_items or []), + parity_v2=parity_v2, + ) + + def next_thinking_step_id(self, step_prefix: str) -> str: + self.thinking_step_counter += 1 + return f"{step_prefix}-{self.thinking_step_counter}" diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py new file mode 100644 index 000000000..a0be71281 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py @@ -0,0 +1,31 @@ +"""Close the in-progress thinking 
step with a completed status frame.""" + +from __future__ import annotations + +from typing import Any + +from .thinking_step_sse import emit_thinking_step_frame + + +def complete_active_thinking_step( + *, + streaming_service: Any, + content_builder: Any | None, + last_active_step_id: str | None, + last_active_step_title: str, + last_active_step_items: list[str], + completed_step_ids: set[str], +) -> tuple[str | None, str | None]: + """Emit a completed thinking-step frame once; return (frame or None, next active step id).""" + if last_active_step_id and last_active_step_id not in completed_step_ids: + completed_step_ids.add(last_active_step_id) + event = emit_thinking_step_frame( + streaming_service=streaming_service, + content_builder=content_builder, + step_id=last_active_step_id, + title=last_active_step_title, + status="completed", + items=last_active_step_items if last_active_step_items else None, + ) + return event, None + return None, last_active_step_id diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py new file mode 100644 index 000000000..9e8c08dd5 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py @@ -0,0 +1,24 @@ +"""Thinking-step SSE plus optional content-builder updates.""" + +from __future__ import annotations + +from typing import Any + + +def emit_thinking_step_frame( + *, + streaming_service: Any, + content_builder: Any | None, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, +) -> str: + if content_builder is not None: + content_builder.on_thinking_step(step_id, title, status, items) + return streaming_service.format_thinking_step( + step_id=step_id, + title=title, + status=status, + items=items, + ) From ee16e1d5f96b22d7bb9ed822f1ec983c966b7b91 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 10/58] Add LangGraph handlers for chat model, chain, tool, and custom events. 
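
Each handler consumes one LangGraph astream_events event kind and yields
ready-to-send SSE frames. The relay loop that fans events out to these handlers
is not part of this patch, so the dispatch below is only an illustrative sketch
of the intended call shape (the relay() name and the argument plumbing are
assumptions, not the real orchestrator):

    from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames
    from app.tasks.chat.streaming.handlers.chat_model_stream import (
        iter_chat_model_stream_frames,
    )
    from app.tasks.chat.streaming.handlers.custom_event_dispatch import (
        iter_custom_event_frames,
    )
    from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames
    from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames

    async def relay(events, *, state, svc, builder, result, step_prefix, config):
        # events: the async iterator returned by graph.astream_events(...).
        async for event in events:
            kind = event.get("event")
            if kind == "on_chat_model_stream":
                frames = iter_chat_model_stream_frames(
                    event, state=state, streaming_service=svc,
                    content_builder=builder, step_prefix=step_prefix,
                )
            elif kind == "on_tool_start":
                frames = iter_tool_start_frames(
                    event, state=state, streaming_service=svc,
                    content_builder=builder, result=result, step_prefix=step_prefix,
                )
            elif kind == "on_tool_end":
                frames = iter_tool_end_frames(
                    event, state=state, streaming_service=svc,
                    content_builder=builder, result=result,
                    step_prefix=step_prefix, config=config,
                )
            elif kind == "on_chain_end":
                frames = iter_chain_end_frames(
                    event, state=state, streaming_service=svc, content_builder=builder,
                )
            elif kind == "on_custom_event":
                frames = iter_custom_event_frames(
                    event, state=state, streaming_service=svc, content_builder=builder,
                )
            else:
                continue
            for frame in frames:  # each frame is a complete "data: ...\n\n" string
                yield frame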
--- .../tasks/chat/streaming/handlers/__init__.py | 3 + .../chat/streaming/handlers/chain_end.py | 23 +++ .../streaming/handlers/chat_model_stream.py | 149 ++++++++++++++++++ .../handlers/custom_event_dispatch.py | 56 +++++++ .../chat/streaming/handlers/custom_events.py | 77 +++++++++ .../tasks/chat/streaming/handlers/tool_end.py | 112 +++++++++++++ .../streaming/handlers/tool_output_frame.py | 24 +++ .../chat/streaming/handlers/tool_start.py | 142 +++++++++++++++++ 8 files changed, 586 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py new file mode 100644 index 000000000..3e2165932 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/__init__.py @@ -0,0 +1,3 @@ +"""LangGraph stream handlers by event kind.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py new file mode 100644 index 000000000..c61058ac7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chain_end.py @@ -0,0 +1,23 @@ +"""Close open text when a LangGraph chain or agent node finishes.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState + + +def iter_chain_end_frames( + _event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, +) -> Iterator[str]: + """Close the open text stream if one is open.""" + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py new file mode 100644 index 000000000..861342b32 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py @@ -0,0 +1,149 @@ +"""Chat model stream: text, reasoning, and tool-call chunk SSE.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.helpers.chunk_parts import extract_chunk_parts +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + complete_active_thinking_step, +) + + +def iter_chat_model_stream_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, + step_prefix: str, +) -> Iterator[str]: + """SSE frames for one chat-model 
chunk.""" + if state.active_tool_depth > 0: + return + if "surfsense:internal" in event.get("tags", []): + return + chunk = event.get("data", {}).get("chunk") + if not chunk: + return + parts = extract_chunk_parts(chunk) + + reasoning_delta = parts["reasoning"] + text_delta = parts["text"] + + if state.parity_v2 and reasoning_delta: + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + if state.current_reasoning_id is None: + comp, new_active = complete_active_thinking_step( + streaming_service=streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if comp: + yield comp + state.last_active_step_id = new_active + if state.just_finished_tool: + state.last_active_step_id = None + state.last_active_step_title = "" + state.last_active_step_items = [] + state.just_finished_tool = False + state.current_reasoning_id = streaming_service.generate_reasoning_id() + yield streaming_service.format_reasoning_start(state.current_reasoning_id) + if content_builder is not None: + content_builder.on_reasoning_start(state.current_reasoning_id) + yield streaming_service.format_reasoning_delta( + state.current_reasoning_id, reasoning_delta + ) + if content_builder is not None: + content_builder.on_reasoning_delta( + state.current_reasoning_id, reasoning_delta + ) + + if text_delta: + if state.current_reasoning_id is not None: + yield streaming_service.format_reasoning_end(state.current_reasoning_id) + if content_builder is not None: + content_builder.on_reasoning_end(state.current_reasoning_id) + state.current_reasoning_id = None + if state.current_text_id is None: + comp, new_active = complete_active_thinking_step( + streaming_service=streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if comp: + yield comp + state.last_active_step_id = new_active + if state.just_finished_tool: + state.last_active_step_id = None + state.last_active_step_title = "" + state.last_active_step_items = [] + state.just_finished_tool = False + state.current_text_id = streaming_service.generate_text_id() + yield streaming_service.format_text_start(state.current_text_id) + if content_builder is not None: + content_builder.on_text_start(state.current_text_id) + yield streaming_service.format_text_delta(state.current_text_id, text_delta) + state.accumulated_text += text_delta + if content_builder is not None: + content_builder.on_text_delta(state.current_text_id, text_delta) + + if state.parity_v2 and parts["tool_call_chunks"]: + for tcc in parts["tool_call_chunks"]: + idx = tcc.get("index") + + if idx is not None and idx not in state.index_to_meta: + lc_id = tcc.get("id") + name = tcc.get("name") + if lc_id and name: + ui_id = lc_id + + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + if state.current_reasoning_id is not None: + yield streaming_service.format_reasoning_end( + 
state.current_reasoning_id + ) + if content_builder is not None: + content_builder.on_reasoning_end(state.current_reasoning_id) + state.current_reasoning_id = None + + state.index_to_meta[idx] = { + "ui_id": ui_id, + "lc_id": lc_id, + "name": name, + } + yield streaming_service.format_tool_input_start( + ui_id, + name, + langchain_tool_call_id=lc_id, + ) + if content_builder is not None: + content_builder.on_tool_input_start(ui_id, name, lc_id) + + meta = state.index_to_meta.get(idx) if idx is not None else None + if meta: + args_chunk = tcc.get("args") or "" + if args_chunk: + yield streaming_service.format_tool_input_delta( + meta["ui_id"], args_chunk + ) + if content_builder is not None: + content_builder.on_tool_input_delta(meta["ui_id"], args_chunk) + else: + state.pending_tool_call_chunks.append(tcc) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py new file mode 100644 index 000000000..b373919cf --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py @@ -0,0 +1,56 @@ +"""Custom graph events routed to SSE (documents, action logs, report progress).""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.custom_events import ( + handle_action_log, + handle_action_log_updated, + handle_document_created, + handle_report_progress, +) +from app.tasks.chat.streaming.relay.state import AgentEventRelayState + + +def iter_custom_event_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, +) -> Iterator[str]: + """Yield any SSE produced by ad-hoc graph events (documents, action logs, report progress).""" + name = event.get("name") + data = event.get("data", {}) + + if name == "report_progress": + frame, state.last_active_step_items = handle_report_progress( + data, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + streaming_service=streaming_service, + content_builder=content_builder, + ) + if frame: + yield frame + return + + if name == "document_created": + frame = handle_document_created(data, streaming_service=streaming_service) + if frame: + yield frame + return + + if name == "action_log": + frame = handle_action_log(data, streaming_service=streaming_service) + if frame: + yield frame + return + + if name == "action_log_updated": + frame = handle_action_log_updated(data, streaming_service=streaming_service) + if frame: + yield frame diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py new file mode 100644 index 000000000..765f1d790 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py @@ -0,0 +1,77 @@ +"""Custom-event payloads turned into SSE (no model/tool stream handling).""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + + +def handle_report_progress( + data: dict[str, Any], + *, + last_active_step_id: str | None, + last_active_step_title: str, + last_active_step_items: list[str], + streaming_service: Any, + content_builder: Any | None, +) -> tuple[str | None, list[str]]: + """Update report step items; may emit one 
thinking SSE frame. + + Returns (frame or None, items list after update). + """ + message = data.get("message", "") + if not message or not last_active_step_id: + return None, last_active_step_items + + phase = data.get("phase", "") + topic_items = [ + item for item in last_active_step_items if item.startswith("Topic:") + ] + + if phase in ("revising_section", "adding_section"): + plan_items = [ + item + for item in last_active_step_items + if item.startswith("Topic:") + or item.startswith("Modifying ") + or item.startswith("Adding ") + or item.startswith("Removing ") + ] + plan_items = [item for item in plan_items if not item.endswith("...")] + new_items = [*plan_items, message] + else: + new_items = [*topic_items, message] + + frame = emit_thinking_step_frame( + streaming_service=streaming_service, + content_builder=content_builder, + step_id=last_active_step_id, + title=last_active_step_title, + status="in_progress", + items=new_items, + ) + return frame, new_items + + +def handle_document_created(data: dict[str, Any], *, streaming_service: Any) -> str | None: + if not data.get("id"): + return None + return streaming_service.format_data( + "documents-updated", + {"action": "created", "document": data}, + ) + + +def handle_action_log(data: dict[str, Any], *, streaming_service: Any) -> str | None: + if data.get("id") is None: + return None + return streaming_service.format_data("action-log", data) + + +def handle_action_log_updated( + data: dict[str, Any], *, streaming_service: Any +) -> str | None: + if data.get("id") is None: + return None + return streaming_service.format_data("action-log-updated", data) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py new file mode 100644 index 000000000..0bfef25eb --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -0,0 +1,112 @@ +"""Tool end: thinking completion, tool output, and terminal SSE.""" + +from __future__ import annotations + +import json +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.tools import ( + ToolCompletionEmissionContext, + iter_tool_completion_emission_frames, + resolve_tool_completed_thinking_step, +) +from app.tasks.chat.streaming.helpers.tool_output import tool_output_has_error +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + + +def iter_tool_end_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, + result: Any, + step_prefix: str, + config: dict[str, Any], +) -> Iterator[str]: + """SSE frames when one tool run finishes.""" + state.active_tool_depth = max(0, state.active_tool_depth - 1) + run_id = event.get("run_id", "") + tool_name = event.get("name", "unknown_tool") + raw_output = event.get("data", {}).get("output", "") + staged_file_path = ( + state.file_path_by_run.pop(run_id, None) if run_id else None + ) + + if tool_name == "update_memory": + state.called_update_memory = True + + if hasattr(raw_output, "content"): + content = raw_output.content + if isinstance(content, str): + try: + tool_output = json.loads(content) + except (json.JSONDecodeError, TypeError): + tool_output = {"result": content} + elif isinstance(content, dict): + tool_output = content + else: + tool_output = {"result": str(content)} + elif isinstance(raw_output, dict): + tool_output = 
raw_output + else: + tool_output = {"result": str(raw_output) if raw_output else "completed"} + + if tool_name in ("write_file", "edit_file"): + if tool_output_has_error(tool_output): + pass + else: + result.write_succeeded = True + result.verification_succeeded = True + + tool_call_id = state.ui_tool_call_id_by_run.get( + run_id, + f"call_{run_id[:32]}" if run_id else "call_unknown", + ) + original_step_id = state.tool_step_ids.get( + run_id, f"{step_prefix}-unknown-{run_id[:8]}" + ) + state.completed_step_ids.add(original_step_id) + + holder = state.current_lc_tool_call_id + holder["value"] = None + authoritative = getattr(raw_output, "tool_call_id", None) + if isinstance(authoritative, str) and authoritative: + holder["value"] = authoritative + if run_id: + state.lc_tool_call_id_by_run[run_id] = authoritative + elif run_id and run_id in state.lc_tool_call_id_by_run: + holder["value"] = state.lc_tool_call_id_by_run[run_id] + + items = state.last_active_step_items + title, completed_items = resolve_tool_completed_thinking_step( + tool_name, tool_output, items + ) + yield emit_thinking_step_frame( + streaming_service=streaming_service, + content_builder=content_builder, + step_id=original_step_id, + title=title, + status="completed", + items=completed_items, + ) + + state.just_finished_tool = True + state.last_active_step_id = None + state.last_active_step_title = "" + state.last_active_step_items = [] + + emission_ctx = ToolCompletionEmissionContext( + tool_name=tool_name, + tool_call_id=tool_call_id, + tool_output=tool_output, + streaming_service=streaming_service, + content_builder=content_builder, + langchain_tool_call_id_holder=holder, + stream_result=result, + langgraph_config=config, + staged_workspace_file_path=staged_file_path, + ) + yield from iter_tool_completion_emission_frames(emission_ctx) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py new file mode 100644 index 000000000..07244364c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py @@ -0,0 +1,24 @@ +"""Emit tool-output SSE and optional assistant content updates.""" + +from __future__ import annotations + +from typing import Any + + +def emit_tool_output_available_frame( + *, + streaming_service: Any, + content_builder: Any | None, + langchain_id_holder: dict[str, str | None], + call_id: str, + output: Any, +) -> str: + if content_builder is not None: + content_builder.on_tool_output_available( + call_id, output, langchain_id_holder["value"] + ) + return streaming_service.format_tool_output_available( + call_id, + output, + langchain_tool_call_id=langchain_id_holder["value"], + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py new file mode 100644 index 000000000..c316cc74a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -0,0 +1,142 @@ +"""Tool start: thinking-step and tool-input SSE.""" + +from __future__ import annotations + +import json +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.tools import resolve_tool_start_thinking +from app.tasks.chat.streaming.helpers.tool_call_matching import ( + match_buffered_langchain_tool_call_id, +) +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + 
complete_active_thinking_step, +) +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + + +def iter_tool_start_frames( + event: dict[str, Any], + *, + state: AgentEventRelayState, + streaming_service: Any, + content_builder: Any | None, + result: Any, + step_prefix: str, +) -> Iterator[str]: + """SSE frames for the start of one tool run.""" + state.active_tool_depth += 1 + tool_name = event.get("name", "unknown_tool") + run_id = event.get("run_id", "") + tool_input = event.get("data", {}).get("input", {}) + if tool_name in ("write_file", "edit_file"): + result.write_attempted = True + if isinstance(tool_input, dict): + file_path = tool_input.get("file_path") + if isinstance(file_path, str) and file_path.strip() and run_id: + state.file_path_by_run[run_id] = file_path.strip() + + if state.current_text_id is not None: + yield streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + + if state.last_active_step_title != "Synthesizing response": + comp, new_active = complete_active_thinking_step( + streaming_service=streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if comp: + yield comp + state.last_active_step_id = new_active + + state.just_finished_tool = False + tool_step_id = state.next_thinking_step_id(step_prefix) + state.tool_step_ids[run_id] = tool_step_id + state.last_active_step_id = tool_step_id + + thinking = resolve_tool_start_thinking(tool_name, tool_input) + state.last_active_step_title = thinking.title + state.last_active_step_items = thinking.items + frame_kw: dict[str, Any] = { + "streaming_service": streaming_service, + "content_builder": content_builder, + "step_id": tool_step_id, + "title": thinking.title, + "status": "in_progress", + } + if thinking.include_items_on_frame: + frame_kw["items"] = thinking.items + yield emit_thinking_step_frame(**frame_kw) + + matched_meta: dict[str, str] | None = None + if state.parity_v2: + taken_ui_ids = set(state.ui_tool_call_id_by_run.values()) + for meta in state.index_to_meta.values(): + if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: + matched_meta = meta + break + + tool_call_id: str + langchain_tool_call_id: str | None = None + if matched_meta is not None: + tool_call_id = matched_meta["ui_id"] + langchain_tool_call_id = matched_meta["lc_id"] + if run_id: + state.lc_tool_call_id_by_run[run_id] = matched_meta["lc_id"] + else: + tool_call_id = ( + f"call_{run_id[:32]}" + if run_id + else streaming_service.generate_tool_call_id() + ) + if state.parity_v2: + langchain_tool_call_id = match_buffered_langchain_tool_call_id( + state.pending_tool_call_chunks, + tool_name, + run_id, + state.lc_tool_call_id_by_run, + ) + yield streaming_service.format_tool_input_start( + tool_call_id, + tool_name, + langchain_tool_call_id=langchain_tool_call_id, + ) + if content_builder is not None: + content_builder.on_tool_input_start( + tool_call_id, tool_name, langchain_tool_call_id + ) + + if run_id: + state.ui_tool_call_id_by_run[run_id] = tool_call_id + + if isinstance(tool_input, dict): + _safe_input: dict[str, Any] = {} + for _k, _v in tool_input.items(): + try: + json.dumps(_v) + _safe_input[_k] = _v + except (TypeError, ValueError, OverflowError): + pass + else: + 
_safe_input = {"input": tool_input} + yield streaming_service.format_tool_input_available( + tool_call_id, + tool_name, + _safe_input, + langchain_tool_call_id=langchain_tool_call_id, + ) + if content_builder is not None: + content_builder.on_tool_input_available( + tool_call_id, + tool_name, + _safe_input, + langchain_tool_call_id, + ) From 1392abf5b1d9cbb2309c7780edeacf9c0c72f205 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 11/58] Add chat tool streaming registry with shared, default, and connector tools. --- .../chat/streaming/handlers/tools/__init__.py | 23 +++++ .../handlers/tools/connector/__init__.py | 0 .../tools/connector/shared/__init__.py | 0 .../tools/connector/shared/emission.py | 15 ++++ .../tools/connector/shared/thinking.py | 22 +++++ .../tools/connector/shared/tool_names.py | 31 +++++++ .../handlers/tools/default/__init__.py | 3 + .../handlers/tools/default/emission.py | 24 +++++ .../handlers/tools/default/thinking.py | 23 +++++ .../handlers/tools/emission_context.py | 34 +++++++ .../chat/streaming/handlers/tools/registry.py | 88 +++++++++++++++++++ .../handlers/tools/shared/__init__.py | 0 .../streaming/handlers/tools/shared/model.py | 12 +++ 13 files changed, 275 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py new file mode 100644 index 000000000..4b191c100 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/__init__.py @@ -0,0 +1,23 @@ +"""Per-tool streaming: thinking-step and completion emission.""" + +from __future__ import annotations + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) +from app.tasks.chat.streaming.handlers.tools.registry import ( + iter_tool_completion_emission_frames, + resolve_tool_completed_thinking_step, + resolve_tool_start_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + +__all__ = [ + "ToolCompletionEmissionContext", + "ToolStartThinking", + "iter_tool_completion_emission_frames", + "resolve_tool_completed_thinking_step", + "resolve_tool_start_thinking", +] diff --git 
a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py new file mode 100644 index 000000000..8e19dc224 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/emission.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py new file mode 100644 index 000000000..7e9dd8b96 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/thinking.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.default import ( + thinking as default_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return default_thinking.resolve_start_thinking(tool_name, tool_input) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + return default_thinking.resolve_completed_thinking( + tool_name, tool_output, last_items + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py new file mode 100644 index 000000000..ab698b32d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/connector/shared/tool_names.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +SHARED_CONNECTOR_TOOLS: frozenset[str] = frozenset( + { + "create_calendar_event", + "create_confluence_page", + "create_dropbox_file", + "create_gmail_draft", + "create_google_drive_file", + "create_jira_issue", + "create_linear_issue", + "create_notion_page", + "create_onedrive_file", + "delete_calendar_event", + "delete_confluence_page", + "delete_dropbox_file", + "delete_google_drive_file", + "delete_jira_issue", + "delete_linear_issue", + "delete_notion_page", + "delete_onedrive_file", + "send_gmail_email", + "trash_gmail_email", + "update_calendar_event", + "update_confluence_page", + "update_gmail_draft", + "update_jira_issue", + "update_linear_issue", + "update_notion_page", + } +) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py new file mode 100644 index 000000000..5e84a37f4 --- 
/dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/__init__.py @@ -0,0 +1,3 @@ +"""Fallback tool package.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py new file mode 100644 index 000000000..e24c619a7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/emission.py @@ -0,0 +1,24 @@ +"""Default tool-output card and a short completion terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + yield ctx.emit_tool_output_card( + { + "status": "completed", + "result_length": len(str(ctx.tool_output)), + }, + ) + yield ctx.streaming_service.format_terminal_info( + f"Tool {ctx.tool_name} completed", + "success", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py new file mode 100644 index 000000000..46d15a4e7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/default/thinking.py @@ -0,0 +1,23 @@ +"""Fallback thinking-step copy for unknown tools and connectors without custom UI.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_input + title = tool_name.replace("_", " ").strip().capitalize() or tool_name + return ToolStartThinking(title=title, items=[], include_items_on_frame=False) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str] +) -> tuple[str, list[str]]: + del tool_output + title = tool_name.replace("_", " ").strip().capitalize() or tool_name + return (title, last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py new file mode 100644 index 000000000..d9ff796c0 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py @@ -0,0 +1,34 @@ +"""Context for one tool-completion emission pass.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from app.tasks.chat.streaming.handlers.tool_output_frame import ( + emit_tool_output_available_frame, +) + + +@dataclass +class ToolCompletionEmissionContext: + """Streaming service, tool output, and ids for completion frames.""" + + tool_name: str + tool_call_id: str + tool_output: Any + streaming_service: Any + content_builder: Any | None + langchain_tool_call_id_holder: dict[str, str | None] + stream_result: Any + langgraph_config: dict[str, Any] + staged_workspace_file_path: str | None + + def emit_tool_output_card(self, payload: Any) -> str: + return emit_tool_output_available_frame( + streaming_service=self.streaming_service, + content_builder=self.content_builder, + langchain_id_holder=self.langchain_tool_call_id_holder, + call_id=self.tool_call_id, + output=payload, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py 
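A completed tool call becomes frames by building one context and handing it to the registry resolver added next. A sketch, assuming streaming_service and stream_result are already in scope and that an empty holder dict is sufficient to start (the ids here are placeholders):

    ctx = ToolCompletionEmissionContext(
        tool_name="write_file",
        tool_call_id="call_abc123",
        tool_output={"result": "Updated /workspace/report.md"},
        streaming_service=streaming_service,
        content_builder=None,
        langchain_tool_call_id_holder={},
        stream_result=stream_result,
        langgraph_config={"configurable": {"thread_id": "thread_1"}},
        staged_workspace_file_path=None,
    )
    frames = list(iter_tool_completion_emission_frames(ctx))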
b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py new file mode 100644 index 000000000..c0568f870 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/registry.py @@ -0,0 +1,88 @@ +"""Resolve thinking and emission modules by tool name.""" + +from __future__ import annotations + +import importlib +from collections.abc import Iterator +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.connector.shared.tool_names import ( + SHARED_CONNECTOR_TOOLS, +) +from app.tasks.chat.streaming.handlers.tools.deliverables.tool_names import ( + DELIVERABLE_TOOLS, +) +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) +from app.tasks.chat.streaming.handlers.tools.filesystem.tool_names import ( + FILESYSTEM_TOOLS, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + +_BASE = "app.tasks.chat.streaming.handlers.tools" +_CONNECTOR_SHARED = "connector.shared" + +_THINKING_ALIAS: dict[str, str] = { + "execute_code": "filesystem.execute", +} +_EMISSION_ALIAS: dict[str, str] = { + "edit_file": "filesystem.write_file", + "execute_code": "filesystem.execute", +} + + +def _thinking_module(tool_name: str) -> str: + if tool_name in SHARED_CONNECTOR_TOOLS: + return _CONNECTOR_SHARED + if tool_name in FILESYSTEM_TOOLS: + return f"filesystem.{tool_name}" + if tool_name in DELIVERABLE_TOOLS: + return f"deliverables.{tool_name}" + return _THINKING_ALIAS.get(tool_name, tool_name) + + +def _emission_module(tool_name: str) -> str: + if tool_name in _EMISSION_ALIAS: + return _EMISSION_ALIAS[tool_name] + if tool_name in SHARED_CONNECTOR_TOOLS: + return _CONNECTOR_SHARED + if tool_name in DELIVERABLE_TOOLS: + return f"deliverables.{tool_name}" + if tool_name in FILESYSTEM_TOOLS: + return f"filesystem.{tool_name}" + return tool_name + + +def _import_thinking(tool_name: str): + try: + return importlib.import_module(f"{_BASE}.{_thinking_module(tool_name)}.thinking") + except ModuleNotFoundError: + return importlib.import_module(f"{_BASE}.default.thinking") + + +def _import_emission(tool_name: str): + try: + return importlib.import_module(f"{_BASE}.{_emission_module(tool_name)}.emission") + except ModuleNotFoundError: + return importlib.import_module(f"{_BASE}.default.emission") + + +def resolve_tool_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return _import_thinking(tool_name).resolve_start_thinking(tool_name, tool_input) + + +def resolve_tool_completed_thinking_step( + tool_name: str, tool_output: Any, last_items: list[str] +) -> tuple[str, list[str]]: + return _import_thinking(tool_name).resolve_completed_thinking( + tool_name, tool_output, last_items + ) + + +def iter_tool_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + yield from _import_emission(ctx.tool_name).iter_completion_emission_frames(ctx) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py new file mode 100644 index 000000000..047a84374 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/shared/model.py @@ -0,0 +1,12 @@ +"""In-progress thinking-step title and bullet lines.""" + +from 
__future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class ToolStartThinking: + title: str + items: list[str] + include_items_on_frame: bool = True From a322eedaa1a3a0604ffb5f053b9e5381ac539284 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 12/58] Add filesystem tool streaming handlers for chat runs. --- .../handlers/tools/filesystem/__init__.py | 0 .../tools/filesystem/edit_file/__init__.py | 0 .../tools/filesystem/edit_file/thinking.py | 27 +++++++++ .../tools/filesystem/execute/__init__.py | 0 .../tools/filesystem/execute/emission.py | 40 +++++++++++++ .../tools/filesystem/execute/thinking.py | 42 +++++++++++++ .../tools/filesystem/glob/__init__.py | 0 .../tools/filesystem/glob/thinking.py | 27 +++++++++ .../tools/filesystem/grep/__init__.py | 0 .../tools/filesystem/grep/thinking.py | 31 ++++++++++ .../handlers/tools/filesystem/ls/__init__.py | 0 .../handlers/tools/filesystem/ls/thinking.py | 59 +++++++++++++++++++ .../tools/filesystem/mkdir/__init__.py | 0 .../tools/filesystem/mkdir/thinking.py | 27 +++++++++ .../tools/filesystem/move_file/__init__.py | 0 .../tools/filesystem/move_file/thinking.py | 33 +++++++++++ .../tools/filesystem/read_file/__init__.py | 0 .../tools/filesystem/read_file/thinking.py | 27 +++++++++ .../handlers/tools/filesystem/rm/__init__.py | 0 .../handlers/tools/filesystem/rm/thinking.py | 28 +++++++++ .../tools/filesystem/rmdir/__init__.py | 0 .../tools/filesystem/rmdir/thinking.py | 27 +++++++++ .../tools/filesystem/shared/__init__.py | 0 .../tools/filesystem/shared/tool_input.py | 17 ++++++ .../handlers/tools/filesystem/tool_names.py | 18 ++++++ .../tools/filesystem/write_file/__init__.py | 0 .../tools/filesystem/write_file/emission.py | 43 ++++++++++++++ .../tools/filesystem/write_file/thinking.py | 27 +++++++++ .../tools/filesystem/write_todos/__init__.py | 0 .../tools/filesystem/write_todos/thinking.py | 34 +++++++++++ 30 files changed, 507 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py create mode 100644 
surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py new file mode 100644 index 000000000..8669107db --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/edit_file/thinking.py @@ -0,0 +1,27 @@ +"""edit_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + fp = d.get("file_path", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking(title="Editing file", items=[truncate_path(fp)]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Editing file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/__init__.py 
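edit_file, read_file, rm, and write_file all render their target path through truncate_path from filesystem/shared/tool_input.py (added later in this patch), which keeps the tail of long paths behind a leading ellipsis:

    from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import (
        truncate_path,
    )

    long_path = "/workspace/" + "nested/" * 20 + "file.md"   # 158 characters
    truncate_path("/short/path.md")   # unchanged, fits within 80 chars
    truncate_path(long_path)          # "…" plus the last 77 characters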
b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py new file mode 100644 index 000000000..0ff87a907 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/emission.py @@ -0,0 +1,40 @@ +"""execute: exit code, stdout, sandbox file hints.""" + +from __future__ import annotations + +import re +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + raw_text = out.get("result", "") if isinstance(out, dict) else str(out) + exit_code: int | None = None + output_text = raw_text + m = re.match(r"^Exit code:\s*(\d+)", raw_text) + if m: + exit_code = int(m.group(1)) + om = re.search(r"\nOutput:\n([\s\S]*)", raw_text) + output_text = om.group(1) if om else "" + thread_id_str = ctx.langgraph_config.get("configurable", {}).get("thread_id", "") + + for sf_match in re.finditer( + r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE + ): + fpath = sf_match.group(1).strip() + if fpath and fpath not in ctx.stream_result.sandbox_files: + ctx.stream_result.sandbox_files.append(fpath) + + yield ctx.emit_tool_output_card( + { + "exit_code": exit_code, + "output": output_text, + "thread_id": thread_id_str, + }, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py new file mode 100644 index 000000000..2c8aa296b --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/execute/thinking.py @@ -0,0 +1,42 @@ +"""execute: sandbox command thinking + completion lines.""" + +from __future__ import annotations + +import re +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + cmd = d.get("command", "") if isinstance(tool_input, dict) else str(tool_input) + display_cmd = cmd[:80] + ("…" if len(cmd) > 80 else "") + return ToolStartThinking(title="Running command", items=[f"$ {display_cmd}"]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + raw_text = ( + tool_output.get("result", "") + if isinstance(tool_output, dict) + else str(tool_output) + ) + m = re.match(r"^Exit code:\s*(\d+)", raw_text) + exit_code_val = int(m.group(1)) if m else None + if exit_code_val is not None and exit_code_val == 0: + completed = [*items, "Completed successfully"] + elif exit_code_val is not None: + completed = [*items, f"Exit code: {exit_code_val}"] + else: + completed = [*items, "Finished"] + return ("Running command", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/__init__.py new 
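Both execute modules assume the sandbox renders results as an "Exit code: N" header, an "Output:" block, and optional SANDBOX_FILE marker lines. A worked example using the same regexes:

    import re

    raw_text = "Exit code: 0\nOutput:\nhello\nSANDBOX_FILE: /tmp/chart.png"
    m = re.match(r"^Exit code:\s*(\d+)", raw_text)
    exit_code = int(m.group(1)) if m else None           # 0
    om = re.search(r"\nOutput:\n([\s\S]*)", raw_text)
    output_text = om.group(1) if om else ""              # "hello\nSANDBOX_FILE: ..."
    files = re.finditer(r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE)
    # [f.group(1).strip() for f in files] == ["/tmp/chart.png"]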
file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py new file mode 100644 index 000000000..f5a57beac --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/glob/thinking.py @@ -0,0 +1,27 @@ +"""glob: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + pat = d.get("pattern", "") if isinstance(tool_input, dict) else str(tool_input) + base = d.get("path", "/") if isinstance(tool_input, dict) else "/" + return ToolStartThinking(title="Searching files", items=[f"{pat} in {base}"]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Searching files", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py new file mode 100644 index 000000000..da0864177 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/grep/thinking.py @@ -0,0 +1,31 @@ +"""grep: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + pat = d.get("pattern", "") if isinstance(tool_input, dict) else str(tool_input) + grep_path = d.get("path", "") if isinstance(tool_input, dict) else "" + display_pat = pat[:60] + ("…" if len(pat) > 60 else "") + return ToolStartThinking( + title="Searching content", + items=[f'"{display_pat}"' + (f" in {grep_path}" if grep_path else "")], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Searching content", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py new file mode 100644 index 000000000..80c547b5a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/ls/thinking.py @@ -0,0 +1,59 @@ +"""ls: thinking-step copy for directory listing.""" + +from __future__ import annotations + +import ast +from typing import 
Any + +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + if isinstance(tool_input, dict): + path = tool_input.get("path", "/") + else: + path = str(tool_input) + return ToolStartThinking(title="Listing files", items=[path]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + if isinstance(tool_output, dict): + ls_output = tool_output.get("result", "") + elif isinstance(tool_output, str): + ls_output = tool_output + else: + ls_output = str(tool_output) if tool_output else "" + file_names: list[str] = [] + if ls_output: + paths: list[str] = [] + try: + parsed = ast.literal_eval(ls_output) + if isinstance(parsed, list): + paths = [str(p) for p in parsed] + except (ValueError, SyntaxError): + paths = [ + line.strip() + for line in ls_output.strip().split("\n") + if line.strip() + ] + for p in paths: + name = p.rstrip("/").split("/")[-1] + if name and len(name) <= 40: + file_names.append(name) + elif name: + file_names.append(name[:37] + "...") + if file_names: + if len(file_names) <= 5: + completed = [f"[{name}]" for name in file_names] + else: + completed = [f"[{name}]" for name in file_names[:4]] + completed.append(f"(+{len(file_names) - 4} more)") + else: + completed = ["No files found"] + return ("Listing files", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py new file mode 100644 index 000000000..3a3707698 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/mkdir/thinking.py @@ -0,0 +1,27 @@ +"""mkdir: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + p = d.get("path", "") if isinstance(tool_input, dict) else str(tool_input) + display = p if len(p) <= 80 else "…" + p[-77:] + return ToolStartThinking(title="Creating folder", items=[display] if display else []) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Creating folder", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py new file mode 100644 index 000000000..192a789f4 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/move_file/thinking.py @@ -0,0 +1,33 @@ 
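The ls completion path accepts either a Python list repr (via ast.literal_eval) or plain newline-separated output, then shows at most four bracketed names plus a remainder count. For example:

    from app.tasks.chat.streaming.handlers.tools.filesystem.ls.thinking import (
        resolve_completed_thinking,
    )

    title, items = resolve_completed_thinking(
        "ls", "['a.md', 'b.md', 'c.md', 'd.md', 'e.md', 'f.md']", []
    )
    # title == "Listing files"
    # items == ["[a.md]", "[b.md]", "[c.md]", "[d.md]", "(+2 more)"]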
+"""move_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_middle, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + src = d.get("source_path", "") if isinstance(tool_input, dict) else "" + dst = d.get("destination_path", "") if isinstance(tool_input, dict) else "" + display_src = truncate_middle(src, max_len=60) + display_dst = truncate_middle(dst, max_len=60) + return ToolStartThinking( + title="Moving file", + items=[f"{display_src} → {display_dst}"] if src or dst else [], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Moving file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py new file mode 100644 index 000000000..3f4290ad7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/read_file/thinking.py @@ -0,0 +1,27 @@ +"""read_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + fp = d.get("file_path", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking(title="Reading file", items=[truncate_path(fp)]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Reading file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py new file mode 100644 index 000000000..a82a44e6f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rm/thinking.py @@ -0,0 +1,28 @@ +"""rm: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + rm_path = d.get("path", "") if isinstance(tool_input, 
dict) else str(tool_input) + display = truncate_path(rm_path) + return ToolStartThinking(title="Deleting file", items=[display] if display else []) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Deleting file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py new file mode 100644 index 000000000..6c97904b7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/rmdir/thinking.py @@ -0,0 +1,27 @@ +"""rmdir: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + p = d.get("path", "") if isinstance(tool_input, dict) else str(tool_input) + display = p if len(p) <= 80 else "…" + p[-77:] + return ToolStartThinking(title="Deleting folder", items=[display] if display else []) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Deleting folder", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py new file mode 100644 index 000000000..507782283 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/shared/tool_input.py @@ -0,0 +1,17 @@ +"""Tool-call args + display truncation for filesystem thinking modules.""" + +from __future__ import annotations + +from typing import Any + + +def as_tool_input_dict(tool_input: Any) -> dict[str, Any]: + return tool_input if isinstance(tool_input, dict) else {} + + +def truncate_path(fp: str, *, max_len: int = 80) -> str: + return fp if len(fp) <= max_len else "…" + fp[-(max_len - 3) :] + + +def truncate_middle(s: str, *, max_len: int = 60) -> str: + return s if len(s) <= max_len else "…" + s[-(max_len - 3) :] diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py new file mode 100644 index 000000000..e2ad33736 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/tool_names.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +FILESYSTEM_TOOLS: frozenset[str] = frozenset( + { + "read_file", + "glob", + "grep", + "ls", + "mkdir", + "move_file", + "rm", + "rmdir", + "write_todos", + "write_file", + "edit_file", + "execute", + } +) diff --git 
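With FILESYSTEM_TOOLS in place, the module resolution from patch 11 becomes concrete. Sketched against the registry internals (module paths relative to app.tasks.chat.streaming.handlers.tools):

    _thinking_module("ls")                  # "filesystem.ls"
    _thinking_module("execute_code")        # "filesystem.execute", via _THINKING_ALIAS
    _thinking_module("create_jira_issue")   # "connector.shared"
    _thinking_module("made_up_tool")        # "made_up_tool"; import fails, default wins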
a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py new file mode 100644 index 000000000..820235379 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/emission.py @@ -0,0 +1,43 @@ +"""write_file: path + status envelope on the tool-output card.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) +from app.tasks.chat.streaming.helpers.tool_output import ( + extract_resolved_file_path, + tool_output_has_error, + tool_output_to_text, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + resolved_path = extract_resolved_file_path( + tool_name=ctx.tool_name, + tool_output=ctx.tool_output, + tool_input={"file_path": ctx.staged_workspace_file_path} + if ctx.staged_workspace_file_path + else None, + ) + result_text = tool_output_to_text(ctx.tool_output) + if tool_output_has_error(ctx.tool_output): + yield ctx.emit_tool_output_card( + { + "status": "error", + "error": result_text, + "path": resolved_path, + }, + ) + else: + yield ctx.emit_tool_output_card( + { + "status": "completed", + "path": resolved_path, + "result": result_text, + }, + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py new file mode 100644 index 000000000..43bc8a65f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_file/thinking.py @@ -0,0 +1,27 @@ +"""write_file: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, + truncate_path, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + fp = d.get("file_path", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking(title="Writing file", items=[truncate_path(fp)]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Writing file", last_items) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py new file mode 100644 index 000000000..43e533daa --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/filesystem/write_todos/thinking.py @@ -0,0 +1,34 @@ +"""write_todos: thinking-step copy.""" + +from __future__ 
import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.filesystem.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + todos = d.get("todos", []) if isinstance(tool_input, dict) else [] + todo_count = len(todos) if isinstance(todos, list) else 0 + return ToolStartThinking( + title="Planning tasks", + items=( + [f"{todo_count} task{'s' if todo_count != 1 else ''}"] + if todo_count + else [] + ), + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_output, tool_name + return ("Planning tasks", last_items) From c8fb4aa5e5b2df6c3c8cca2081f5f8634bd69d16 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 13/58] Add deliverables and web tool streaming handlers for chat runs. --- .../handlers/tools/deliverables/__init__.py | 0 .../deliverables/generate_image/__init__.py | 0 .../deliverables/generate_image/emission.py | 28 +++++++ .../deliverables/generate_image/thinking.py | 39 +++++++++ .../deliverables/generate_podcast/__init__.py | 0 .../deliverables/generate_podcast/emission.py | 37 +++++++++ .../deliverables/generate_podcast/thinking.py | 80 +++++++++++++++++++ .../deliverables/generate_report/__init__.py | 0 .../deliverables/generate_report/emission.py | 33 ++++++++ .../deliverables/generate_report/thinking.py | 77 ++++++++++++++++++ .../deliverables/generate_resume/__init__.py | 0 .../deliverables/generate_resume/emission.py | 32 ++++++++ .../deliverables/generate_resume/thinking.py | 24 ++++++ .../generate_video_presentation/__init__.py | 0 .../generate_video_presentation/emission.py | 28 +++++++ .../generate_video_presentation/thinking.py | 52 ++++++++++++ .../deliverables/save_document/__init__.py | 0 .../deliverables/save_document/emission.py | 16 ++++ .../deliverables/save_document/thinking.py | 38 +++++++++ .../tools/deliverables/shared/__init__.py | 0 .../tools/deliverables/shared/tool_input.py | 9 +++ .../handlers/tools/deliverables/tool_names.py | 12 +++ .../handlers/tools/scrape_webpage/emission.py | 43 ++++++++++ .../tools/scrape_webpage/shared/__init__.py | 0 .../tools/scrape_webpage/shared/tool_input.py | 9 +++ .../handlers/tools/scrape_webpage/thinking.py | 47 +++++++++++ .../handlers/tools/web_search/emission.py | 41 ++++++++++ 27 files changed, 645 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/__init__.py create mode 
100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py new file mode 100644 index 000000000..762f75cca --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/emission.py @@ -0,0 +1,28 @@ +"""generate_image: tool card + terminal summary.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict): + 
if out.get("error"): + yield ctx.streaming_service.format_terminal_info( + f"Image generation failed: {out['error'][:60]}", + "error", + ) + else: + yield ctx.streaming_service.format_terminal_info( + "Image generated successfully", + "success", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py new file mode 100644 index 000000000..9675cb0f2 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_image/thinking.py @@ -0,0 +1,39 @@ +"""generate_image: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + prompt = d.get("prompt", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking( + title="Generating image", + items=[f"Prompt: {prompt[:80]}{'...' if len(prompt) > 80 else ''}"], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + if isinstance(tool_output, dict) and not tool_output.get("error"): + completed = [*items, "Image generated successfully"] + else: + error_msg = ( + tool_output.get("error", "Generation failed") + if isinstance(tool_output, dict) + else "Generation failed" + ) + completed = [*items, f"Error: {error_msg}"] + return ("Generating image", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py new file mode 100644 index 000000000..f1a1e9c37 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py @@ -0,0 +1,37 @@ +"""generate_podcast: tool card + queue / success / failure terminal lines.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") in ( + "pending", + "generating", + "processing", + ): + yield ctx.streaming_service.format_terminal_info( + f"Podcast queued: {out.get('title', 'Podcast')}", + "success", + ) + elif isinstance(out, dict) and out.get("status") in ("ready", "success"): + yield ctx.streaming_service.format_terminal_info( + f"Podcast generated successfully: {out.get('title', 'Podcast')}", + "success", + ) + elif isinstance(out, dict) and out.get("status") in ("failed", "error"): + error_msg = out.get("error", "Unknown error") + yield 
ctx.streaming_service.format_terminal_info( + f"Podcast generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py new file mode 100644 index 000000000..b92e0c91f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py @@ -0,0 +1,80 @@ +"""generate_podcast: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + podcast_title = ( + d.get("podcast_title", "SurfSense Podcast") + if isinstance(tool_input, dict) + else "SurfSense Podcast" + ) + content_len = len( + d.get("source_content", "") if isinstance(tool_input, dict) else "" + ) + return ToolStartThinking( + title="Generating podcast", + items=[ + f"Title: {podcast_title}", + f"Content: {content_len:,} characters", + "Preparing audio generation...", + ], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + podcast_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + podcast_title = ( + tool_output.get("title", "Podcast") + if isinstance(tool_output, dict) + else "Podcast" + ) + if podcast_status in ("pending", "generating", "processing"): + completed = [ + f"Title: {podcast_title}", + "Podcast generation started", + "Processing in background...", + ] + elif podcast_status == "already_generating": + completed = [ + f"Title: {podcast_title}", + "Podcast already in progress", + "Please wait for it to complete", + ] + elif podcast_status in ("failed", "error"): + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed = [ + f"Title: {podcast_title}", + f"Error: {error_msg[:50]}", + ] + elif podcast_status in ("ready", "success"): + completed = [ + f"Title: {podcast_title}", + "Podcast ready", + ] + else: + completed = items + return ("Generating podcast", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py new file mode 100644 index 000000000..1c5c71b8b --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/emission.py @@ -0,0 +1,33 @@ +"""generate_report: full payload + terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = 
ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") == "ready": + word_count = out.get("word_count", 0) + yield ctx.streaming_service.format_terminal_info( + f"Report generated: {out.get('title', 'Report')} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + out.get("error", "Unknown error") + if isinstance(out, dict) + else "Unknown error" + ) + yield ctx.streaming_service.format_terminal_info( + f"Report generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py new file mode 100644 index 000000000..f912350f8 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_report/thinking.py @@ -0,0 +1,77 @@ +"""generate_report: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + report_topic = ( + d.get("topic", "Report") if isinstance(tool_input, dict) else "Report" + ) + is_revision = bool( + isinstance(tool_input, dict) and tool_input.get("parent_report_id") + ) + step_title = "Revising report" if is_revision else "Generating report" + return ToolStartThinking( + title=step_title, + items=[f"Topic: {report_topic}", "Analyzing source content..."], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + report_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + report_title = ( + tool_output.get("title", "Report") + if isinstance(tool_output, dict) + else "Report" + ) + word_count = ( + tool_output.get("word_count", 0) + if isinstance(tool_output, dict) + else 0 + ) + is_revision = ( + tool_output.get("is_revision", False) + if isinstance(tool_output, dict) + else False + ) + step_title = "Revising report" if is_revision else "Generating report" + + if report_status == "ready": + completed = [ + f"Topic: {report_title}", + f"{word_count:,} words", + "Report ready", + ] + elif report_status == "failed": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed = [ + f"Topic: {report_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed = items + + return (step_title, completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py new file mode 100644 index 000000000..dc8d3c7fc --- /dev/null +++ 
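The report completion step is purely status-driven; the in-progress items are replaced rather than appended to. For a ready payload (values illustrative):

    from app.tasks.chat.streaming.handlers.tools.deliverables.generate_report.thinking import (
        resolve_completed_thinking,
    )

    title, items = resolve_completed_thinking(
        "generate_report",
        {"status": "ready", "title": "Q3 market scan", "word_count": 1842},
        ["Topic: Q3 market scan", "Analyzing source content..."],
    )
    # title == "Generating report"   (is_revision defaults to False)
    # items == ["Topic: Q3 market scan", "1,842 words", "Report ready"]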
b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/emission.py @@ -0,0 +1,32 @@ +"""generate_resume: full payload + terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") == "ready": + yield ctx.streaming_service.format_terminal_info( + f"Resume generated: {out.get('title', 'Resume')}", + "success", + ) + else: + error_msg = ( + out.get("error", "Unknown error") + if isinstance(out, dict) + else "Unknown error" + ) + yield ctx.streaming_service.format_terminal_info( + f"Resume generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py new file mode 100644 index 000000000..e81a80679 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_resume/thinking.py @@ -0,0 +1,24 @@ +"""generate_resume: generic thinking titles and items.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.default import ( + thinking as default_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return default_thinking.resolve_start_thinking(tool_name, tool_input) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + return default_thinking.resolve_completed_thinking( + tool_name, tool_output, last_items + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py new file mode 100644 index 000000000..21e27d4c3 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/emission.py @@ -0,0 +1,28 @@ +"""generate_video_presentation: tool card + terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + payload = out if isinstance(out, dict) else {"result": out} + yield ctx.emit_tool_output_card(payload) + if isinstance(out, dict) and out.get("status") == "pending": + yield ctx.streaming_service.format_terminal_info( + f"Video presentation queued: {out.get('title', 'Presentation')}", + "success", + ) + elif isinstance(out, dict) and out.get("status") == "failed": + 
error_msg = out.get("error", "Unknown error") + yield ctx.streaming_service.format_terminal_info( + f"Presentation generation failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py new file mode 100644 index 000000000..5c5aa977d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_video_presentation/thinking.py @@ -0,0 +1,52 @@ +"""generate_video_presentation: generic in-progress thinking; completion is status-driven.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.default import ( + thinking as default_thinking, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + return default_thinking.resolve_start_thinking(tool_name, tool_input) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + vp_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + vp_title = ( + tool_output.get("title", "Presentation") + if isinstance(tool_output, dict) + else "Presentation" + ) + if vp_status in ("pending", "generating"): + completed = [ + f"Title: {vp_title}", + "Presentation generation started", + "Processing in background...", + ] + elif vp_status == "failed": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed = [ + f"Title: {vp_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed = items + return ("Generating video presentation", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py new file mode 100644 index 000000000..68c93dede --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/emission.py @@ -0,0 +1,16 @@ +"""save_document: default completion card and terminal line.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.default import emission as _default +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + yield from _default.iter_completion_emission_frames(ctx) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py new file mode 100644 index 000000000..77059a28c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/save_document/thinking.py @@ -0,0 +1,38 @@ +"""save_document: thinking-step copy.""" + +from 
__future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.deliverables.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + doc_title = d.get("title", "") if isinstance(tool_input, dict) else str(tool_input) + display_title = doc_title[:60] + ("…" if len(doc_title) > 60 else "") + return ToolStartThinking(title="Saving document", items=[display_title]) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + result_str = ( + tool_output.get("result", "") + if isinstance(tool_output, dict) + else str(tool_output) + ) + is_error = "Error" in result_str + completed = [ + *items, + result_str[:80] if is_error else "Saved to knowledge base", + ] + return ("Saving document", completed) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py new file mode 100644 index 000000000..1303cf09f --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/shared/tool_input.py @@ -0,0 +1,9 @@ +"""Tool-call args for deliverable thinking modules.""" + +from __future__ import annotations + +from typing import Any + + +def as_tool_input_dict(tool_input: Any) -> dict[str, Any]: + return tool_input if isinstance(tool_input, dict) else {} diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py new file mode 100644 index 000000000..5924af196 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/tool_names.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +DELIVERABLE_TOOLS: frozenset[str] = frozenset( + { + "generate_image", + "generate_podcast", + "generate_report", + "generate_resume", + "generate_video_presentation", + "save_document", + } +) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py new file mode 100644 index 000000000..293d2a1e9 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/emission.py @@ -0,0 +1,43 @@ +"""scrape_webpage: redacted payload + terminal summary.""" + +from __future__ import annotations + +from collections.abc import Iterator + +from app.tasks.chat.streaming.handlers.tools.emission_context import ( + ToolCompletionEmissionContext, +) + + +def iter_completion_emission_frames( + ctx: ToolCompletionEmissionContext, +) -> Iterator[str]: + out = ctx.tool_output + if isinstance(out, dict): + display_output = {k: v for k, v in out.items() if k != "content"} + if "content" in out: + content = out.get("content", "") + display_output["content_preview"] = ( + content[:500] + "..." 
if len(content) > 500 else content + ) + yield ctx.emit_tool_output_card(display_output) + else: + yield ctx.emit_tool_output_card({"result": out}) + + if isinstance(out, dict) and "error" not in out: + title = out.get("title", "Webpage") + word_count = out.get("word_count", 0) + yield ctx.streaming_service.format_terminal_info( + f"Scraped: {title[:40]}{'...' if len(title) > 40 else ''} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + out.get("error", "Failed to scrape") + if isinstance(out, dict) + else "Failed to scrape" + ) + yield ctx.streaming_service.format_terminal_info( + f"Scrape failed: {error_msg}", + "error", + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/__init__.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py new file mode 100644 index 000000000..581f0e64a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/shared/tool_input.py @@ -0,0 +1,9 @@ +"""Tool-call args for scrape_webpage thinking.""" + +from __future__ import annotations + +from typing import Any + + +def as_tool_input_dict(tool_input: Any) -> dict[str, Any]: + return tool_input if isinstance(tool_input, dict) else {} diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py new file mode 100644 index 000000000..335cc9703 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/scrape_webpage/thinking.py @@ -0,0 +1,47 @@ +"""scrape_webpage: thinking-step copy.""" + +from __future__ import annotations + +from typing import Any + +from app.tasks.chat.streaming.handlers.tools.scrape_webpage.shared.tool_input import ( + as_tool_input_dict, +) +from app.tasks.chat.streaming.handlers.tools.shared.model import ( + ToolStartThinking, +) + + +def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking: + del tool_name + d = as_tool_input_dict(tool_input) + url = d.get("url", "") if isinstance(tool_input, dict) else str(tool_input) + return ToolStartThinking( + title="Scraping webpage", + items=[f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"], + ) + + +def resolve_completed_thinking( + tool_name: str, tool_output: Any, last_items: list[str], +) -> tuple[str, list[str]]: + del tool_name + items = last_items + if isinstance(tool_output, dict): + title = tool_output.get("title", "Webpage") + word_count = tool_output.get("word_count", 0) + has_error = "error" in tool_output + if has_error: + completed = [ + *items, + f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}", + ] + else: + completed = [ + *items, + f"Title: {title[:50]}{'...' 
if len(title) > 50 else ''}",
+                f"Extracted: {word_count:,} words",
+            ]
+    else:
+        completed = [*items, "Content extracted"]
+    return ("Scraping webpage", completed)
diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py
new file mode 100644
index 000000000..eccaed708
--- /dev/null
+++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/web_search/emission.py
@@ -0,0 +1,43 @@
+"""web_search: citations parsed from provider XML."""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Iterator
+
+from app.tasks.chat.streaming.handlers.tools.emission_context import (
+    ToolCompletionEmissionContext,
+)
+
+
+def iter_completion_emission_frames(
+    ctx: ToolCompletionEmissionContext,
+) -> Iterator[str]:
+    out = ctx.tool_output
+    xml = out.get("result", str(out)) if isinstance(out, dict) else str(out)
+    citations: dict[str, dict[str, str]] = {}
+    # Tag shapes below are assumed from the provider's XML-ish results:
+    # <title><![CDATA[...]]></title> <url>...</url>, then <chunk url="...">.
+    for m in re.finditer(
+        r"(?s)<title><!\[CDATA\[(.*?)\]\]></title>\s*<url>(.*?)</url>",
+        xml,
+    ):
+        title, url = m.group(1).strip(), m.group(2).strip()
+        if url.startswith("http") and url not in citations:
+            citations[url] = {"title": title}
+    for m in re.finditer(
+        r'(?s)<chunk url="(.*?)">(.*?)</chunk>',
+        xml,
+    ):
+        chunk_url, content = m.group(1).strip(), m.group(2).strip()
+        if (
+            chunk_url.startswith("http")
+            and chunk_url in citations
+            and content
+        ):
+            citations[chunk_url]["snippet"] = (
+                content[:200] + "…" if len(content) > 200 else content
+            )
+    yield ctx.emit_tool_output_card(
+        {"status": "completed", "citations": citations},
+    )

From ec26ca69a6b4f05471beb8a3c114f689991f5622 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Wed, 6 May 2026 20:08:48 +0200
Subject: [PATCH 14/58] Add chat EventRelay and orchestrator stubs for future cutover.
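
The relay owns the astream_events -> SSE translation that the
orchestrator will take over at cutover. A rough consumption sketch
(collaborators elided; the names follow the signatures added below):

    relay = EventRelay(streaming_service=svc)
    state = AgentEventRelayState.for_invocation(parity_v2=False)
    result = StreamResult()
    async for frame in relay.relay(
        agent.astream_events(input_data, config=config, version="v2"),
        state=state,
        result=result,
    ):
        yield frame  # each frame is a ready-to-write SSE string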
--- .../app/tasks/chat/streaming/event_relay.py | 127 ++++++++++++++++++ .../app/tasks/chat/streaming/orchestrator.py | 48 +++++++ 2 files changed, 175 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/event_relay.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestrator.py diff --git a/surfsense_backend/app/tasks/chat/streaming/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/event_relay.py new file mode 100644 index 000000000..f86337ad7 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/event_relay.py @@ -0,0 +1,127 @@ +"""Turn LangGraph astream_events into SSE strings via the handler modules.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from dataclasses import dataclass, field +from typing import Any + +from app.services.streaming.emitter import EmitterRegistry +from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames +from app.tasks.chat.streaming.handlers.chat_model_stream import ( + iter_chat_model_stream_frames, +) +from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( + iter_custom_event_frames, +) +from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames +from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + complete_active_thinking_step, +) +from app.tasks.chat.streaming.stream_result import StreamResult + + +@dataclass +class EventRelayConfig: + """Optional relay tuning (sub-agent tools, text suppression).""" + + subagent_entry_tool_names: frozenset[str] = field( + default_factory=lambda: frozenset({"task"}) + ) + suppress_main_text_inside_tools: bool = True + + +class EventRelay: + """Dispatches graph events to streaming handlers and optional emitters.""" + + def __init__( + self, + *, + streaming_service: Any, + config: EventRelayConfig | None = None, + ) -> None: + self.streaming_service = streaming_service + self.config = config or EventRelayConfig() + reg = getattr(streaming_service, "emitter_registry", None) + self.emitter_registry = reg if reg is not None else EmitterRegistry() + + async def relay( + self, + events: AsyncIterator[dict[str, Any]], + *, + state: AgentEventRelayState, + result: StreamResult, + step_prefix: str = "thinking", + content_builder: Any | None = None, + config: dict[str, Any] | None = None, + ) -> AsyncIterator[str]: + """Yield SSE for each event from the async iterator, then finalize text/thinking.""" + graph_config = config or {} + async for event in events: + event_type = event.get("event", "") + if event_type == "on_chat_model_stream": + for frame in iter_chat_model_stream_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + step_prefix=step_prefix, + ): + yield frame + elif event_type == "on_tool_start": + for frame in iter_tool_start_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + result=result, + step_prefix=step_prefix, + ): + yield frame + elif event_type == "on_tool_end": + for frame in iter_tool_end_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + result=result, + step_prefix=step_prefix, + config=graph_config, + ): + yield frame + elif event_type == "on_custom_event": + for frame in iter_custom_event_frames( + event, + 
state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + ): + yield frame + elif event_type in ("on_chain_end", "on_agent_end"): + for frame in iter_chain_end_frames( + event, + state=state, + streaming_service=self.streaming_service, + content_builder=content_builder, + ): + yield frame + + if state.current_text_id is not None: + yield self.streaming_service.format_text_end(state.current_text_id) + if content_builder is not None: + content_builder.on_text_end(state.current_text_id) + state.current_text_id = None + + completion_event, new_active = complete_active_thinking_step( + streaming_service=self.streaming_service, + content_builder=content_builder, + last_active_step_id=state.last_active_step_id, + last_active_step_title=state.last_active_step_title, + last_active_step_items=state.last_active_step_items, + completed_step_ids=state.completed_step_ids, + ) + if completion_event: + yield completion_event + state.last_active_step_id = new_active diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py new file mode 100644 index 000000000..1b8558bc6 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py @@ -0,0 +1,48 @@ +"""Top-level chat streaming entrypoints (stubs until wired).""" + +from __future__ import annotations + +from collections.abc import AsyncGenerator +from typing import Any + + +async def stream_chat( + *, + request: Any, + user: Any, + db_session: Any, +) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress + del request, user, db_session + raise NotImplementedError( + "stream_chat: orchestrator not wired yet" + ) + if False: # pragma: no cover + yield "" + + +async def stream_resume( + *, + request: Any, + user: Any, + db_session: Any, +) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress + del request, user, db_session + raise NotImplementedError( + "stream_resume: orchestrator not wired yet" + ) + if False: # pragma: no cover + yield "" + + +async def stream_regenerate( + *, + request: Any, + user: Any, + db_session: Any, +) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress + del request, user, db_session + raise NotImplementedError( + "stream_regenerate: orchestrator not wired yet" + ) + if False: # pragma: no cover + yield "" From 8b6ffd12b8649bd789a9e780dd90a0a64d04fbac Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:08:48 +0200 Subject: [PATCH 15/58] Add parity unit tests for extracted chat streaming vs legacy. 
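
Every extracted helper is pinned against its legacy counterpart over a
representative input set, so a divergence must surface here before it
ships. The pattern throughout (simplified; the wall-clock retry
timestamp both sides stamp independently is stripped before comparing):

    new = new_classify(exc, flow_label="parity-test")
    old = old_classify(exc, flow_label="parity-test")
    assert new[:5] == old[:5]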
--- .../unit/tasks/chat/streaming/__init__.py | 0 .../chat/streaming/test_stage_1_parity.py | 292 ++++++++++++++++++ .../chat/streaming/test_stage_2_parity.py | 240 ++++++++++++++ 3 files changed, 532 insertions(+) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/__init__.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/__init__.py b/surfsense_backend/tests/unit/tasks/chat/streaming/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py new file mode 100644 index 000000000..9207f37d1 --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py @@ -0,0 +1,292 @@ +"""Pin Stage 1 extractions as faithful copies of the old helpers. + +The new orchestrator under ``app.tasks.chat.streaming`` is built in +parallel with the production module ``app.tasks.chat.stream_new_chat``. +For each Stage 1 extraction we assert the new function returns the same +output as the old one for a representative input set. The moment the +two diverge - intentionally or otherwise - this file fails loudly so +the divergence is reviewed rather than shipped silently. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from typing import Any + +import pytest + +from app.agents.new_chat.errors import BusyError +from app.agents.new_chat.middleware.busy_mutex import request_cancel, reset_cancel +from app.tasks.chat.stream_new_chat import ( + _classify_stream_exception as old_classify, + _emit_stream_terminal_error as old_emit_terminal_error, + _extract_chunk_parts as old_extract_chunk_parts, + _extract_resolved_file_path as old_extract_resolved_file_path, + _first_interrupt_value as old_first_interrupt_value, + _tool_output_has_error as old_tool_output_has_error, + _tool_output_to_text as old_tool_output_to_text, +) +from app.tasks.chat.streaming.errors.classifier import ( + classify_stream_exception as new_classify, +) +from app.tasks.chat.streaming.errors.emitter import ( + emit_stream_terminal_error as new_emit_terminal_error, +) +from app.tasks.chat.streaming.helpers.chunk_parts import ( + extract_chunk_parts as new_extract_chunk_parts, +) +from app.tasks.chat.streaming.helpers.interrupt_inspector import ( + first_interrupt_value as new_first_interrupt_value, +) +from app.tasks.chat.streaming.helpers.tool_output import ( + extract_resolved_file_path as new_extract_resolved_file_path, + tool_output_has_error as new_tool_output_has_error, + tool_output_to_text as new_tool_output_to_text, +) + +pytestmark = pytest.mark.unit + + +# ---------------------------------------------------------------- chunk parts + + +@dataclass +class _Chunk: + content: Any = "" + additional_kwargs: dict[str, Any] = field(default_factory=dict) + tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + + +_CHUNK_CASES: list[Any] = [ + None, + _Chunk(content=""), + _Chunk(content="hello"), + _Chunk(content=42), # invalid type, defensively coerced to empty + _Chunk( + content=[ + {"type": "text", "text": "Hello "}, + {"type": "text", "text": "world"}, + ] + ), + _Chunk( + content=[ + {"type": "reasoning", "reasoning": "hmm "}, + {"type": "reasoning", "text": "still"}, + 
{"type": "text", "text": "answer"}, + ] + ), + _Chunk( + content=[ + {"type": "tool_call_chunk", "id": "c1", "name": "x", "args": "{"}, + {"type": "tool_use", "id": "c2", "name": "y"}, + {"type": "image_url", "url": "ignored"}, + ] + ), + _Chunk( + content="visible", + additional_kwargs={"reasoning_content": "private"}, + ), + _Chunk( + tool_call_chunks=[ + {"id": None, "name": None, "args": '{"a":1}', "index": 0}, + {"id": "c", "name": "n", "args": "}", "index": 0}, + ] + ), + _Chunk( + content=[{"type": "tool_call_chunk", "id": "from-block", "name": "x"}], + tool_call_chunks=[{"id": "from-attr", "name": "y"}], + ), +] + + +@pytest.mark.parametrize("chunk", _CHUNK_CASES) +def test_extract_chunk_parts_matches_old_implementation(chunk: Any) -> None: + assert new_extract_chunk_parts(chunk) == old_extract_chunk_parts(chunk) + + +# ---------------------------------------------------------- interrupt inspector + + +@dataclass +class _Interrupt: + value: dict[str, Any] + + +@dataclass +class _Task: + interrupts: tuple[Any, ...] = () + + +@dataclass +class _State: + tasks: tuple[Any, ...] = () + interrupts: tuple[Any, ...] = () + + +_INTERRUPT_CASES: list[Any] = [ + _State(), + _State(tasks=(_Task(interrupts=(_Interrupt(value={"name": "send"}),)),)), + # Multiple tasks: must return the FIRST one in iteration order. + _State( + tasks=( + _Task(interrupts=(_Interrupt(value={"name": "first"}),)), + _Task(interrupts=(_Interrupt(value={"name": "second"}),)), + ) + ), + # Empty task interrupts -> falls back to root state.interrupts. + _State( + tasks=(_Task(interrupts=()),), + interrupts=(_Interrupt(value={"name": "root"}),), + ), + # Interrupts as plain dicts (not wrapper objects). + _State(interrupts=({"value": {"name": "dict_root"}},)), + # A defective task whose `.interrupts` raises - must be tolerated. + _State(tasks=(object(),)), +] + + +@pytest.mark.parametrize("state", _INTERRUPT_CASES) +def test_first_interrupt_value_matches_old_implementation(state: Any) -> None: + assert new_first_interrupt_value(state) == old_first_interrupt_value(state) + + +# ----------------------------------------------------------- error classifier + + +def _classify_cases() -> list[Exception]: + """Inputs that the FE depends on being mapped to specific error codes.""" + return [ + Exception("totally generic error"), + Exception( + '{"error":{"type":"rate_limit_error","message":"slow down"}}' + ), + Exception( + 'OpenrouterException - {"error":{"message":"Provider returned error",' + '"code":429}}' + ), + BusyError(request_id="thread-busy-parity"), + Exception("Thread is busy with another request"), + ] + + +@pytest.mark.parametrize("exc", _classify_cases()) +def test_classify_stream_exception_matches_old_implementation( + exc: Exception, +) -> None: + new = new_classify(exc, flow_label="parity-test") + old = old_classify(exc, flow_label="parity-test") + # Strip the wall-clock retry timestamp before comparing — both + # implementations call ``time.time()`` independently and the call + # order is enough to differ by 1 ms in practice. Every other field + # in the tuple must match exactly. 
+ new_extra = dict(new[5]) if isinstance(new[5], dict) else new[5] + old_extra = dict(old[5]) if isinstance(old[5], dict) else old[5] + if isinstance(new_extra, dict) and isinstance(old_extra, dict): + new_extra.pop("retry_after_at", None) + old_extra.pop("retry_after_at", None) + assert new[:5] == old[:5] + assert new_extra == old_extra + + +def test_classify_turn_cancelling_branch_parity() -> None: + """The TURN_CANCELLING branch reads cancel state for the busy thread id; + both implementations must agree on retry-window semantics, not just the + plain THREAD_BUSY code.""" + thread_id = "parity-cancelling-thread" + reset_cancel(thread_id) + request_cancel(thread_id) + exc = BusyError(request_id=thread_id) + new = new_classify(exc, flow_label="parity-test") + old = old_classify(exc, flow_label="parity-test") + assert new[0] == old[0] == "thread_busy" + assert new[1] == old[1] == "TURN_CANCELLING" + assert isinstance(new[5], dict) and isinstance(old[5], dict) + assert new[5]["retry_after_ms"] == old[5]["retry_after_ms"] + + +# ------------------------------------------------------------ terminal emitter + + +class _FakeStreamingService: + """Duck-types ``format_error`` for both old and new emitters.""" + + def __init__(self) -> None: + self.calls: list[dict[str, Any]] = [] + + def format_error( + self, message: str, *, error_code: str, extra: dict[str, Any] | None = None + ) -> str: + self.calls.append( + {"message": message, "error_code": error_code, "extra": extra} + ) + return f"data: {{\"type\":\"error\",\"errorText\":\"{message}\"}}\n\n" + + +def test_emit_stream_terminal_error_matches_old_output_and_logs(caplog) -> None: + """The new emitter must produce the same SSE frame and log the same + structured payload as the old one for the same arguments.""" + args: dict[str, Any] = { + "flow": "new", + "request_id": "req-parity", + "thread_id": 7, + "search_space_id": 9, + "user_id": "user-parity", + "message": "boom", + "error_kind": "server_error", + "error_code": "SERVER_ERROR", + "severity": "error", + "is_expected": False, + "extra": {"foo": "bar"}, + } + + new_svc = _FakeStreamingService() + old_svc = _FakeStreamingService() + + with caplog.at_level(logging.ERROR): + new_frame = new_emit_terminal_error(streaming_service=new_svc, **args) + old_frame = old_emit_terminal_error(streaming_service=old_svc, **args) + + assert new_frame == old_frame + assert new_svc.calls == old_svc.calls + chat_error_records = [ + r for r in caplog.records if "[chat_stream_error]" in r.message + ] + # One log line per emit call (two emits -> two records). 
+ assert len(chat_error_records) == 2 + + +# ---------------------------------------------------------------- tool output + + +def test_tool_output_helpers_match_old_implementation() -> None: + samples: list[Any] = [ + {"result": "ok"}, + {"error": "bad"}, + {"result": "Error: x"}, + "Error: plain", + "fine", + {"nested": {"a": 1}}, + ] + for s in samples: + assert new_tool_output_to_text(s) == old_tool_output_to_text(s) + assert new_tool_output_has_error(s) == old_tool_output_has_error(s) + + assert new_extract_resolved_file_path( + tool_name="write_file", + tool_output={"path": " /tmp/x "}, + tool_input=None, + ) == old_extract_resolved_file_path( + tool_name="write_file", + tool_output={"path": " /tmp/x "}, + tool_input=None, + ) + assert new_extract_resolved_file_path( + tool_name="write_file", + tool_output={}, + tool_input={"file_path": " /fallback "}, + ) == old_extract_resolved_file_path( + tool_name="write_file", + tool_output={}, + tool_input={"file_path": " /fallback "}, + ) diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py new file mode 100644 index 000000000..892bb7a6a --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py @@ -0,0 +1,240 @@ +"""Parity tests for Stage 2 extractions (tool matching, thinking step, custom events).""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from app.tasks.chat.stream_new_chat import _legacy_match_lc_id as old_legacy_match +from app.tasks.chat.streaming.handlers.custom_events import ( + handle_action_log, + handle_action_log_updated, + handle_document_created, + handle_report_progress, +) +from app.tasks.chat.streaming.helpers.tool_call_matching import ( + match_buffered_langchain_tool_call_id as new_legacy_match, +) +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.thinking_step_completion import ( + complete_active_thinking_step, +) +from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame + +pytestmark = pytest.mark.unit + + +def _copy_chunk_buffer(raw: list[dict[str, Any]]) -> list[dict[str, Any]]: + return [dict(x) for x in raw] + + +def test_legacy_tool_call_match_matches_old_implementation() -> None: + cases: list[tuple[list[dict[str, Any]], str, str, dict[str, str]]] = [ + ( + [ + {"name": "write_file", "id": "lc-a"}, + {"name": "other", "id": "lc-b"}, + ], + "write_file", + "run-1", + {}, + ), + ( + [{"name": "x", "id": None}, {"name": "y", "id": "lc-fallback"}], + "write_file", + "run-2", + {}, + ), + ([{"name": "no_id"}], "write_file", "run-3", {}), + ] + for chunks_template, tool_name, run_id, lc_map_seed in cases: + old_chunks = _copy_chunk_buffer(chunks_template) + new_chunks = _copy_chunk_buffer(chunks_template) + old_map = dict(lc_map_seed) + new_map = dict(lc_map_seed) + old_out = old_legacy_match(old_chunks, tool_name, run_id, old_map) + new_out = new_legacy_match(new_chunks, tool_name, run_id, new_map) + assert new_out == old_out + assert new_chunks == old_chunks + assert new_map == old_map + + +def test_emit_thinking_step_frame_invokes_builder_before_service() -> None: + order: list[str] = [] + builder = MagicMock() + + def on_ts(*args: Any, **kwargs: Any) -> None: + order.append("builder") + + builder.on_thinking_step.side_effect = on_ts + + svc = MagicMock() + + def fmt(**kwargs: Any) -> str: + 
order.append("service") + return "frame" + + svc.format_thinking_step.side_effect = fmt + + out = emit_thinking_step_frame( + streaming_service=svc, + content_builder=builder, + step_id="thinking-1", + title="Working", + status="in_progress", + items=["a"], + ) + assert out == "frame" + assert order == ["builder", "service"] + builder.on_thinking_step.assert_called_once() + svc.format_thinking_step.assert_called_once() + + +def test_emit_thinking_step_frame_skips_builder_when_none() -> None: + svc = MagicMock(return_value="x") + svc.format_thinking_step.return_value = "frame" + assert ( + emit_thinking_step_frame( + streaming_service=svc, + content_builder=None, + step_id="s", + title="t", + ) + == "frame" + ) + svc.format_thinking_step.assert_called_once() + + +def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: + svc = MagicMock() + svc.format_thinking_step.return_value = "done-frame" + completed: set[str] = set() + + frame, new_id = complete_active_thinking_step( + streaming_service=svc, + content_builder=None, + last_active_step_id="thinking-1", + last_active_step_title="T", + last_active_step_items=["x"], + completed_step_ids=completed, + ) + assert frame == "done-frame" + assert new_id is None + assert "thinking-1" in completed + + frame2, id2 = complete_active_thinking_step( + streaming_service=svc, + content_builder=None, + last_active_step_id="thinking-1", + last_active_step_title="T", + last_active_step_items=[], + completed_step_ids=completed, + ) + assert frame2 is None + assert id2 == "thinking-1" + + +def test_agent_event_relay_state_factory_matches_counter_rule() -> None: + s0 = AgentEventRelayState.for_invocation(parity_v2=False) + assert s0.thinking_step_counter == 0 + assert s0.last_active_step_id is None + + s1 = AgentEventRelayState.for_invocation( + initial_step_id="thinking-resume-1", + initial_step_title="Inherited", + initial_step_items=["Topic: X"], + parity_v2=True, + ) + assert s1.thinking_step_counter == 1 + assert s1.last_active_step_id == "thinking-resume-1" + assert s1.parity_v2 is True + assert s1.next_thinking_step_id("thinking") == "thinking-2" + + +@pytest.mark.parametrize( + ("phase", "message", "start_items", "expected_tail"), + [ + ( + "revising_section", + "progress line", + ["Topic: Foo", "Modifying bar", "stale..."], + ["Topic: Foo", "Modifying bar", "progress line"], + ), + ( + "other", + "phase msg", + ["Topic: Foo", "old line"], + ["Topic: Foo", "phase msg"], + ), + ], +) +def test_report_progress_items_match_reference( + phase: str, + message: str, + start_items: list[str], + expected_tail: list[str], +) -> None: + svc = MagicMock() + svc.format_thinking_step.return_value = "sse" + + items = list(start_items) + frame, new_items = handle_report_progress( + {"message": message, "phase": phase}, + last_active_step_id="step-1", + last_active_step_title="Report", + last_active_step_items=items, + streaming_service=svc, + content_builder=None, + ) + assert frame == "sse" + assert new_items == expected_tail + kwargs = svc.format_thinking_step.call_args.kwargs + assert kwargs["items"] == expected_tail + + +def test_report_progress_noop_when_missing_message_or_step() -> None: + svc = MagicMock() + items = ["Topic: A"] + f1, i1 = handle_report_progress( + {"message": "", "phase": "x"}, + last_active_step_id="s", + last_active_step_title="t", + last_active_step_items=items, + streaming_service=svc, + content_builder=None, + ) + assert f1 is None and i1 is items + + f2, i2 = handle_report_progress( + {"message": "m", "phase": "x"}, + 
last_active_step_id=None, + last_active_step_title="t", + last_active_step_items=items, + streaming_service=svc, + content_builder=None, + ) + assert f2 is None and i2 is items + + +def test_document_action_handlers_match_format_data_guards() -> None: + svc = MagicMock() + svc.format_data.return_value = "data-frame" + + assert handle_document_created({}, streaming_service=svc) is None + assert handle_document_created({"id": 0}, streaming_service=svc) is None + handle_document_created({"id": 42, "title": "x"}, streaming_service=svc) + svc.format_data.assert_called_with( + "documents-updated", {"action": "created", "document": {"id": 42, "title": "x"}} + ) + + svc.reset_mock() + assert handle_action_log({"id": None}, streaming_service=svc) is None + handle_action_log({"id": 1}, streaming_service=svc) + svc.format_data.assert_called_once_with("action-log", {"id": 1}) + + svc.reset_mock() + assert handle_action_log_updated({"id": None}, streaming_service=svc) is None + handle_action_log_updated({"id": 2}, streaming_service=svc) + svc.format_data.assert_called_once_with("action-log-updated", {"id": 2}) From 2ec2e82d9d3215734bb367e3cd854035ae078919 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 6 May 2026 20:10:09 +0200 Subject: [PATCH 16/58] Configure workspace Python interpreter and pyright extra paths. --- .vscode/settings.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 05bd30702..7da4b54f8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,9 @@ { "biome.configurationPath": "./surfsense_web/biome.json", - "deepscan.ignoreConfirmWarning": true + "deepscan.ignoreConfirmWarning": true, + "python.defaultInterpreterPath": "${workspaceFolder}/surfsense_backend/.venv/bin/python", + "basedpyright.analysis.extraPaths": [ + "${workspaceFolder}/surfsense_backend" + ], + "python-envs.pythonProjects": [] } \ No newline at end of file From c0706364d15e25a42d2973a890a106e9df6ccff0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 14:44:36 +0200 Subject: [PATCH 17/58] Add a route-level kill switch for streaming orchestrator cutover. --- surfsense_backend/app/config/__init__.py | 6 + .../app/routes/new_chat_routes.py | 33 +++- .../app/tasks/chat/streaming/orchestrator.py | 143 ++++++++++++++---- 3 files changed, 146 insertions(+), 36 deletions(-) diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index f6f0c7f62..543524456 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -490,6 +490,12 @@ class Config: ENABLE_DESKTOP_LOCAL_FILESYSTEM = ( os.getenv("ENABLE_DESKTOP_LOCAL_FILESYSTEM", "FALSE").upper() == "TRUE" ) + # Streaming entrypoint switch. Keep this at the route layer so orchestrator + # code stays free of legacy fallback branching. 
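+    # Example: set SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR=FALSE to route
+    # chat streaming back through the legacy entrypoints.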
+ ENABLE_CHAT_STREAM_ORCHESTRATOR = ( + os.getenv("SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR", "TRUE").upper() + == "TRUE" + ) @classmethod def is_self_hosted(cls) -> bool: diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index ad96654f5..7f035daef 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -71,7 +71,15 @@ from app.schemas.new_chat import ( TokenUsageSummary, TurnStatusResponse, ) -from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat +from app.tasks.chat.stream_new_chat import ( + stream_new_chat as legacy_stream_new_chat, + stream_resume_chat as legacy_stream_resume_chat, +) +from app.tasks.chat.streaming.orchestrator import ( + stream_chat, + stream_regenerate, + stream_resume, +) from app.users import current_active_user from app.utils.perf import get_perf_logger from app.utils.rbac import check_permission @@ -90,6 +98,10 @@ TURN_CANCELLING_MAX_DELAY_MS = 1500 router = APIRouter() +def _use_streaming_orchestrator() -> bool: + return config.ENABLE_CHAT_STREAM_ORCHESTRATOR + + def _resolve_filesystem_selection( *, mode: str, @@ -1770,7 +1782,11 @@ async def handle_new_chat( ) return StreamingResponse( - stream_new_chat( + ( + stream_chat + if _use_streaming_orchestrator() + else legacy_stream_new_chat + )( user_query=request.user_query, search_space_id=request.search_space_id, chat_id=request.chat_id, @@ -2255,7 +2271,12 @@ async def regenerate_response( else None ) try: - async for chunk in stream_new_chat( + regenerate_fn = ( + stream_regenerate + if _use_streaming_orchestrator() + else legacy_stream_new_chat + ) + async for chunk in regenerate_fn( user_query=str(user_query_to_use), search_space_id=request.search_space_id, chat_id=thread_id, @@ -2387,7 +2408,11 @@ async def resume_chat( await session.close() return StreamingResponse( - stream_resume_chat( + ( + stream_resume + if _use_streaming_orchestrator() + else legacy_stream_resume_chat + )( chat_id=thread_id, search_space_id=request.search_space_id, decisions=decisions, diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py index 1b8558bc6..e912dd632 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestrator.py @@ -1,48 +1,127 @@ -"""Top-level chat streaming entrypoints (stubs until wired).""" +"""Top-level chat streaming entrypoints. + +For now these orchestrator functions are thin compatibility wrappers around the +current ``stream_new_chat`` / ``stream_resume_chat`` implementations. Routing +calls through this module lets us cut over to the fully modular event relay in +one place later without touching API routes again. 
+""" from __future__ import annotations from collections.abc import AsyncGenerator -from typing import Any +from typing import Any, Literal + +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.db import ChatVisibility +from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat async def stream_chat( *, - request: Any, - user: Any, - db_session: Any, -) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress - del request, user, db_session - raise NotImplementedError( - "stream_chat: orchestrator not wired yet" - ) - if False: # pragma: no cover - yield "" + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + mentioned_documents: list[dict[str, Any]] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, +) -> AsyncGenerator[str, None]: + """Stream a new chat turn through the current production pipeline.""" + async for chunk in stream_new_chat( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + mentioned_documents=mentioned_documents, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ): + yield chunk async def stream_resume( *, - request: Any, - user: Any, - db_session: Any, -) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress - del request, user, db_session - raise NotImplementedError( - "stream_resume: orchestrator not wired yet" - ) - if False: # pragma: no cover - yield "" + chat_id: int, + search_space_id: int, + decisions: list[dict], + user_id: str | None = None, + llm_config_id: int = -1, + thread_visibility: ChatVisibility | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + disabled_tools: list[str] | None = None, +) -> AsyncGenerator[str, None]: + """Resume an interrupted chat turn through the current production pipeline.""" + async for chunk in stream_resume_chat( + chat_id=chat_id, + search_space_id=search_space_id, + decisions=decisions, + user_id=user_id, + llm_config_id=llm_config_id, + thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + request_id=request_id, + disabled_tools=disabled_tools, + ): + yield chunk async def stream_regenerate( *, - request: Any, - user: Any, - db_session: Any, -) -> AsyncGenerator[str, None]: # pragma: no cover - orchestrator port in progress - del request, user, db_session - raise NotImplementedError( - "stream_regenerate: orchestrator not wired yet" - ) - if False: # pragma: no cover - yield "" + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + 
mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + mentioned_documents: list[dict[str, Any]] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, + flow: Literal["new", "regenerate"] = "regenerate", +) -> AsyncGenerator[str, None]: + """Regenerate an assistant turn through the current production pipeline.""" + async for chunk in stream_new_chat( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + mentioned_documents=mentioned_documents, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + flow=flow, + ): + yield chunk From 4e664652a86ddcdb81014b92587d5436762cd2fa Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 15:13:22 +0200 Subject: [PATCH 18/58] Add streaming runtime helpers with behavior-focused unit tests. --- .../app/tasks/chat/streaming/runtime.py | 92 ++++++++++++++ .../streaming/test_orchestrator_runtime.py | 120 ++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/runtime.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py diff --git a/surfsense_backend/app/tasks/chat/streaming/runtime.py b/surfsense_backend/app/tasks/chat/streaming/runtime.py new file mode 100644 index 000000000..b45da2789 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/runtime.py @@ -0,0 +1,92 @@ +"""Runtime setup helpers for orchestrated chat streaming.""" + +from __future__ import annotations + +import contextlib +import logging +from collections.abc import Callable +from typing import Any + +_PREFLIGHT_TIMEOUT_SEC: float = 2.5 +_PREFLIGHT_MAX_TOKENS: int = 1 + + +async def preflight_llm( + llm: Any, + *, + is_provider_rate_limited: Callable[[BaseException], bool], +) -> None: + """Issue a minimal completion probe to catch immediate provider 429s.""" + from litellm import acompletion + + model = getattr(llm, "model", None) + if not model or model == "auto": + return + + try: + await acompletion( + model=model, + messages=[{"role": "user", "content": "ping"}], + api_key=getattr(llm, "api_key", None), + api_base=getattr(llm, "api_base", None), + max_tokens=_PREFLIGHT_MAX_TOKENS, + timeout=_PREFLIGHT_TIMEOUT_SEC, + stream=False, + metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]}, + ) + except Exception as exc: + if is_provider_rate_limited(exc): + raise + logging.getLogger(__name__).debug( + "auto_pin_preflight non_rate_limit_error model=%s err=%s", + model, + exc, + ) + + +async def build_main_agent_for_thread( + agent_factory: Any, + *, + llm: Any, + search_space_id: int, + db_session: Any, + connector_service: Any, + checkpointer: Any, + user_id: str | None, + thread_id: int | None, + agent_config: Any, + firecrawl_api_key: str | None, 
+ thread_visibility: Any, + filesystem_selection: Any, + disabled_tools: list[str] | None = None, + mentioned_document_ids: list[int] | None = None, +) -> Any: + """Run one canonical agent-build call for a single thread.""" + return await agent_factory( + llm=llm, + search_space_id=search_space_id, + db_session=db_session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=thread_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, + ) + + +async def settle_speculative_agent_build(task: Any) -> None: + """Wait for a discarded speculative build and swallow its outcome.""" + with contextlib.suppress(BaseException): + await task + + +__all__ = [ + "build_main_agent_for_thread", + "preflight_llm", + "settle_speculative_agent_build", +] diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py new file mode 100644 index 000000000..edb05edfa --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py @@ -0,0 +1,120 @@ +"""Behavior tests for streaming runtime helpers.""" + +from __future__ import annotations + +import sys +import types +from typing import Any + +import pytest + +from app.tasks.chat.streaming import runtime + +pytestmark = pytest.mark.unit + + +async def test_preflight_llm_calls_litellm_when_model_present( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: dict[str, Any] = {} + + async def _fake_acompletion(**kwargs: Any): + calls.update(kwargs) + return {"ok": True} + + monkeypatch.setitem( + sys.modules, + "litellm", + types.SimpleNamespace(acompletion=_fake_acompletion), + ) + + llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b") + await runtime.preflight_llm(llm, is_provider_rate_limited=lambda _: False) + + assert calls["model"] == "openai/test" + assert calls["max_tokens"] == 1 + assert calls["timeout"] == 2.5 + assert calls["stream"] is False + + +async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None: + class _RateLimitedError(Exception): + pass + + async def _fake_acompletion(**kwargs: Any): + del kwargs + raise _RateLimitedError("rl") + + monkeypatch.setitem( + sys.modules, + "litellm", + types.SimpleNamespace(acompletion=_fake_acompletion), + ) + + with pytest.raises(_RateLimitedError): + await runtime.preflight_llm( + types.SimpleNamespace(model="openai/test"), + is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError), + ) + + +async def test_preflight_llm_skips_probe_for_auto_model( + monkeypatch: pytest.MonkeyPatch, +) -> None: + called = {"count": 0} + + async def _fake_acompletion(**kwargs: Any): + del kwargs + called["count"] += 1 + return {"ok": True} + + monkeypatch.setitem( + sys.modules, + "litellm", + types.SimpleNamespace(acompletion=_fake_acompletion), + ) + + await runtime.preflight_llm( + types.SimpleNamespace(model="auto"), + is_provider_rate_limited=lambda _: False, + ) + assert called["count"] == 0 + + +async def test_build_main_agent_for_thread_forwards_arguments() -> None: + seen: dict[str, Any] = {} + + async def _factory(**kwargs: Any): + seen.update(kwargs) + return "agent" + + out = await runtime.build_main_agent_for_thread( + _factory, + llm="llm", + search_space_id=1, + 
db_session="db", + connector_service="connector", + checkpointer="cp", + user_id="u", + thread_id=10, + agent_config="cfg", + firecrawl_api_key="key", + thread_visibility="vis", + filesystem_selection="fs", + disabled_tools=["a"], + mentioned_document_ids=[5], + ) + assert out == "agent" + assert seen["thread_id"] == 10 + assert seen["mentioned_document_ids"] == [5] + + +async def test_settle_speculative_agent_build_swallows_exceptions() -> None: + async def _boom() -> None: + raise RuntimeError("ignore") + + import asyncio + + task = asyncio.create_task(_boom()) + await runtime.settle_speculative_agent_build(task) + assert task.done() From f8754a9dab480d2cb112f32c2cf1b4c67b4949ea Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 15:41:33 +0200 Subject: [PATCH 19/58] Rename streaming runtime modules for clearer SRP boundaries. --- .../streaming/{runtime.py => agent_setup.py} | 2 +- .../chat/streaming/orchestration/__init__.py | 5 + .../streaming/orchestration/event_stream.py | 53 +++++++++ ...strator_runtime.py => test_agent_setup.py} | 14 +-- .../test_orchestration_event_stream.py | 107 ++++++++++++++++++ 5 files changed, 173 insertions(+), 8 deletions(-) rename surfsense_backend/app/tasks/chat/streaming/{runtime.py => agent_setup.py} (97%) create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py rename surfsense_backend/tests/unit/tasks/chat/streaming/{test_orchestrator_runtime.py => test_agent_setup.py} (87%) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py diff --git a/surfsense_backend/app/tasks/chat/streaming/runtime.py b/surfsense_backend/app/tasks/chat/streaming/agent_setup.py similarity index 97% rename from surfsense_backend/app/tasks/chat/streaming/runtime.py rename to surfsense_backend/app/tasks/chat/streaming/agent_setup.py index b45da2789..f67c6ad65 100644 --- a/surfsense_backend/app/tasks/chat/streaming/runtime.py +++ b/surfsense_backend/app/tasks/chat/streaming/agent_setup.py @@ -1,4 +1,4 @@ -"""Runtime setup helpers for orchestrated chat streaming.""" +"""Agent setup helpers for orchestrated chat streaming.""" from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py new file mode 100644 index 000000000..8b586f2be --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py @@ -0,0 +1,5 @@ +"""Composable orchestration pieces for chat streaming.""" + +from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events + +__all__ = ["stream_agent_events"] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py new file mode 100644 index 000000000..1448cd86a --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py @@ -0,0 +1,53 @@ +"""Run LangGraph event streams through the EventRelay.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any + +from app.agents.new_chat.feature_flags import get_flags +from app.tasks.chat.streaming.event_relay import EventRelay +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.stream_result import StreamResult + + +async def stream_agent_events( + 
*, + agent: Any, + config: dict[str, Any], + input_data: Any, + streaming_service: Any, + result: StreamResult, + step_prefix: str = "thinking", + initial_step_id: str | None = None, + initial_step_title: str = "", + initial_step_items: list[str] | None = None, + content_builder: Any | None = None, + runtime_context: Any = None, +) -> AsyncIterator[str]: + """Yield SSE frames from agent ``astream_events`` via ``EventRelay``.""" + state = AgentEventRelayState.for_invocation( + initial_step_id=initial_step_id, + initial_step_title=initial_step_title, + initial_step_items=initial_step_items, + parity_v2=bool(get_flags().enable_stream_parity_v2), + ) + + astream_kwargs: dict[str, Any] = {"config": config, "version": "v2"} + if runtime_context is not None: + astream_kwargs["context"] = runtime_context + + events = agent.astream_events(input_data, **astream_kwargs) + relay = EventRelay(streaming_service=streaming_service) + async for frame in relay.relay( + events, + state=state, + result=result, + step_prefix=step_prefix, + content_builder=content_builder, + config=config, + ): + yield frame + + result.accumulated_text = state.accumulated_text + result.agent_called_update_memory = state.called_update_memory diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py similarity index 87% rename from surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py rename to surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py index edb05edfa..e1f7dd027 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_runtime.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py @@ -1,4 +1,4 @@ -"""Behavior tests for streaming runtime helpers.""" +"""Behavior tests for streaming agent setup helpers.""" from __future__ import annotations @@ -8,7 +8,7 @@ from typing import Any import pytest -from app.tasks.chat.streaming import runtime +from app.tasks.chat.streaming import agent_setup pytestmark = pytest.mark.unit @@ -29,7 +29,7 @@ async def test_preflight_llm_calls_litellm_when_model_present( ) llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b") - await runtime.preflight_llm(llm, is_provider_rate_limited=lambda _: False) + await agent_setup.preflight_llm(llm, is_provider_rate_limited=lambda _: False) assert calls["model"] == "openai/test" assert calls["max_tokens"] == 1 @@ -52,7 +52,7 @@ async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPat ) with pytest.raises(_RateLimitedError): - await runtime.preflight_llm( + await agent_setup.preflight_llm( types.SimpleNamespace(model="openai/test"), is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError), ) @@ -74,7 +74,7 @@ async def test_preflight_llm_skips_probe_for_auto_model( types.SimpleNamespace(acompletion=_fake_acompletion), ) - await runtime.preflight_llm( + await agent_setup.preflight_llm( types.SimpleNamespace(model="auto"), is_provider_rate_limited=lambda _: False, ) @@ -88,7 +88,7 @@ async def test_build_main_agent_for_thread_forwards_arguments() -> None: seen.update(kwargs) return "agent" - out = await runtime.build_main_agent_for_thread( + out = await agent_setup.build_main_agent_for_thread( _factory, llm="llm", search_space_id=1, @@ -116,5 +116,5 @@ async def test_settle_speculative_agent_build_swallows_exceptions() -> None: import asyncio task = asyncio.create_task(_boom()) - await 
runtime.settle_speculative_agent_build(task) + await agent_setup.settle_speculative_agent_build(task) assert task.done() diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py new file mode 100644 index 000000000..e12283a75 --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -0,0 +1,107 @@ +"""Behavior tests for orchestration event-stream execution.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import pytest + +from app.tasks.chat.streaming.orchestration import stream_agent_events +from app.tasks.chat.streaming.stream_result import StreamResult + +pytestmark = pytest.mark.unit + + +@dataclass +class _Chunk: + content: Any = "" + additional_kwargs: dict[str, Any] = field(default_factory=dict) + tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + + +class _StreamingService: + def __init__(self) -> None: + self._text_idx = 0 + + def generate_text_id(self) -> str: + self._text_idx += 1 + return f"text-{self._text_idx}" + + def format_text_start(self, text_id: str) -> str: + return f"text_start:{text_id}" + + def format_text_delta(self, text_id: str, text: str) -> str: + return f"text_delta:{text_id}:{text}" + + def format_text_end(self, text_id: str) -> str: + return f"text_end:{text_id}" + + +class _Agent: + def __init__(self, events: list[dict[str, Any]]) -> None: + self.events = list(events) + self.calls: list[tuple[Any, dict[str, Any]]] = [] + + async def astream_events(self, input_data: Any, **kwargs: Any): + self.calls.append((input_data, kwargs)) + for event in self.events: + yield event + + +async def _collect(stream: Any) -> list[str]: + out: list[str] = [] + async for x in stream: + out.append(x) + return out + + +async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> None: + service = _StreamingService() + agent = _Agent( + [ + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="Hello")}}, + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}}, + ] + ) + result = StreamResult() + + frames = await _collect( + stream_agent_events( + agent=agent, + config={"configurable": {"thread_id": "t-1"}}, + input_data={"messages": []}, + streaming_service=service, + result=result, + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:Hello", + "text_delta:text-1: world", + "text_end:text-1", + ] + assert result.accumulated_text == "Hello world" + assert result.agent_called_update_memory is False + assert agent.calls[0][1]["version"] == "v2" + + +async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) + result = StreamResult() + + _ = await _collect( + stream_agent_events( + agent=agent, + config={"configurable": {"thread_id": "t-2"}}, + input_data={"messages": []}, + streaming_service=service, + result=result, + runtime_context={"mentioned_document_ids": [1, 2]}, + ) + ) + + assert agent.calls + assert agent.calls[0][1]["context"] == {"mentioned_document_ids": [1, 2]} From 52593d88dbf2dfa5d81587048f4d2bb0ea0d5cad Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 16:00:15 +0200 Subject: [PATCH 20/58] Reorganize streaming orchestration modules into relay and orchestration folders. 
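
Structural move only at the call sites: modules shift under relay/ and
orchestration/ and imports update accordingly, e.g.:

    from app.tasks.chat.streaming.orchestration.orchestrator import stream_chat
    from app.tasks.chat.streaming.relay.event_relay import EventRelay

stream_result.StreamResult carries on as orchestration.output.StreamOutput.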
--- .../app/routes/new_chat_routes.py | 2 +- .../chat/streaming/orchestration/__init__.py | 8 +- .../streaming/orchestration/event_stream.py | 6 +- .../chat/streaming/orchestration/input.py | 23 +++++ .../{ => orchestration}/orchestrator.py | 33 +++++++ .../output.py} | 8 +- .../tasks/chat/streaming/relay/__init__.py | 4 + .../chat/streaming/{ => relay}/event_relay.py | 4 +- .../test_orchestration_event_stream.py | 6 +- .../test_orchestrator_stream_chat.py | 88 +++++++++++++++++++ 10 files changed, 170 insertions(+), 12 deletions(-) create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/input.py rename surfsense_backend/app/tasks/chat/streaming/{ => orchestration}/orchestrator.py (76%) rename surfsense_backend/app/tasks/chat/streaming/{stream_result.py => orchestration/output.py} (82%) rename surfsense_backend/app/tasks/chat/streaming/{ => relay}/event_relay.py (97%) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 7f035daef..e54497f93 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -75,7 +75,7 @@ from app.tasks.chat.stream_new_chat import ( stream_new_chat as legacy_stream_new_chat, stream_resume_chat as legacy_stream_resume_chat, ) -from app.tasks.chat.streaming.orchestrator import ( +from app.tasks.chat.streaming.orchestration.orchestrator import ( stream_chat, stream_regenerate, stream_resume, diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py index 8b586f2be..6f683a410 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py @@ -1,5 +1,11 @@ """Composable orchestration pieces for chat streaming.""" from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events +from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput +from app.tasks.chat.streaming.orchestration.output import StreamOutput -__all__ = ["stream_agent_events"] +__all__ = [ + "StreamExecutionInput", + "StreamOutput", + "stream_agent_events", +] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py index 1448cd86a..369883c3a 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py @@ -6,9 +6,9 @@ from collections.abc import AsyncIterator from typing import Any from app.agents.new_chat.feature_flags import get_flags -from app.tasks.chat.streaming.event_relay import EventRelay +from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState -from app.tasks.chat.streaming.stream_result import StreamResult async def stream_agent_events( @@ -17,7 +17,7 @@ async def stream_agent_events( config: dict[str, Any], input_data: Any, streaming_service: Any, - result: StreamResult, + result: StreamOutput, step_prefix: str = "thinking", initial_step_id: str | None = None, initial_step_title: str = "", diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py 
b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py new file mode 100644 index 000000000..13d43b612 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py @@ -0,0 +1,23 @@ +"""Inputs for orchestrator-owned streaming execution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class StreamExecutionInput: + """Container for dependencies required by ``stream_agent_events``.""" + + agent: Any + config: dict[str, Any] + input_data: Any + streaming_service: Any + step_prefix: str = "thinking" + initial_step_id: str | None = None + initial_step_title: str = "" + initial_step_items: list[str] | None = None + content_builder: Any | None = None + runtime_context: Any = None + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py similarity index 76% rename from surfsense_backend/app/tasks/chat/streaming/orchestrator.py rename to surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index e912dd632..ac7abc6f4 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -14,6 +14,9 @@ from typing import Any, Literal from app.agents.new_chat.filesystem_selection import FilesystemSelection from app.db import ChatVisibility from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat +from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events +from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput +from app.tasks.chat.streaming.orchestration.output import StreamOutput async def stream_chat( @@ -34,8 +37,38 @@ async def stream_chat( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, user_image_data_urls: list[str] | None = None, + orchestration_input: StreamExecutionInput | None = None, ) -> AsyncGenerator[str, None]: """Stream a new chat turn through the current production pipeline.""" + if orchestration_input is not None: + result = StreamOutput( + request_id=request_id, + turn_id=f"{chat_id}:orchestrator", + filesystem_mode=( + filesystem_selection.mode.value if filesystem_selection else "cloud" + ), + client_platform=( + filesystem_selection.client_platform.value + if filesystem_selection + else "web" + ), + ) + async for frame in stream_agent_events( + agent=orchestration_input.agent, + config=orchestration_input.config, + input_data=orchestration_input.input_data, + streaming_service=orchestration_input.streaming_service, + result=result, + step_prefix=orchestration_input.step_prefix, + initial_step_id=orchestration_input.initial_step_id, + initial_step_title=orchestration_input.initial_step_title, + initial_step_items=orchestration_input.initial_step_items, + content_builder=orchestration_input.content_builder, + runtime_context=orchestration_input.runtime_context, + ): + yield frame + return + async for chunk in stream_new_chat( user_query=user_query, search_space_id=search_space_id, diff --git a/surfsense_backend/app/tasks/chat/streaming/stream_result.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py similarity index 82% rename from surfsense_backend/app/tasks/chat/streaming/stream_result.py rename to surfsense_backend/app/tasks/chat/streaming/orchestration/output.py index 8ea3bd295..0c4870ec4 100644 --- 
a/surfsense_backend/app/tasks/chat/streaming/stream_result.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py @@ -1,4 +1,4 @@ -"""Mutable facts collected while streaming one agent turn.""" +"""Output facts collected while streaming one orchestrated agent turn.""" from __future__ import annotations @@ -7,7 +7,7 @@ from typing import Any @dataclass -class StreamResult: +class StreamOutput: accumulated_text: str = "" is_interrupted: bool = False interrupt_value: dict[str, Any] | None = None @@ -26,3 +26,7 @@ class StreamResult: commit_gate_reason: str = "" assistant_message_id: int | None = None content_builder: Any | None = field(default=None, repr=False) + + +# Backwards-compatible alias while imports migrate. +StreamResult = StreamOutput diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py index c1a5e7175..351e878a8 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py @@ -1,3 +1,7 @@ """Relay state: thinking steps, tool bookkeeping, and stream helpers.""" from __future__ import annotations + +from app.tasks.chat.streaming.relay.event_relay import EventRelay, EventRelayConfig + +__all__ = ["EventRelay", "EventRelayConfig"] diff --git a/surfsense_backend/app/tasks/chat/streaming/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py similarity index 97% rename from surfsense_backend/app/tasks/chat/streaming/event_relay.py rename to surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index f86337ad7..072baac72 100644 --- a/surfsense_backend/app/tasks/chat/streaming/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -16,11 +16,11 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( ) from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames +from app.tasks.chat.streaming.orchestration.output import StreamOutput from app.tasks.chat.streaming.relay.state import AgentEventRelayState from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, ) -from app.tasks.chat.streaming.stream_result import StreamResult @dataclass @@ -52,7 +52,7 @@ class EventRelay: events: AsyncIterator[dict[str, Any]], *, state: AgentEventRelayState, - result: StreamResult, + result: StreamOutput, step_prefix: str = "thinking", content_builder: Any | None = None, config: dict[str, Any] | None = None, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py index e12283a75..e0a1877a8 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -8,7 +8,7 @@ from typing import Any import pytest from app.tasks.chat.streaming.orchestration import stream_agent_events -from app.tasks.chat.streaming.stream_result import StreamResult +from app.tasks.chat.streaming.orchestration.output import StreamOutput pytestmark = pytest.mark.unit @@ -64,7 +64,7 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}}, ] ) - result = StreamResult() + result = 
StreamOutput() frames = await _collect( stream_agent_events( @@ -90,7 +90,7 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: service = _StreamingService() agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) - result = StreamResult() + result = StreamOutput() _ = await _collect( stream_agent_events( diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py new file mode 100644 index 000000000..cf54fdab0 --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -0,0 +1,88 @@ +"""Behavior tests for orchestrator ``stream_chat`` public API.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import pytest + +from app.tasks.chat.streaming.orchestration import StreamExecutionInput +from app.tasks.chat.streaming.orchestration.orchestrator import stream_chat + +pytestmark = pytest.mark.unit + + +@dataclass +class _Chunk: + content: Any = "" + additional_kwargs: dict[str, Any] = field(default_factory=dict) + tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) + + +class _StreamingService: + def __init__(self) -> None: + self._text_idx = 0 + + def generate_text_id(self) -> str: + self._text_idx += 1 + return f"text-{self._text_idx}" + + def format_text_start(self, text_id: str) -> str: + return f"text_start:{text_id}" + + def format_text_delta(self, text_id: str, text: str) -> str: + return f"text_delta:{text_id}:{text}" + + def format_text_end(self, text_id: str) -> str: + return f"text_end:{text_id}" + + +class _Agent: + def __init__(self, events: list[dict[str, Any]]) -> None: + self.events = list(events) + self.calls: list[tuple[Any, dict[str, Any]]] = [] + + async def astream_events(self, input_data: Any, **kwargs: Any): + self.calls.append((input_data, kwargs)) + for event in self.events: + yield event + + +async def _collect(stream: Any) -> list[str]: + out: list[str] = [] + async for x in stream: + out.append(x) + return out + + +async def test_stream_chat_uses_orchestration_input_path() -> None: + service = _StreamingService() + agent = _Agent( + [ + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="hello")}}, + {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="!")}}, + ] + ) + frames = await _collect( + stream_chat( + user_query="ignored-here", + search_space_id=1, + chat_id=77, + orchestration_input=StreamExecutionInput( + agent=agent, + config={"configurable": {"thread_id": "thread-1"}}, + input_data={"messages": []}, + streaming_service=service, + ), + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:hello", + "text_delta:text-1:!", + "text_end:text-1", + ] + assert agent.calls + assert agent.calls[0][1]["version"] == "v2" From 0f40279d9581515dddbd847318ef37589972630c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 16:18:29 +0200 Subject: [PATCH 21/58] Expand orchestration gate coverage to resume and regenerate flows. 
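stream_resume() and stream_regenerate() now accept the same optional
StreamExecutionInput gate that stream_chat() gained in the previous patch,
so all three entrypoints can short-circuit into stream_agent_events. The
only per-flow difference is the StreamOutput turn_id suffix
("orchestrator-resume" / "orchestrator-regenerate"); when the gate is taken,
the stream_resume_chat / stream_new_chat delegation is skipped entirely.

A minimal sketch of the resume flow under the gate, reusing the same kind of
illustrative agent and streaming-service doubles as the unit tests below.
Note that on the gated path the decisions list is not consulted, because
input_data drives the agent directly:

    from app.tasks.chat.streaming.orchestration import StreamExecutionInput
    from app.tasks.chat.streaming.orchestration.orchestrator import stream_resume

    async def collect_resume_frames(agent, streaming_service) -> list[str]:
        # Gated path: the pre-built agent is streamed directly; no
        # checkpointer or database lookups happen in stream_resume itself.
        return [
            frame
            async for frame in stream_resume(
                chat_id=9,
                search_space_id=1,
                decisions=[],
                orchestration_input=StreamExecutionInput(
                    agent=agent,
                    config={"configurable": {"thread_id": "thread-r"}},
                    input_data={"messages": []},
                    streaming_service=streaming_service,
                ),
            )
        ]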
--- .../streaming/orchestration/orchestrator.py | 60 +++++++++++++++++++ .../test_orchestration_event_stream.py | 18 ++++-- .../test_orchestrator_stream_chat.py | 58 +++++++++++++++++- 3 files changed, 128 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index ac7abc6f4..1e32e7f5a 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -101,8 +101,38 @@ async def stream_resume( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, disabled_tools: list[str] | None = None, + orchestration_input: StreamExecutionInput | None = None, ) -> AsyncGenerator[str, None]: """Resume an interrupted chat turn through the current production pipeline.""" + if orchestration_input is not None: + result = StreamOutput( + request_id=request_id, + turn_id=f"{chat_id}:orchestrator-resume", + filesystem_mode=( + filesystem_selection.mode.value if filesystem_selection else "cloud" + ), + client_platform=( + filesystem_selection.client_platform.value + if filesystem_selection + else "web" + ), + ) + async for frame in stream_agent_events( + agent=orchestration_input.agent, + config=orchestration_input.config, + input_data=orchestration_input.input_data, + streaming_service=orchestration_input.streaming_service, + result=result, + step_prefix=orchestration_input.step_prefix, + initial_step_id=orchestration_input.initial_step_id, + initial_step_title=orchestration_input.initial_step_title, + initial_step_items=orchestration_input.initial_step_items, + content_builder=orchestration_input.content_builder, + runtime_context=orchestration_input.runtime_context, + ): + yield frame + return + async for chunk in stream_resume_chat( chat_id=chat_id, search_space_id=search_space_id, @@ -136,8 +166,38 @@ async def stream_regenerate( request_id: str | None = None, user_image_data_urls: list[str] | None = None, flow: Literal["new", "regenerate"] = "regenerate", + orchestration_input: StreamExecutionInput | None = None, ) -> AsyncGenerator[str, None]: """Regenerate an assistant turn through the current production pipeline.""" + if orchestration_input is not None: + result = StreamOutput( + request_id=request_id, + turn_id=f"{chat_id}:orchestrator-regenerate", + filesystem_mode=( + filesystem_selection.mode.value if filesystem_selection else "cloud" + ), + client_platform=( + filesystem_selection.client_platform.value + if filesystem_selection + else "web" + ), + ) + async for frame in stream_agent_events( + agent=orchestration_input.agent, + config=orchestration_input.config, + input_data=orchestration_input.input_data, + streaming_service=orchestration_input.streaming_service, + result=result, + step_prefix=orchestration_input.step_prefix, + initial_step_id=orchestration_input.initial_step_id, + initial_step_title=orchestration_input.initial_step_title, + initial_step_items=orchestration_input.initial_step_items, + content_builder=orchestration_input.content_builder, + runtime_context=orchestration_input.runtime_context, + ): + yield frame + return + async for chunk in stream_new_chat( user_query=user_query, search_space_id=search_space_id, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py index 
e0a1877a8..bd154e6a0 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -84,15 +84,20 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> ] assert result.accumulated_text == "Hello world" assert result.agent_called_update_memory is False - assert agent.calls[0][1]["version"] == "v2" async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) + class _ContextAwareAgent: + async def astream_events(self, input_data: Any, **kwargs: Any): + del input_data + text = "ctx-ok" if kwargs.get("context") else "ctx-missing" + yield {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(text)}} + + agent = _ContextAwareAgent() result = StreamOutput() - _ = await _collect( + frames = await _collect( stream_agent_events( agent=agent, config={"configurable": {"thread_id": "t-2"}}, @@ -103,5 +108,8 @@ async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: ) ) - assert agent.calls - assert agent.calls[0][1]["context"] == {"mentioned_document_ids": [1, 2]} + assert frames == [ + "text_start:text-1", + "text_delta:text-1:ctx-ok", + "text_end:text-1", + ] diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py index cf54fdab0..d9cd7951f 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -8,7 +8,11 @@ from typing import Any import pytest from app.tasks.chat.streaming.orchestration import StreamExecutionInput -from app.tasks.chat.streaming.orchestration.orchestrator import stream_chat +from app.tasks.chat.streaming.orchestration.orchestrator import ( + stream_chat, + stream_regenerate, + stream_resume, +) pytestmark = pytest.mark.unit @@ -84,5 +88,53 @@ async def test_stream_chat_uses_orchestration_input_path() -> None: "text_delta:text-1:!", "text_end:text-1", ] - assert agent.calls - assert agent.calls[0][1]["version"] == "v2" + + +async def test_stream_resume_uses_orchestration_input_path() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}]) + + frames = await _collect( + stream_resume( + chat_id=9, + search_space_id=1, + decisions=[], + orchestration_input=StreamExecutionInput( + agent=agent, + config={"configurable": {"thread_id": "thread-r"}}, + input_data={"messages": []}, + streaming_service=service, + ), + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:r", + "text_end:text-1", + ] + + +async def test_stream_regenerate_uses_orchestration_input_path() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}]) + + frames = await _collect( + stream_regenerate( + user_query="q", + search_space_id=1, + chat_id=2, + orchestration_input=StreamExecutionInput( + agent=agent, + config={"configurable": {"thread_id": "thread-g"}}, + input_data={"messages": []}, + streaming_service=service, + ), + ) + ) + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:g", + "text_end:text-1", + ] From a04b2e88bdaeea624c0d192b3259bbd3482bc717 Mon Sep 17 00:00:00 2001 
From: CREDO23 Date: Thu, 7 May 2026 17:06:17 +0200 Subject: [PATCH 22/58] Wire orchestrator streaming context path and align event relay outputs. --- .../chat/streaming/orchestration/__init__.py | 12 +- .../streaming/orchestration/event_stream.py | 6 +- .../chat/streaming/orchestration/input.py | 4 +- .../streaming/orchestration/orchestrator.py | 142 ++++++++---------- .../chat/streaming/orchestration/output.py | 5 +- .../tasks/chat/streaming/relay/event_relay.py | 4 +- .../test_orchestration_event_stream.py | 16 +- .../test_orchestrator_stream_chat.py | 14 +- 8 files changed, 94 insertions(+), 109 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py index 6f683a410..b1a201fd3 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py @@ -1,11 +1,11 @@ """Composable orchestration pieces for chat streaming.""" -from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events -from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.event_stream import stream_output +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.tasks.chat.streaming.orchestration.output import StreamingResult __all__ = [ - "StreamExecutionInput", - "StreamOutput", - "stream_agent_events", + "StreamingContext", + "StreamingResult", + "stream_output", ] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py index 369883c3a..fc8c13027 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py @@ -6,18 +6,18 @@ from collections.abc import AsyncIterator from typing import Any from app.agents.new_chat.feature_flags import get_flags -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.output import StreamingResult from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState -async def stream_agent_events( +async def stream_output( *, agent: Any, config: dict[str, Any], input_data: Any, streaming_service: Any, - result: StreamOutput, + result: StreamingResult, step_prefix: str = "thinking", initial_step_id: str | None = None, initial_step_title: str = "", diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py index 13d43b612..45a33d435 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py @@ -7,8 +7,8 @@ from typing import Any @dataclass(frozen=True) -class StreamExecutionInput: - """Container for dependencies required by ``stream_agent_events``.""" +class StreamingContext: + """Container for dependencies required by ``stream_output``.""" agent: Any config: dict[str, Any] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index 1e32e7f5a..b40083f42 100644 --- 
a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -1,9 +1,4 @@ """Top-level chat streaming entrypoints. - -For now these orchestrator functions are thin compatibility wrappers around the -current ``stream_new_chat`` / ``stream_resume_chat`` implementations. Routing -calls through this module lets us cut over to the fully modular event relay in -one place later without touching API routes again. """ from __future__ import annotations @@ -14,9 +9,47 @@ from typing import Any, Literal from app.agents.new_chat.filesystem_selection import FilesystemSelection from app.db import ChatVisibility from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat -from app.tasks.chat.streaming.orchestration.event_stream import stream_agent_events -from app.tasks.chat.streaming.orchestration.input import StreamExecutionInput -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.event_stream import stream_output +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.tasks.chat.streaming.orchestration.output import StreamingResult + + +def _build_streaming_result( + *, + chat_id: int, + request_id: str | None, + filesystem_selection: FilesystemSelection | None, + suffix: str, +) -> StreamingResult: + return StreamingResult( + request_id=request_id, + turn_id=f"{chat_id}:{suffix}", + filesystem_mode=(filesystem_selection.mode.value if filesystem_selection else "cloud"), + client_platform=( + filesystem_selection.client_platform.value if filesystem_selection else "web" + ), + ) + + +async def _stream_output_with_streaming_context( + *, + streaming_context: StreamingContext, + result: StreamingResult, +) -> AsyncGenerator[str, None]: + async for frame in stream_output( + agent=streaming_context.agent, + config=streaming_context.config, + input_data=streaming_context.input_data, + streaming_service=streaming_context.streaming_service, + result=result, + step_prefix=streaming_context.step_prefix, + initial_step_id=streaming_context.initial_step_id, + initial_step_title=streaming_context.initial_step_title, + initial_step_items=streaming_context.initial_step_items, + content_builder=streaming_context.content_builder, + runtime_context=streaming_context.runtime_context, + ): + yield frame async def stream_chat( @@ -37,34 +70,19 @@ async def stream_chat( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, user_image_data_urls: list[str] | None = None, - orchestration_input: StreamExecutionInput | None = None, + streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Stream a new chat turn through the current production pipeline.""" - if orchestration_input is not None: - result = StreamOutput( + if streaming_context is not None: + result = _build_streaming_result( + chat_id=chat_id, request_id=request_id, - turn_id=f"{chat_id}:orchestrator", - filesystem_mode=( - filesystem_selection.mode.value if filesystem_selection else "cloud" - ), - client_platform=( - filesystem_selection.client_platform.value - if filesystem_selection - else "web" - ), + filesystem_selection=filesystem_selection, + suffix="orchestrator", ) - async for frame in stream_agent_events( - agent=orchestration_input.agent, - config=orchestration_input.config, - input_data=orchestration_input.input_data, - streaming_service=orchestration_input.streaming_service, + async for 
frame in _stream_output_with_streaming_context( + streaming_context=streaming_context, result=result, - step_prefix=orchestration_input.step_prefix, - initial_step_id=orchestration_input.initial_step_id, - initial_step_title=orchestration_input.initial_step_title, - initial_step_items=orchestration_input.initial_step_items, - content_builder=orchestration_input.content_builder, - runtime_context=orchestration_input.runtime_context, ): yield frame return @@ -101,34 +119,19 @@ async def stream_resume( filesystem_selection: FilesystemSelection | None = None, request_id: str | None = None, disabled_tools: list[str] | None = None, - orchestration_input: StreamExecutionInput | None = None, + streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Resume an interrupted chat turn through the current production pipeline.""" - if orchestration_input is not None: - result = StreamOutput( + if streaming_context is not None: + result = _build_streaming_result( + chat_id=chat_id, request_id=request_id, - turn_id=f"{chat_id}:orchestrator-resume", - filesystem_mode=( - filesystem_selection.mode.value if filesystem_selection else "cloud" - ), - client_platform=( - filesystem_selection.client_platform.value - if filesystem_selection - else "web" - ), + filesystem_selection=filesystem_selection, + suffix="orchestrator-resume", ) - async for frame in stream_agent_events( - agent=orchestration_input.agent, - config=orchestration_input.config, - input_data=orchestration_input.input_data, - streaming_service=orchestration_input.streaming_service, + async for frame in _stream_output_with_streaming_context( + streaming_context=streaming_context, result=result, - step_prefix=orchestration_input.step_prefix, - initial_step_id=orchestration_input.initial_step_id, - initial_step_title=orchestration_input.initial_step_title, - initial_step_items=orchestration_input.initial_step_items, - content_builder=orchestration_input.content_builder, - runtime_context=orchestration_input.runtime_context, ): yield frame return @@ -166,34 +169,19 @@ async def stream_regenerate( request_id: str | None = None, user_image_data_urls: list[str] | None = None, flow: Literal["new", "regenerate"] = "regenerate", - orchestration_input: StreamExecutionInput | None = None, + streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Regenerate an assistant turn through the current production pipeline.""" - if orchestration_input is not None: - result = StreamOutput( + if streaming_context is not None: + result = _build_streaming_result( + chat_id=chat_id, request_id=request_id, - turn_id=f"{chat_id}:orchestrator-regenerate", - filesystem_mode=( - filesystem_selection.mode.value if filesystem_selection else "cloud" - ), - client_platform=( - filesystem_selection.client_platform.value - if filesystem_selection - else "web" - ), + filesystem_selection=filesystem_selection, + suffix="orchestrator-regenerate", ) - async for frame in stream_agent_events( - agent=orchestration_input.agent, - config=orchestration_input.config, - input_data=orchestration_input.input_data, - streaming_service=orchestration_input.streaming_service, + async for frame in _stream_output_with_streaming_context( + streaming_context=streaming_context, result=result, - step_prefix=orchestration_input.step_prefix, - initial_step_id=orchestration_input.initial_step_id, - initial_step_title=orchestration_input.initial_step_title, - initial_step_items=orchestration_input.initial_step_items, - 
content_builder=orchestration_input.content_builder, - runtime_context=orchestration_input.runtime_context, ): yield frame return diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py index 0c4870ec4..60f8ee6ee 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py @@ -7,7 +7,7 @@ from typing import Any @dataclass -class StreamOutput: +class StreamingResult: accumulated_text: str = "" is_interrupted: bool = False interrupt_value: dict[str, Any] | None = None @@ -27,6 +27,3 @@ class StreamOutput: assistant_message_id: int | None = None content_builder: Any | None = field(default=None, repr=False) - -# Backwards-compatible alias while imports migrate. -StreamResult = StreamOutput diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index 072baac72..c8aebd99c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -16,7 +16,7 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( ) from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration.output import StreamingResult from app.tasks.chat.streaming.relay.state import AgentEventRelayState from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, @@ -52,7 +52,7 @@ class EventRelay: events: AsyncIterator[dict[str, Any]], *, state: AgentEventRelayState, - result: StreamOutput, + result: StreamingResult, step_prefix: str = "thinking", content_builder: Any | None = None, config: dict[str, Any] | None = None, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py index bd154e6a0..b17d82293 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py @@ -7,8 +7,8 @@ from typing import Any import pytest -from app.tasks.chat.streaming.orchestration import stream_agent_events -from app.tasks.chat.streaming.orchestration.output import StreamOutput +from app.tasks.chat.streaming.orchestration import stream_output +from app.tasks.chat.streaming.orchestration.output import StreamingResult pytestmark = pytest.mark.unit @@ -56,7 +56,7 @@ async def _collect(stream: Any) -> list[str]: return out -async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> None: +async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None: service = _StreamingService() agent = _Agent( [ @@ -64,10 +64,10 @@ async def test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}}, ] ) - result = StreamOutput() + result = StreamingResult() frames = await _collect( - stream_agent_events( + stream_output( agent=agent, config={"configurable": {"thread_id": "t-1"}}, input_data={"messages": []}, @@ -86,7 +86,7 @@ async def 
test_stream_agent_events_emits_text_lifecycle_and_updates_result() -> assert result.agent_called_update_memory is False -async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: +async def test_stream_output_passes_runtime_context_to_agent() -> None: service = _StreamingService() class _ContextAwareAgent: async def astream_events(self, input_data: Any, **kwargs: Any): @@ -95,10 +95,10 @@ async def test_stream_agent_events_passes_runtime_context_to_agent() -> None: yield {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(text)}} agent = _ContextAwareAgent() - result = StreamOutput() + result = StreamingResult() frames = await _collect( - stream_agent_events( + stream_output( agent=agent, config={"configurable": {"thread_id": "t-2"}}, input_data={"messages": []}, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py index d9cd7951f..b84193cb7 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -7,7 +7,7 @@ from typing import Any import pytest -from app.tasks.chat.streaming.orchestration import StreamExecutionInput +from app.tasks.chat.streaming.orchestration import StreamingContext from app.tasks.chat.streaming.orchestration.orchestrator import ( stream_chat, stream_regenerate, @@ -60,7 +60,7 @@ async def _collect(stream: Any) -> list[str]: return out -async def test_stream_chat_uses_orchestration_input_path() -> None: +async def test_stream_chat_uses_streaming_context_path() -> None: service = _StreamingService() agent = _Agent( [ @@ -73,7 +73,7 @@ async def test_stream_chat_uses_orchestration_input_path() -> None: user_query="ignored-here", search_space_id=1, chat_id=77, - orchestration_input=StreamExecutionInput( + streaming_context=StreamingContext( agent=agent, config={"configurable": {"thread_id": "thread-1"}}, input_data={"messages": []}, @@ -90,7 +90,7 @@ async def test_stream_chat_uses_orchestration_input_path() -> None: ] -async def test_stream_resume_uses_orchestration_input_path() -> None: +async def test_stream_resume_uses_streaming_context_path() -> None: service = _StreamingService() agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}]) @@ -99,7 +99,7 @@ async def test_stream_resume_uses_orchestration_input_path() -> None: chat_id=9, search_space_id=1, decisions=[], - orchestration_input=StreamExecutionInput( + streaming_context=StreamingContext( agent=agent, config={"configurable": {"thread_id": "thread-r"}}, input_data={"messages": []}, @@ -115,7 +115,7 @@ async def test_stream_regenerate_uses_orchestration_input_path() -> None: +async def test_stream_regenerate_uses_streaming_context_path() -> None: service = _StreamingService() agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}]) @@ -124,7 +124,7 @@ async def test_stream_regenerate_uses_orchestration_input_path() -> None: user_query="q", search_space_id=1, chat_id=2, - orchestration_input=StreamExecutionInput( + streaming_context=StreamingContext( agent=agent, config={"configurable": {"thread_id": "thread-g"}}, input_data={"messages": []}, From 52895e37e9ec86aba93cf4dd3fcf734da2b2e162 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 17:57:27 +0200 Subject: [PATCH 23/58] Build streaming 
contexts for chat resume and regenerate paths. --- .../streaming/orchestration/orchestrator.py | 55 +++- .../streaming_context/__init__.py | 18 ++ .../orchestration/streaming_context/chat.py | 258 ++++++++++++++++++ .../streaming_context/regenerate.py | 49 ++++ .../orchestration/streaming_context/resume.py | 154 +++++++++++ .../test_orchestrator_stream_chat.py | 100 +++++++ 6 files changed, 633 insertions(+), 1 deletion(-) create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py index b40083f42..80cae77a2 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py @@ -9,6 +9,11 @@ from typing import Any, Literal from app.agents.new_chat.filesystem_selection import FilesystemSelection from app.db import ChatVisibility from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat +from app.tasks.chat.streaming.orchestration.streaming_context import ( + build_chat_streaming_context, + build_regenerate_streaming_context, + build_resume_streaming_context, +) from app.tasks.chat.streaming.orchestration.event_stream import stream_output from app.tasks.chat.streaming.orchestration.input import StreamingContext from app.tasks.chat.streaming.orchestration.output import StreamingResult @@ -38,7 +43,7 @@ async def _stream_output_with_streaming_context( ) -> AsyncGenerator[str, None]: async for frame in stream_output( agent=streaming_context.agent, - config=streaming_context.config, + config=streaming_context.config, input_data=streaming_context.input_data, streaming_service=streaming_context.streaming_service, result=result, @@ -73,6 +78,24 @@ async def stream_chat( streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Stream a new chat turn through the current production pipeline.""" + if streaming_context is None: + streaming_context = await build_chat_streaming_context( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ) if streaming_context is not None: result = _build_streaming_result( chat_id=chat_id, @@ -122,6 +145,18 @@ async def stream_resume( streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Resume an interrupted chat turn through the current production pipeline.""" + if streaming_context is None: + streaming_context = await build_resume_streaming_context( + chat_id=chat_id, + search_space_id=search_space_id, + decisions=decisions, + user_id=user_id, + llm_config_id=llm_config_id, +
thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + request_id=request_id, + disabled_tools=disabled_tools, + ) if streaming_context is not None: result = _build_streaming_result( chat_id=chat_id, @@ -172,6 +207,24 @@ async def stream_regenerate( streaming_context: StreamingContext | None = None, ) -> AsyncGenerator[str, None]: """Regenerate an assistant turn through the current production pipeline.""" + if streaming_context is None: + streaming_context = await build_regenerate_streaming_context( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ) if streaming_context is not None: result = _build_streaming_result( chat_id=chat_id, diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py new file mode 100644 index 000000000..1bd3e103d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py @@ -0,0 +1,18 @@ +"""Streaming context builders per orchestrator entrypoint.""" + +from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( + build_chat_streaming_context, +) +from app.tasks.chat.streaming.orchestration.streaming_context.regenerate import ( + build_regenerate_streaming_context, +) +from app.tasks.chat.streaming.orchestration.streaming_context.resume import ( + build_resume_streaming_context, +) + +__all__ = [ + "build_chat_streaming_context", + "build_regenerate_streaming_context", + "build_resume_streaming_context", +] + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py new file mode 100644 index 000000000..eb459ae5c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py @@ -0,0 +1,258 @@ +"""Build ``StreamingContext`` for chat streaming.""" + +from __future__ import annotations + +import logging +import time +from typing import Any + +from langchain_core.messages import HumanMessage +from sqlalchemy.future import select +from sqlalchemy.orm import selectinload + +from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent +from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.agents.new_chat.checkpointer import get_checkpointer +from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.agents.new_chat.llm_config import ( + AgentConfig, + create_chat_litellm_from_agent_config, + create_chat_litellm_from_config, + load_agent_config, + load_global_llm_config_by_id, +) +from app.db import ( + ChatVisibility, + NewChatThread, + Report, + SearchSourceConnectorType, + SurfsenseDocsDocument, + async_session_maker, +) +from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id +from app.services.connector_service import 
ConnectorService +from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.stream_new_chat import format_mentioned_surfsense_docs_as_context +from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.utils.content_utils import bootstrap_history_from_db +from app.utils.user_message_multimodal import build_human_message_content + +logger = logging.getLogger(__name__) + + +async def build_chat_streaming_context( + *, + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, +) -> StreamingContext | None: + """Build context for ``stream_output`` from route-level chat inputs.""" + session = async_session_maker() + try: + requested_llm_config_id = llm_config_id + llm_config_id = ( + await resolve_or_get_pinned_llm_config_id( + session, + thread_id=chat_id, + search_space_id=search_space_id, + user_id=user_id, + selected_llm_config_id=llm_config_id, + requires_image_input=bool(user_image_data_urls), + ) + ).resolved_llm_config_id + + llm: Any + agent_config: AgentConfig | None + if llm_config_id >= 0: + agent_config = await load_agent_config( + session=session, + config_id=llm_config_id, + search_space_id=search_space_id, + ) + if not agent_config: + logger.warning("streaming context build failed: missing config %s", llm_config_id) + return None + llm = create_chat_litellm_from_agent_config(agent_config) + else: + loaded_llm_config = load_global_llm_config_by_id(llm_config_id) + if not loaded_llm_config: + logger.warning( + "streaming context build failed: missing global config %s", + llm_config_id, + ) + return None + llm = create_chat_litellm_from_config(loaded_llm_config) + agent_config = AgentConfig.from_yaml_config(loaded_llm_config) + + connector_service = ConnectorService(session, search_space_id=search_space_id) + firecrawl_api_key = None + webcrawler_connector = await connector_service.get_connector_by_type( + SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, + search_space_id, + ) + if webcrawler_connector and webcrawler_connector.config: + firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") + + checkpointer = await get_checkpointer() + visibility = thread_visibility or ChatVisibility.PRIVATE + + from app.config import config as app_config + + agent_factory = ( + create_multi_agent_chat_deep_agent + if bool(app_config.MULTI_AGENT_CHAT_ENABLED) + else create_surfsense_deep_agent + ) + agent = await build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, + ) + + langchain_messages = [] + if needs_history_bootstrap: + 
langchain_messages = await bootstrap_history_from_db( + session, + chat_id, + thread_visibility=visibility, + ) + thread_result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == chat_id) + ) + thread = thread_result.scalars().first() + if thread: + thread.needs_history_bootstrap = False + await session.commit() + + mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] + if mentioned_surfsense_doc_ids: + result = await session.execute( + select(SurfsenseDocsDocument) + .options(selectinload(SurfsenseDocsDocument.chunks)) + .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids)) + ) + mentioned_surfsense_docs = list(result.scalars().all()) + + recent_reports_result = await session.execute( + select(Report) + .filter(Report.thread_id == chat_id, Report.content.isnot(None)) + .order_by(Report.id.desc()) + .limit(3) + ) + recent_reports = list(recent_reports_result.scalars().all()) + + final_query = user_query + context_parts = [] + if mentioned_surfsense_docs: + context_parts.append( + format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) + ) + if recent_reports: + report_lines = [ + f' - report_id={r.id}, title="{r.title}", style="{r.report_style or "detailed"}"' + for r in recent_reports + ] + reports_listing = "\n".join(report_lines) + context_parts.append( + "\n" + "Previously generated reports in this conversation:\n" + f"{reports_listing}\n\n" + "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of these reports, " + "set parent_report_id to the relevant report_id above.\n" + "If the user wants a completely NEW report on a different topic, " + "leave parent_report_id unset.\n" + "" + ) + if context_parts: + joined_context = "\n\n".join(context_parts) + final_query = f"{joined_context}\n\n{user_query}" + if visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name: + final_query = f"**[{current_user_display_name}]:** {final_query}" + + human_content = build_human_message_content( + final_query, + list(user_image_data_urls or ()), + ) + langchain_messages.append(HumanMessage(content=human_content)) + + turn_id = f"{chat_id}:{int(time.time() * 1000)}" + input_state = { + "messages": langchain_messages, + "search_space_id": search_space_id, + "request_id": request_id or "unknown", + "turn_id": turn_id, + } + configurable = { + "thread_id": str(chat_id), + "request_id": request_id or "unknown", + "turn_id": turn_id, + } + if checkpoint_id: + configurable["checkpoint_id"] = checkpoint_id + config = {"configurable": configurable, "recursion_limit": 10_000} + + initial_title = ( + "Analyzing referenced content" + if mentioned_surfsense_docs + else "Understanding your request" + ) + action_verb = "Analyzing" if mentioned_surfsense_docs else "Processing" + query_excerpt = user_query[:80] + ("..." 
if len(user_query) > 80 else "") + query_part = query_excerpt if query_excerpt.strip() else "(message)" + initial_items = [f"{action_verb}: {query_part}"] + + runtime_context = SurfSenseContextSchema( + search_space_id=search_space_id, + mentioned_document_ids=list(mentioned_document_ids or []), + request_id=request_id, + turn_id=turn_id, + ) + + await session.commit() + return StreamingContext( + agent=agent, + config=config, + input_data=input_state, + streaming_service=VercelStreamingService(), + step_prefix="thinking", + initial_step_id="thinking-1", + initial_step_title=initial_title, + initial_step_items=initial_items, + content_builder=None, + runtime_context=runtime_context, + ) + except Exception: + logger.exception( + "Failed to build chat streaming context (llm_config_id=%s requested=%s)", + llm_config_id, + requested_llm_config_id, + ) + return None + finally: + await session.close() + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py new file mode 100644 index 000000000..02e871a2c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py @@ -0,0 +1,49 @@ +"""Build ``StreamingContext`` for regenerate streaming.""" + +from __future__ import annotations + +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.db import ChatVisibility +from app.tasks.chat.streaming.orchestration.input import StreamingContext +from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( + build_chat_streaming_context, +) + + +async def build_regenerate_streaming_context( + *, + user_query: str, + search_space_id: int, + chat_id: int, + user_id: str | None = None, + llm_config_id: int = -1, + mentioned_document_ids: list[int] | None = None, + mentioned_surfsense_doc_ids: list[int] | None = None, + checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, + thread_visibility: ChatVisibility | None = None, + current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + user_image_data_urls: list[str] | None = None, +) -> StreamingContext | None: + """Build context for ``stream_regenerate`` execution.""" + return await build_chat_streaming_context( + user_query=user_query, + search_space_id=search_space_id, + chat_id=chat_id, + user_id=user_id, + llm_config_id=llm_config_id, + mentioned_document_ids=mentioned_document_ids, + mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, + checkpoint_id=checkpoint_id, + needs_history_bootstrap=needs_history_bootstrap, + thread_visibility=thread_visibility, + current_user_display_name=current_user_display_name, + disabled_tools=disabled_tools, + filesystem_selection=filesystem_selection, + request_id=request_id, + user_image_data_urls=user_image_data_urls, + ) + diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py new file mode 100644 index 000000000..6d0caea4d --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py @@ -0,0 +1,154 @@ +"""Build ``StreamingContext`` for resume streaming.""" + +from __future__ import annotations + +import logging +import time +from typing import Any + +from langgraph.types import 
Command + +from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent +from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.agents.new_chat.checkpointer import get_checkpointer +from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.filesystem_selection import FilesystemSelection +from app.agents.new_chat.llm_config import ( + AgentConfig, + create_chat_litellm_from_agent_config, + create_chat_litellm_from_config, + load_agent_config, + load_global_llm_config_by_id, +) +from app.db import ChatVisibility, SearchSourceConnectorType, async_session_maker +from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id +from app.services.connector_service import ConnectorService +from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread +from app.tasks.chat.streaming.orchestration.input import StreamingContext + +logger = logging.getLogger(__name__) + + +async def build_resume_streaming_context( + *, + chat_id: int, + search_space_id: int, + decisions: list[dict], + user_id: str | None = None, + llm_config_id: int = -1, + thread_visibility: ChatVisibility | None = None, + filesystem_selection: FilesystemSelection | None = None, + request_id: str | None = None, + disabled_tools: list[str] | None = None, +) -> StreamingContext | None: + """Build context for ``stream_resume`` execution.""" + session = async_session_maker() + try: + llm_config_id = ( + await resolve_or_get_pinned_llm_config_id( + session, + thread_id=chat_id, + search_space_id=search_space_id, + user_id=user_id, + selected_llm_config_id=llm_config_id, + ) + ).resolved_llm_config_id + + llm: Any + agent_config: AgentConfig | None + if llm_config_id >= 0: + agent_config = await load_agent_config( + session=session, + config_id=llm_config_id, + search_space_id=search_space_id, + ) + if not agent_config: + logger.warning("resume context build failed: missing config %s", llm_config_id) + return None + llm = create_chat_litellm_from_agent_config(agent_config) + else: + loaded_llm_config = load_global_llm_config_by_id(llm_config_id) + if not loaded_llm_config: + logger.warning( + "resume context build failed: missing global config %s", + llm_config_id, + ) + return None + llm = create_chat_litellm_from_config(loaded_llm_config) + agent_config = AgentConfig.from_yaml_config(loaded_llm_config) + + connector_service = ConnectorService(session, search_space_id=search_space_id) + firecrawl_api_key = None + webcrawler_connector = await connector_service.get_connector_by_type( + SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, + search_space_id, + ) + if webcrawler_connector and webcrawler_connector.config: + firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") + + checkpointer = await get_checkpointer() + visibility = thread_visibility or ChatVisibility.PRIVATE + + from app.config import config as app_config + + agent_factory = ( + create_multi_agent_chat_deep_agent + if bool(app_config.MULTI_AGENT_CHAT_ENABLED) + else create_surfsense_deep_agent + ) + agent = await build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + 
disabled_tools=disabled_tools, + ) + + turn_id = f"{chat_id}:{int(time.time() * 1000)}" + config = { + "configurable": { + "thread_id": str(chat_id), + "request_id": request_id or "unknown", + "turn_id": turn_id, + "surfsense_resume_value": {"decisions": decisions}, + }, + "recursion_limit": 10_000, + } + + runtime_context = SurfSenseContextSchema( + search_space_id=search_space_id, + request_id=request_id, + turn_id=turn_id, + ) + + await session.commit() + return StreamingContext( + agent=agent, + config=config, + input_data=Command(resume={"decisions": decisions}), + streaming_service=VercelStreamingService(), + step_prefix="thinking-resume", + initial_step_id=None, + initial_step_title="", + initial_step_items=None, + content_builder=None, + runtime_context=runtime_context, + ) + except Exception: + logger.exception( + "Failed to build resume streaming context (llm_config_id=%s)", + llm_config_id, + ) + return None + finally: + await session.close() + diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py index b84193cb7..46c61b498 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py @@ -8,6 +8,7 @@ from typing import Any import pytest from app.tasks.chat.streaming.orchestration import StreamingContext +from app.tasks.chat.streaming.orchestration import orchestrator from app.tasks.chat.streaming.orchestration.orchestrator import ( stream_chat, stream_regenerate, @@ -138,3 +139,102 @@ async def test_stream_regenerate_uses_streaming_context_path() -> None: "text_delta:text-1:g", "text_end:text-1", ] + + +async def test_stream_chat_builds_streaming_context_when_not_provided() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("b")}}]) + + async def _fake_builder(**kwargs: Any) -> StreamingContext: + del kwargs + return StreamingContext( + agent=agent, + config={"configurable": {"thread_id": "thread-b"}}, + input_data={"messages": []}, + streaming_service=service, + ) + + old = orchestrator.build_chat_streaming_context + orchestrator.build_chat_streaming_context = _fake_builder + try: + frames = await _collect( + stream_chat( + user_query="q", + search_space_id=1, + chat_id=3, + ) + ) + finally: + orchestrator.build_chat_streaming_context = old + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:b", + "text_end:text-1", + ] + + +async def test_stream_resume_builds_streaming_context_when_not_provided() -> None: + service = _StreamingService() + agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("u")}}]) + + async def _fake_builder(**kwargs: Any) -> StreamingContext: + del kwargs + return StreamingContext( + agent=agent, + config={"configurable": {"thread_id": "thread-u"}}, + input_data={"messages": []}, + streaming_service=service, + ) + + old = orchestrator.build_resume_streaming_context + orchestrator.build_resume_streaming_context = _fake_builder + try: + frames = await _collect( + stream_resume( + chat_id=9, + search_space_id=1, + decisions=[], + ) + ) + finally: + orchestrator.build_resume_streaming_context = old + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:u", + "text_end:text-1", + ] + + +async def test_stream_regenerate_builds_streaming_context_when_not_provided() -> None: + service = _StreamingService() 
+ agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) + + async def _fake_builder(**kwargs: Any) -> StreamingContext: + del kwargs + return StreamingContext( + agent=agent, + config={"configurable": {"thread_id": "thread-x"}}, + input_data={"messages": []}, + streaming_service=service, + ) + + old = orchestrator.build_regenerate_streaming_context + orchestrator.build_regenerate_streaming_context = _fake_builder + try: + frames = await _collect( + stream_regenerate( + user_query="q", + search_space_id=1, + chat_id=2, + ) + ) + finally: + orchestrator.build_regenerate_streaming_context = old + + assert frames == [ + "text_start:text-1", + "text_delta:text-1:x", + "text_end:text-1", + ] From 7e07092f67d55a81916b74a5d3691dea5461f273 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 19:25:20 +0200 Subject: [PATCH 24/58] refactor(chat): drop alternate streaming entry path; use graph_stream --- surfsense_backend/app/config/__init__.py | 6 - .../app/routes/new_chat_routes.py | 32 +-- surfsense_backend/app/schemas/new_chat.py | 2 +- .../app/services/streaming/__init__.py | 6 +- .../app/services/streaming/events/error.py | 2 +- .../app/tasks/chat/streaming/__init__.py | 2 +- .../app/tasks/chat/streaming/agent_setup.py | 92 ------ .../chat/streaming/graph_stream/__init__.py | 21 ++ .../event_stream.py | 4 +- .../output.py => graph_stream/result.py} | 3 +- .../chat/streaming/orchestration/__init__.py | 11 - .../chat/streaming/orchestration/input.py | 23 -- .../streaming/orchestration/orchestrator.py | 261 ------------------ .../streaming_context/__init__.py | 18 -- .../orchestration/streaming_context/chat.py | 258 ----------------- .../streaming_context/regenerate.py | 49 ---- .../orchestration/streaming_context/resume.py | 154 ----------- .../tasks/chat/streaming/relay/__init__.py | 22 +- .../tasks/chat/streaming/relay/event_relay.py | 2 +- .../tasks/chat/streaming/test_agent_setup.py | 120 -------- .../test_orchestrator_stream_chat.py | 240 ---------------- .../chat/streaming/test_stage_1_parity.py | 4 +- ..._event_stream.py => test_stream_output.py} | 7 +- 23 files changed, 61 insertions(+), 1278 deletions(-) delete mode 100644 surfsense_backend/app/tasks/chat/streaming/agent_setup.py create mode 100644 surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py rename surfsense_backend/app/tasks/chat/streaming/{orchestration => graph_stream}/event_stream.py (92%) rename surfsense_backend/app/tasks/chat/streaming/{orchestration/output.py => graph_stream/result.py} (91%) delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/input.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py delete mode 100644 surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py delete mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py delete mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py rename surfsense_backend/tests/unit/tasks/chat/streaming/{test_orchestration_event_stream.py => test_stream_output.py} 
(93%) diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 543524456..f6f0c7f62 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -490,12 +490,6 @@ class Config: ENABLE_DESKTOP_LOCAL_FILESYSTEM = ( os.getenv("ENABLE_DESKTOP_LOCAL_FILESYSTEM", "FALSE").upper() == "TRUE" ) - # Streaming entrypoint switch. Keep this at the route layer so orchestrator - # code stays free of legacy fallback branching. - ENABLE_CHAT_STREAM_ORCHESTRATOR = ( - os.getenv("SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR", "TRUE").upper() - == "TRUE" - ) @classmethod def is_self_hosted(cls) -> bool: diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index e54497f93..743b5b849 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -72,13 +72,8 @@ from app.schemas.new_chat import ( TurnStatusResponse, ) from app.tasks.chat.stream_new_chat import ( - stream_new_chat as legacy_stream_new_chat, - stream_resume_chat as legacy_stream_resume_chat, -) -from app.tasks.chat.streaming.orchestration.orchestrator import ( - stream_chat, - stream_regenerate, - stream_resume, + stream_new_chat, + stream_resume_chat, ) from app.users import current_active_user from app.utils.perf import get_perf_logger @@ -98,10 +93,6 @@ TURN_CANCELLING_MAX_DELAY_MS = 1500 router = APIRouter() -def _use_streaming_orchestrator() -> bool: - return config.ENABLE_CHAT_STREAM_ORCHESTRATOR - - def _resolve_filesystem_selection( *, mode: str, @@ -1782,11 +1773,7 @@ async def handle_new_chat( ) return StreamingResponse( - ( - stream_chat - if _use_streaming_orchestrator() - else legacy_stream_new_chat - )( + stream_new_chat( user_query=request.user_query, search_space_id=request.search_space_id, chat_id=request.chat_id, @@ -2271,12 +2258,7 @@ async def regenerate_response( else None ) try: - regenerate_fn = ( - stream_regenerate - if _use_streaming_orchestrator() - else legacy_stream_new_chat - ) - async for chunk in regenerate_fn( + async for chunk in stream_new_chat( user_query=str(user_query_to_use), search_space_id=request.search_space_id, chat_id=thread_id, @@ -2408,11 +2390,7 @@ async def resume_chat( await session.close() return StreamingResponse( - ( - stream_resume - if _use_streaming_orchestrator() - else legacy_stream_resume_chat - )( + stream_resume_chat( chat_id=thread_id, search_space_id=request.search_space_id, decisions=decisions, diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 95d183433..fe8dab076 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -380,7 +380,7 @@ class ResumeRequest(BaseModel): "/regenerate. Resume reuses the original interrupted user " "turn so the server does not write a new user message. " "Currently unused but accepted to keep request bodies " - "uniform across the three streaming entrypoints." + "uniform across new-message, regenerate, and resume stream routes." 
        ),
    )
diff --git a/surfsense_backend/app/services/streaming/__init__.py b/surfsense_backend/app/services/streaming/__init__.py
index 287d48a7a..3ec9b9cf1 100644
--- a/surfsense_backend/app/services/streaming/__init__.py
+++ b/surfsense_backend/app/services/streaming/__init__.py
@@ -4,7 +4,7 @@ Layout:
 * ``envelope/`` - SSE wire framing + ID generators
 * ``emitter/`` - identity of the agent that emitted an event + runtime registry
 * ``events/`` - one module per SSE event family
-* ``service.py`` - composition root used by the orchestrator
+* ``service.py`` - composition root used when emitting chat SSE
 * ``interrupt_correlation.py`` - id-aware lookup over LangGraph state
 
 Naming on the wire:
@@ -13,8 +13,8 @@ Naming on the wire:
 * Every SurfSense-added field uses ``snake_case``, including the
   top-level ``emitted_by`` envelope and all inner ``data`` payloads.
 
-Production keeps using ``app.services.new_streaming_service`` and
-``app.tasks.chat.stream_new_chat`` until the cutover phase.
+Production chat streaming uses ``app.services.new_streaming_service``,
+driven by ``app.tasks.chat.stream_new_chat`` and the chat routes.
 """
 
 from __future__ import annotations
diff --git a/surfsense_backend/app/services/streaming/events/error.py b/surfsense_backend/app/services/streaming/events/error.py
index cd190d1f4..a1e8e01ca 100644
--- a/surfsense_backend/app/services/streaming/events/error.py
+++ b/surfsense_backend/app/services/streaming/events/error.py
@@ -1,4 +1,4 @@
-"""Single terminal error path the orchestrator must route through."""
+"""The single terminal error path that chat streaming must route through."""
 
 from __future__ import annotations
 
diff --git a/surfsense_backend/app/tasks/chat/streaming/__init__.py b/surfsense_backend/app/tasks/chat/streaming/__init__.py
index bb06cc021..70c99342a 100644
--- a/surfsense_backend/app/tasks/chat/streaming/__init__.py
+++ b/surfsense_backend/app/tasks/chat/streaming/__init__.py
@@ -1,3 +1,3 @@
-"""Chat streaming orchestrator and event relay."""
+"""Chat streaming helpers (e.g.
LangGraph → SSE relay under ``graph_stream``).""" from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/streaming/agent_setup.py b/surfsense_backend/app/tasks/chat/streaming/agent_setup.py deleted file mode 100644 index f67c6ad65..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/agent_setup.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Agent setup helpers for orchestrated chat streaming.""" - -from __future__ import annotations - -import contextlib -import logging -from collections.abc import Callable -from typing import Any - -_PREFLIGHT_TIMEOUT_SEC: float = 2.5 -_PREFLIGHT_MAX_TOKENS: int = 1 - - -async def preflight_llm( - llm: Any, - *, - is_provider_rate_limited: Callable[[BaseException], bool], -) -> None: - """Issue a minimal completion probe to catch immediate provider 429s.""" - from litellm import acompletion - - model = getattr(llm, "model", None) - if not model or model == "auto": - return - - try: - await acompletion( - model=model, - messages=[{"role": "user", "content": "ping"}], - api_key=getattr(llm, "api_key", None), - api_base=getattr(llm, "api_base", None), - max_tokens=_PREFLIGHT_MAX_TOKENS, - timeout=_PREFLIGHT_TIMEOUT_SEC, - stream=False, - metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]}, - ) - except Exception as exc: - if is_provider_rate_limited(exc): - raise - logging.getLogger(__name__).debug( - "auto_pin_preflight non_rate_limit_error model=%s err=%s", - model, - exc, - ) - - -async def build_main_agent_for_thread( - agent_factory: Any, - *, - llm: Any, - search_space_id: int, - db_session: Any, - connector_service: Any, - checkpointer: Any, - user_id: str | None, - thread_id: int | None, - agent_config: Any, - firecrawl_api_key: str | None, - thread_visibility: Any, - filesystem_selection: Any, - disabled_tools: list[str] | None = None, - mentioned_document_ids: list[int] | None = None, -) -> Any: - """Run one canonical agent-build call for a single thread.""" - return await agent_factory( - llm=llm, - search_space_id=search_space_id, - db_session=db_session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=thread_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=thread_visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - ) - - -async def settle_speculative_agent_build(task: Any) -> None: - """Wait for a discarded speculative build and swallow its outcome.""" - with contextlib.suppress(BaseException): - await task - - -__all__ = [ - "build_main_agent_for_thread", - "preflight_llm", - "settle_speculative_agent_build", -] diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py new file mode 100644 index 000000000..e3bf0426c --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/__init__.py @@ -0,0 +1,21 @@ +"""LangGraph ``astream_events`` → SSE (``stream_output`` + ``StreamingResult``). + +Imports are lazy to avoid a circular import with ``relay.event_relay``. 
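+
+A rough usage sketch (not a verbatim call site: the ``agent``, ``config``,
+and streaming-service objects are assumed to come from the caller, and
+optional presentation kwargs such as ``step_prefix`` keep their defaults):
+
+    from app.tasks.chat.streaming.graph_stream import (
+        StreamingResult,
+        stream_output,
+    )
+
+    async def relay_turn(agent, config, streaming_service):
+        # Per-turn facts accumulate on ``result`` while SSE strings stream out.
+        result = StreamingResult(request_id="req-1", turn_id="42:1700000000000")
+        async for frame in stream_output(
+            agent=agent,
+            config=config,
+            input_data={"messages": []},
+            streaming_service=streaming_service,
+            result=result,
+        ):
+            yield frame  # one pre-framed SSE string per relayed event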
+""" + +from __future__ import annotations + +__all__ = ["StreamingResult", "stream_output"] + + +def __getattr__(name: str): + if name == "stream_output": + from app.tasks.chat.streaming.graph_stream.event_stream import stream_output + + return stream_output + if name == "StreamingResult": + from app.tasks.chat.streaming.graph_stream.result import StreamingResult + + return StreamingResult + msg = f"module {__name__!r} has no attribute {name!r}" + raise AttributeError(msg) diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py similarity index 92% rename from surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py rename to surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py index fc8c13027..9142dd914 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py @@ -1,4 +1,4 @@ -"""Run LangGraph event streams through the EventRelay.""" +"""Run LangGraph event streams through ``EventRelay``.""" from __future__ import annotations @@ -6,7 +6,7 @@ from collections.abc import AsyncIterator from typing import Any from app.agents.new_chat.feature_flags import get_flags -from app.tasks.chat.streaming.orchestration.output import StreamingResult +from app.tasks.chat.streaming.graph_stream.result import StreamingResult from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py similarity index 91% rename from surfsense_backend/app/tasks/chat/streaming/orchestration/output.py rename to surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py index 60f8ee6ee..40404e9d0 100644 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/output.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py @@ -1,4 +1,4 @@ -"""Output facts collected while streaming one orchestrated agent turn.""" +"""Mutable facts collected while relaying one agent stream (``stream_output``).""" from __future__ import annotations @@ -26,4 +26,3 @@ class StreamingResult: commit_gate_reason: str = "" assistant_message_id: int | None = None content_builder: Any | None = field(default=None, repr=False) - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py deleted file mode 100644 index b1a201fd3..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Composable orchestration pieces for chat streaming.""" - -from app.tasks.chat.streaming.orchestration.event_stream import stream_output -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.tasks.chat.streaming.orchestration.output import StreamingResult - -__all__ = [ - "StreamingContext", - "StreamingResult", - "stream_output", -] diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py deleted file mode 100644 index 45a33d435..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/input.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Inputs for orchestrator-owned streaming execution.""" - -from 
__future__ import annotations - -from dataclasses import dataclass -from typing import Any - - -@dataclass(frozen=True) -class StreamingContext: - """Container for dependencies required by ``stream_output``.""" - - agent: Any - config: dict[str, Any] - input_data: Any - streaming_service: Any - step_prefix: str = "thinking" - initial_step_id: str | None = None - initial_step_title: str = "" - initial_step_items: list[str] | None = None - content_builder: Any | None = None - runtime_context: Any = None - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py deleted file mode 100644 index 80cae77a2..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/orchestrator.py +++ /dev/null @@ -1,261 +0,0 @@ -"""Top-level chat streaming entrypoints. -""" - -from __future__ import annotations - -from collections.abc import AsyncGenerator -from typing import Any, Literal - -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.db import ChatVisibility -from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat -from app.tasks.chat.streaming.orchestration.streaming_context import ( - build_chat_streaming_context, - build_regenerate_streaming_context, - build_resume_streaming_context, -) -from app.tasks.chat.streaming.orchestration.event_stream import stream_output -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.tasks.chat.streaming.orchestration.output import StreamingResult - - -def _build_streaming_result( - *, - chat_id: int, - request_id: str | None, - filesystem_selection: FilesystemSelection | None, - suffix: str, -) -> StreamingResult: - return StreamingResult( - request_id=request_id, - turn_id=f"{chat_id}:{suffix}", - filesystem_mode=(filesystem_selection.mode.value if filesystem_selection else "cloud"), - client_platform=( - filesystem_selection.client_platform.value if filesystem_selection else "web" - ), - ) - - -async def _stream_output_with_streaming_context( - *, - streaming_context: StreamingContext, - result: StreamingResult, -) -> AsyncGenerator[str, None]: - async for frame in stream_output( - agent=streaming_context.agent, - config=streaming_context.config, - input_data=streaming_context.input_data, - streaming_service=streaming_context.streaming_service, - result=result, - step_prefix=streaming_context.step_prefix, - initial_step_id=streaming_context.initial_step_id, - initial_step_title=streaming_context.initial_step_title, - initial_step_items=streaming_context.initial_step_items, - content_builder=streaming_context.content_builder, - runtime_context=streaming_context.runtime_context, - ): - yield frame - - -async def stream_chat( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - mentioned_documents: list[dict[str, Any]] | None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, - streaming_context: StreamingContext | None = None, -) -> AsyncGenerator[str, None]: - """Stream a new chat 
turn through the current production pipeline.""" - if streaming_context is None: - streaming_context = await build_chat_streaming_context( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ) - if streaming_context is not None: - result = _build_streaming_result( - chat_id=chat_id, - request_id=request_id, - filesystem_selection=filesystem_selection, - suffix="orchestrator", - ) - async for frame in _stream_output_with_streaming_context( - streaming_context=streaming_context, - result=result, - ): - yield frame - return - - async for chunk in stream_new_chat( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - mentioned_documents=mentioned_documents, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ): - yield chunk - - -async def stream_resume( - *, - chat_id: int, - search_space_id: int, - decisions: list[dict], - user_id: str | None = None, - llm_config_id: int = -1, - thread_visibility: ChatVisibility | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - disabled_tools: list[str] | None = None, - streaming_context: StreamingContext | None = None, -) -> AsyncGenerator[str, None]: - """Resume an interrupted chat turn through the current production pipeline.""" - if streaming_context is None: - streaming_context = await build_resume_streaming_context( - chat_id=chat_id, - search_space_id=search_space_id, - decisions=decisions, - user_id=user_id, - llm_config_id=llm_config_id, - thread_visibility=thread_visibility, - filesystem_selection=filesystem_selection, - request_id=request_id, - disabled_tools=disabled_tools, - ) - if streaming_context is not None: - result = _build_streaming_result( - chat_id=chat_id, - request_id=request_id, - filesystem_selection=filesystem_selection, - suffix="orchestrator-resume", - ) - async for frame in _stream_output_with_streaming_context( - streaming_context=streaming_context, - result=result, - ): - yield frame - return - - async for chunk in stream_resume_chat( - chat_id=chat_id, - search_space_id=search_space_id, - decisions=decisions, - user_id=user_id, - llm_config_id=llm_config_id, - thread_visibility=thread_visibility, - filesystem_selection=filesystem_selection, - request_id=request_id, - disabled_tools=disabled_tools, - ): - yield chunk - - -async def stream_regenerate( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - mentioned_documents: list[dict[str, Any]] 
| None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, - flow: Literal["new", "regenerate"] = "regenerate", - streaming_context: StreamingContext | None = None, -) -> AsyncGenerator[str, None]: - """Regenerate an assistant turn through the current production pipeline.""" - if streaming_context is None: - streaming_context = await build_regenerate_streaming_context( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ) - if streaming_context is not None: - result = _build_streaming_result( - chat_id=chat_id, - request_id=request_id, - filesystem_selection=filesystem_selection, - suffix="orchestrator-regenerate", - ) - async for frame in _stream_output_with_streaming_context( - streaming_context=streaming_context, - result=result, - ): - yield frame - return - - async for chunk in stream_new_chat( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - mentioned_documents=mentioned_documents, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - flow=flow, - ): - yield chunk diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py deleted file mode 100644 index 1bd3e103d..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Streaming context builders per orchestrator entrypoint.""" - -from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( - build_chat_streaming_context, -) -from app.tasks.chat.streaming.orchestration.streaming_context.regenerate import ( - build_regenerate_streaming_context, -) -from app.tasks.chat.streaming.orchestration.streaming_context.resume import ( - build_resume_streaming_context, -) - -__all__ = [ - "build_chat_streaming_context", - "build_regenerate_streaming_context", - "build_resume_streaming_context", -] - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py deleted file mode 100644 index eb459ae5c..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/chat.py +++ /dev/null @@ -1,258 +0,0 @@ -"""Build 
``StreamingContext`` for chat streaming.""" - -from __future__ import annotations - -import logging -import time -from typing import Any - -from langchain_core.messages import HumanMessage -from sqlalchemy.future import select -from sqlalchemy.orm import selectinload - -from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent -from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent -from app.agents.new_chat.checkpointer import get_checkpointer -from app.agents.new_chat.context import SurfSenseContextSchema -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.agents.new_chat.llm_config import ( - AgentConfig, - create_chat_litellm_from_agent_config, - create_chat_litellm_from_config, - load_agent_config, - load_global_llm_config_by_id, -) -from app.db import ( - ChatVisibility, - NewChatThread, - Report, - SearchSourceConnectorType, - SurfsenseDocsDocument, - async_session_maker, -) -from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id -from app.services.connector_service import ConnectorService -from app.services.new_streaming_service import VercelStreamingService -from app.tasks.chat.stream_new_chat import format_mentioned_surfsense_docs_as_context -from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.utils.content_utils import bootstrap_history_from_db -from app.utils.user_message_multimodal import build_human_message_content - -logger = logging.getLogger(__name__) - - -async def build_chat_streaming_context( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, -) -> StreamingContext | None: - """Build context for ``stream_output`` from route-level chat inputs.""" - session = async_session_maker() - try: - requested_llm_config_id = llm_config_id - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=llm_config_id, - requires_image_input=bool(user_image_data_urls), - ) - ).resolved_llm_config_id - - llm: Any - agent_config: AgentConfig | None - if llm_config_id >= 0: - agent_config = await load_agent_config( - session=session, - config_id=llm_config_id, - search_space_id=search_space_id, - ) - if not agent_config: - logger.warning("streaming context build failed: missing config %s", llm_config_id) - return None - llm = create_chat_litellm_from_agent_config(agent_config) - else: - loaded_llm_config = load_global_llm_config_by_id(llm_config_id) - if not loaded_llm_config: - logger.warning( - "streaming context build failed: missing global config %s", - llm_config_id, - ) - return None - llm = create_chat_litellm_from_config(loaded_llm_config) - agent_config = AgentConfig.from_yaml_config(loaded_llm_config) - - connector_service = ConnectorService(session, search_space_id=search_space_id) - firecrawl_api_key = None - webcrawler_connector = 
await connector_service.get_connector_by_type( - SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, - search_space_id, - ) - if webcrawler_connector and webcrawler_connector.config: - firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") - - checkpointer = await get_checkpointer() - visibility = thread_visibility or ChatVisibility.PRIVATE - - from app.config import config as app_config - - agent_factory = ( - create_multi_agent_chat_deep_agent - if bool(app_config.MULTI_AGENT_CHAT_ENABLED) - else create_surfsense_deep_agent - ) - agent = await build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - ) - - langchain_messages = [] - if needs_history_bootstrap: - langchain_messages = await bootstrap_history_from_db( - session, - chat_id, - thread_visibility=visibility, - ) - thread_result = await session.execute( - select(NewChatThread).filter(NewChatThread.id == chat_id) - ) - thread = thread_result.scalars().first() - if thread: - thread.needs_history_bootstrap = False - await session.commit() - - mentioned_surfsense_docs: list[SurfsenseDocsDocument] = [] - if mentioned_surfsense_doc_ids: - result = await session.execute( - select(SurfsenseDocsDocument) - .options(selectinload(SurfsenseDocsDocument.chunks)) - .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids)) - ) - mentioned_surfsense_docs = list(result.scalars().all()) - - recent_reports_result = await session.execute( - select(Report) - .filter(Report.thread_id == chat_id, Report.content.isnot(None)) - .order_by(Report.id.desc()) - .limit(3) - ) - recent_reports = list(recent_reports_result.scalars().all()) - - final_query = user_query - context_parts = [] - if mentioned_surfsense_docs: - context_parts.append( - format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs) - ) - if recent_reports: - report_lines = [ - f' - report_id={r.id}, title="{r.title}", style="{r.report_style or "detailed"}"' - for r in recent_reports - ] - reports_listing = "\n".join(report_lines) - context_parts.append( - "\n" - "Previously generated reports in this conversation:\n" - f"{reports_listing}\n\n" - "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of these reports, " - "set parent_report_id to the relevant report_id above.\n" - "If the user wants a completely NEW report on a different topic, " - "leave parent_report_id unset.\n" - "" - ) - if context_parts: - joined_context = "\n\n".join(context_parts) - final_query = f"{joined_context}\n\n{user_query}" - if visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name: - final_query = f"**[{current_user_display_name}]:** {final_query}" - - human_content = build_human_message_content( - final_query, - list(user_image_data_urls or ()), - ) - langchain_messages.append(HumanMessage(content=human_content)) - - turn_id = f"{chat_id}:{int(time.time() * 1000)}" - input_state = { - "messages": langchain_messages, - "search_space_id": search_space_id, - "request_id": request_id or "unknown", - "turn_id": turn_id, - } - configurable = { - "thread_id": str(chat_id), - "request_id": request_id or "unknown", - "turn_id": turn_id, - } - if checkpoint_id: - 
configurable["checkpoint_id"] = checkpoint_id - config = {"configurable": configurable, "recursion_limit": 10_000} - - initial_title = ( - "Analyzing referenced content" - if mentioned_surfsense_docs - else "Understanding your request" - ) - action_verb = "Analyzing" if mentioned_surfsense_docs else "Processing" - query_excerpt = user_query[:80] + ("..." if len(user_query) > 80 else "") - query_part = query_excerpt if query_excerpt.strip() else "(message)" - initial_items = [f"{action_verb}: {query_part}"] - - runtime_context = SurfSenseContextSchema( - search_space_id=search_space_id, - mentioned_document_ids=list(mentioned_document_ids or []), - request_id=request_id, - turn_id=turn_id, - ) - - await session.commit() - return StreamingContext( - agent=agent, - config=config, - input_data=input_state, - streaming_service=VercelStreamingService(), - step_prefix="thinking", - initial_step_id="thinking-1", - initial_step_title=initial_title, - initial_step_items=initial_items, - content_builder=None, - runtime_context=runtime_context, - ) - except Exception: - logger.exception( - "Failed to build chat streaming context (llm_config_id=%s requested=%s)", - llm_config_id, - requested_llm_config_id, - ) - return None - finally: - await session.close() - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py deleted file mode 100644 index 02e871a2c..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/regenerate.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Build ``StreamingContext`` for regenerate streaming.""" - -from __future__ import annotations - -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.db import ChatVisibility -from app.tasks.chat.streaming.orchestration.input import StreamingContext -from app.tasks.chat.streaming.orchestration.streaming_context.chat import ( - build_chat_streaming_context, -) - - -async def build_regenerate_streaming_context( - *, - user_query: str, - search_space_id: int, - chat_id: int, - user_id: str | None = None, - llm_config_id: int = -1, - mentioned_document_ids: list[int] | None = None, - mentioned_surfsense_doc_ids: list[int] | None = None, - checkpoint_id: str | None = None, - needs_history_bootstrap: bool = False, - thread_visibility: ChatVisibility | None = None, - current_user_display_name: str | None = None, - disabled_tools: list[str] | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - user_image_data_urls: list[str] | None = None, -) -> StreamingContext | None: - """Build context for ``stream_regenerate`` execution.""" - return await build_chat_streaming_context( - user_query=user_query, - search_space_id=search_space_id, - chat_id=chat_id, - user_id=user_id, - llm_config_id=llm_config_id, - mentioned_document_ids=mentioned_document_ids, - mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids, - checkpoint_id=checkpoint_id, - needs_history_bootstrap=needs_history_bootstrap, - thread_visibility=thread_visibility, - current_user_display_name=current_user_display_name, - disabled_tools=disabled_tools, - filesystem_selection=filesystem_selection, - request_id=request_id, - user_image_data_urls=user_image_data_urls, - ) - diff --git a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py 
b/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py deleted file mode 100644 index 6d0caea4d..000000000 --- a/surfsense_backend/app/tasks/chat/streaming/orchestration/streaming_context/resume.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Build ``StreamingContext`` for resume streaming.""" - -from __future__ import annotations - -import logging -import time -from typing import Any - -from langgraph.types import Command - -from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent -from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent -from app.agents.new_chat.checkpointer import get_checkpointer -from app.agents.new_chat.context import SurfSenseContextSchema -from app.agents.new_chat.filesystem_selection import FilesystemSelection -from app.agents.new_chat.llm_config import ( - AgentConfig, - create_chat_litellm_from_agent_config, - create_chat_litellm_from_config, - load_agent_config, - load_global_llm_config_by_id, -) -from app.db import ChatVisibility, SearchSourceConnectorType, async_session_maker -from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id -from app.services.connector_service import ConnectorService -from app.services.new_streaming_service import VercelStreamingService -from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread -from app.tasks.chat.streaming.orchestration.input import StreamingContext - -logger = logging.getLogger(__name__) - - -async def build_resume_streaming_context( - *, - chat_id: int, - search_space_id: int, - decisions: list[dict], - user_id: str | None = None, - llm_config_id: int = -1, - thread_visibility: ChatVisibility | None = None, - filesystem_selection: FilesystemSelection | None = None, - request_id: str | None = None, - disabled_tools: list[str] | None = None, -) -> StreamingContext | None: - """Build context for ``stream_resume`` execution.""" - session = async_session_maker() - try: - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=llm_config_id, - ) - ).resolved_llm_config_id - - llm: Any - agent_config: AgentConfig | None - if llm_config_id >= 0: - agent_config = await load_agent_config( - session=session, - config_id=llm_config_id, - search_space_id=search_space_id, - ) - if not agent_config: - logger.warning("resume context build failed: missing config %s", llm_config_id) - return None - llm = create_chat_litellm_from_agent_config(agent_config) - else: - loaded_llm_config = load_global_llm_config_by_id(llm_config_id) - if not loaded_llm_config: - logger.warning( - "resume context build failed: missing global config %s", - llm_config_id, - ) - return None - llm = create_chat_litellm_from_config(loaded_llm_config) - agent_config = AgentConfig.from_yaml_config(loaded_llm_config) - - connector_service = ConnectorService(session, search_space_id=search_space_id) - firecrawl_api_key = None - webcrawler_connector = await connector_service.get_connector_by_type( - SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, - search_space_id, - ) - if webcrawler_connector and webcrawler_connector.config: - firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY") - - checkpointer = await get_checkpointer() - visibility = thread_visibility or ChatVisibility.PRIVATE - - from app.config import config as app_config - - agent_factory = ( - create_multi_agent_chat_deep_agent - if 
bool(app_config.MULTI_AGENT_CHAT_ENABLED) - else create_surfsense_deep_agent - ) - agent = await build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - ) - - turn_id = f"{chat_id}:{int(time.time() * 1000)}" - config = { - "configurable": { - "thread_id": str(chat_id), - "request_id": request_id or "unknown", - "turn_id": turn_id, - "surfsense_resume_value": {"decisions": decisions}, - }, - "recursion_limit": 10_000, - } - - runtime_context = SurfSenseContextSchema( - search_space_id=search_space_id, - request_id=request_id, - turn_id=turn_id, - ) - - await session.commit() - return StreamingContext( - agent=agent, - config=config, - input_data=Command(resume={"decisions": decisions}), - streaming_service=VercelStreamingService(), - step_prefix="thinking-resume", - initial_step_id=None, - initial_step_title="", - initial_step_items=None, - content_builder=None, - runtime_context=runtime_context, - ) - except Exception: - logger.exception( - "Failed to build resume streaming context (llm_config_id=%s)", - llm_config_id, - ) - return None - finally: - await session.close() - diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py index 351e878a8..18eda9a6d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/__init__.py @@ -1,7 +1,23 @@ -"""Relay state: thinking steps, tool bookkeeping, and stream helpers.""" +"""Relay: thinking steps, tool bookkeeping, and ``EventRelay``. + +Package imports are lazy so ``relay.thinking_step_sse`` (and siblings) can load +without pulling in ``event_relay`` (which imports handler modules that may +import those siblings). 
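+
+A small sketch of the deferred import (attribute access runs the
+``__getattr__`` hook below, so ``event_relay`` loads on first use only):
+
+    from app.tasks.chat.streaming import relay
+
+    relay_cls = relay.EventRelay        # imports relay.event_relay here
+    config_cls = relay.EventRelayConfig  # re-runs the hook; cached in sys.modules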
+""" from __future__ import annotations -from app.tasks.chat.streaming.relay.event_relay import EventRelay, EventRelayConfig - __all__ = ["EventRelay", "EventRelayConfig"] + + +def __getattr__(name: str): + if name == "EventRelay": + from app.tasks.chat.streaming.relay.event_relay import EventRelay + + return EventRelay + if name == "EventRelayConfig": + from app.tasks.chat.streaming.relay.event_relay import EventRelayConfig + + return EventRelayConfig + msg = f"module {__name__!r} has no attribute {name!r}" + raise AttributeError(msg) diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index c8aebd99c..872998926 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -7,6 +7,7 @@ from dataclasses import dataclass, field from typing import Any from app.services.streaming.emitter import EmitterRegistry +from app.tasks.chat.streaming.graph_stream.result import StreamingResult from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames from app.tasks.chat.streaming.handlers.chat_model_stream import ( iter_chat_model_stream_frames, @@ -16,7 +17,6 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import ( ) from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames -from app.tasks.chat.streaming.orchestration.output import StreamingResult from app.tasks.chat.streaming.relay.state import AgentEventRelayState from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py deleted file mode 100644 index e1f7dd027..000000000 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py +++ /dev/null @@ -1,120 +0,0 @@ -"""Behavior tests for streaming agent setup helpers.""" - -from __future__ import annotations - -import sys -import types -from typing import Any - -import pytest - -from app.tasks.chat.streaming import agent_setup - -pytestmark = pytest.mark.unit - - -async def test_preflight_llm_calls_litellm_when_model_present( - monkeypatch: pytest.MonkeyPatch, -) -> None: - calls: dict[str, Any] = {} - - async def _fake_acompletion(**kwargs: Any): - calls.update(kwargs) - return {"ok": True} - - monkeypatch.setitem( - sys.modules, - "litellm", - types.SimpleNamespace(acompletion=_fake_acompletion), - ) - - llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b") - await agent_setup.preflight_llm(llm, is_provider_rate_limited=lambda _: False) - - assert calls["model"] == "openai/test" - assert calls["max_tokens"] == 1 - assert calls["timeout"] == 2.5 - assert calls["stream"] is False - - -async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None: - class _RateLimitedError(Exception): - pass - - async def _fake_acompletion(**kwargs: Any): - del kwargs - raise _RateLimitedError("rl") - - monkeypatch.setitem( - sys.modules, - "litellm", - types.SimpleNamespace(acompletion=_fake_acompletion), - ) - - with pytest.raises(_RateLimitedError): - await agent_setup.preflight_llm( - types.SimpleNamespace(model="openai/test"), - is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError), - ) - - -async def 
test_preflight_llm_skips_probe_for_auto_model( - monkeypatch: pytest.MonkeyPatch, -) -> None: - called = {"count": 0} - - async def _fake_acompletion(**kwargs: Any): - del kwargs - called["count"] += 1 - return {"ok": True} - - monkeypatch.setitem( - sys.modules, - "litellm", - types.SimpleNamespace(acompletion=_fake_acompletion), - ) - - await agent_setup.preflight_llm( - types.SimpleNamespace(model="auto"), - is_provider_rate_limited=lambda _: False, - ) - assert called["count"] == 0 - - -async def test_build_main_agent_for_thread_forwards_arguments() -> None: - seen: dict[str, Any] = {} - - async def _factory(**kwargs: Any): - seen.update(kwargs) - return "agent" - - out = await agent_setup.build_main_agent_for_thread( - _factory, - llm="llm", - search_space_id=1, - db_session="db", - connector_service="connector", - checkpointer="cp", - user_id="u", - thread_id=10, - agent_config="cfg", - firecrawl_api_key="key", - thread_visibility="vis", - filesystem_selection="fs", - disabled_tools=["a"], - mentioned_document_ids=[5], - ) - assert out == "agent" - assert seen["thread_id"] == 10 - assert seen["mentioned_document_ids"] == [5] - - -async def test_settle_speculative_agent_build_swallows_exceptions() -> None: - async def _boom() -> None: - raise RuntimeError("ignore") - - import asyncio - - task = asyncio.create_task(_boom()) - await agent_setup.settle_speculative_agent_build(task) - assert task.done() diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py deleted file mode 100644 index 46c61b498..000000000 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py +++ /dev/null @@ -1,240 +0,0 @@ -"""Behavior tests for orchestrator ``stream_chat`` public API.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any - -import pytest - -from app.tasks.chat.streaming.orchestration import StreamingContext -from app.tasks.chat.streaming.orchestration import orchestrator -from app.tasks.chat.streaming.orchestration.orchestrator import ( - stream_chat, - stream_regenerate, - stream_resume, -) - -pytestmark = pytest.mark.unit - - -@dataclass -class _Chunk: - content: Any = "" - additional_kwargs: dict[str, Any] = field(default_factory=dict) - tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) - - -class _StreamingService: - def __init__(self) -> None: - self._text_idx = 0 - - def generate_text_id(self) -> str: - self._text_idx += 1 - return f"text-{self._text_idx}" - - def format_text_start(self, text_id: str) -> str: - return f"text_start:{text_id}" - - def format_text_delta(self, text_id: str, text: str) -> str: - return f"text_delta:{text_id}:{text}" - - def format_text_end(self, text_id: str) -> str: - return f"text_end:{text_id}" - - -class _Agent: - def __init__(self, events: list[dict[str, Any]]) -> None: - self.events = list(events) - self.calls: list[tuple[Any, dict[str, Any]]] = [] - - async def astream_events(self, input_data: Any, **kwargs: Any): - self.calls.append((input_data, kwargs)) - for event in self.events: - yield event - - -async def _collect(stream: Any) -> list[str]: - out: list[str] = [] - async for x in stream: - out.append(x) - return out - - -async def test_stream_chat_uses_streaming_context_path() -> None: - service = _StreamingService() - agent = _Agent( - [ - {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="hello")}}, - 
{"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="!")}}, - ] - ) - frames = await _collect( - stream_chat( - user_query="ignored-here", - search_space_id=1, - chat_id=77, - streaming_context=StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-1"}}, - input_data={"messages": []}, - streaming_service=service, - ), - ) - ) - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:hello", - "text_delta:text-1:!", - "text_end:text-1", - ] - - -async def test_stream_resume_uses_streaming_context_path() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}]) - - frames = await _collect( - stream_resume( - chat_id=9, - search_space_id=1, - decisions=[], - streaming_context=StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-r"}}, - input_data={"messages": []}, - streaming_service=service, - ), - ) - ) - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:r", - "text_end:text-1", - ] - - -async def test_stream_regenerate_uses_streaming_context_path() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}]) - - frames = await _collect( - stream_regenerate( - user_query="q", - search_space_id=1, - chat_id=2, - streaming_context=StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-g"}}, - input_data={"messages": []}, - streaming_service=service, - ), - ) - ) - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:g", - "text_end:text-1", - ] - - -async def test_stream_chat_builds_streaming_context_when_not_provided() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("b")}}]) - - async def _fake_builder(**kwargs: Any) -> StreamingContext: - del kwargs - return StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-b"}}, - input_data={"messages": []}, - streaming_service=service, - ) - - old = orchestrator.build_chat_streaming_context - orchestrator.build_chat_streaming_context = _fake_builder - try: - frames = await _collect( - stream_chat( - user_query="q", - search_space_id=1, - chat_id=3, - ) - ) - finally: - orchestrator.build_chat_streaming_context = old - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:b", - "text_end:text-1", - ] - - -async def test_stream_resume_builds_streaming_context_when_not_provided() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("u")}}]) - - async def _fake_builder(**kwargs: Any) -> StreamingContext: - del kwargs - return StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-u"}}, - input_data={"messages": []}, - streaming_service=service, - ) - - old = orchestrator.build_resume_streaming_context - orchestrator.build_resume_streaming_context = _fake_builder - try: - frames = await _collect( - stream_resume( - chat_id=9, - search_space_id=1, - decisions=[], - ) - ) - finally: - orchestrator.build_resume_streaming_context = old - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:u", - "text_end:text-1", - ] - - -async def test_stream_regenerate_builds_streaming_context_when_not_provided() -> None: - service = _StreamingService() - agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}]) - - async def _fake_builder(**kwargs: Any) -> StreamingContext: - 
del kwargs - return StreamingContext( - agent=agent, - config={"configurable": {"thread_id": "thread-x"}}, - input_data={"messages": []}, - streaming_service=service, - ) - - old = orchestrator.build_regenerate_streaming_context - orchestrator.build_regenerate_streaming_context = _fake_builder - try: - frames = await _collect( - stream_regenerate( - user_query="q", - search_space_id=1, - chat_id=2, - ) - ) - finally: - orchestrator.build_regenerate_streaming_context = old - - assert frames == [ - "text_start:text-1", - "text_delta:text-1:x", - "text_end:text-1", - ] diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py index 9207f37d1..023c8b999 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py @@ -1,7 +1,7 @@ """Pin Stage 1 extractions as faithful copies of the old helpers. -The new orchestrator under ``app.tasks.chat.streaming`` is built in -parallel with the production module ``app.tasks.chat.stream_new_chat``. +Extractions under ``app.tasks.chat.streaming`` are compared to +``app.tasks.chat.stream_new_chat`` helpers. For each Stage 1 extraction we assert the new function returns the same output as the old one for a representative input set. The moment the two diverge - intentionally or otherwise - this file fails loudly so diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py similarity index 93% rename from surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py rename to surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py index b17d82293..9fb876dd7 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py @@ -1,4 +1,4 @@ -"""Behavior tests for orchestration event-stream execution.""" +"""Tests for ``stream_output`` (LangGraph events → SSE).""" from __future__ import annotations @@ -7,8 +7,8 @@ from typing import Any import pytest -from app.tasks.chat.streaming.orchestration import stream_output -from app.tasks.chat.streaming.orchestration.output import StreamingResult +from app.tasks.chat.streaming.graph_stream import stream_output +from app.tasks.chat.streaming.graph_stream.result import StreamingResult pytestmark = pytest.mark.unit @@ -88,6 +88,7 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None: async def test_stream_output_passes_runtime_context_to_agent() -> None: service = _StreamingService() + class _ContextAwareAgent: async def astream_events(self, input_data: Any, **kwargs: Any): del input_data From 78f4747382cead46c0c72040002562fe56bc35e4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 7 May 2026 19:40:10 +0200 Subject: [PATCH 25/58] refactor(chat): stream agent events via stream_output and remove parity v2 flag --- docker/.env.example | 1 - surfsense_backend/.env.example | 8 - .../app/agents/new_chat/feature_flags.py | 15 - .../app/services/new_streaming_service.py | 17 +- .../app/tasks/chat/content_builder.py | 8 +- .../app/tasks/chat/stream_new_chat.py | 1524 +---------------- .../streaming/graph_stream/event_stream.py | 2 - .../streaming/handlers/chat_model_stream.py | 4 +- .../chat/streaming/handlers/tool_start.py | 24 +- 
.../app/tasks/chat/streaming/relay/state.py | 3 - .../agents/new_chat/test_feature_flags.py | 3 - .../chat/streaming/test_stage_2_parity.py | 4 +- .../unit/tasks/chat/test_content_builder.py | 4 +- .../tasks/chat/test_tool_input_streaming.py | 112 +- .../assistant-ui/reasoning-message-part.tsx | 4 +- .../components/assistant-ui/tool-fallback.tsx | 14 +- surfsense_web/lib/chat/streaming-state.ts | 5 +- 17 files changed, 76 insertions(+), 1676 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index fd56bdccc..aba15f13f 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -324,7 +324,6 @@ SURFSENSE_ENABLE_ACTION_LOG=true SURFSENSE_ENABLE_REVERT_ROUTE=true SURFSENSE_ENABLE_PERMISSION=true SURFSENSE_ENABLE_DOOM_LOOP=true -SURFSENSE_ENABLE_STREAM_PARITY_V2=true # Periodic connector sync interval (default: 5m) # SCHEDULE_CHECKER_INTERVAL=5m diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index ba89059c8..3d442973c 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -315,14 +315,6 @@ LANGSMITH_PROJECT=surfsense # SURFSENSE_ENABLE_ACTION_LOG=false # SURFSENSE_ENABLE_REVERT_ROUTE=false # Backend-only; flip when UI ships -# Streaming parity v2 — opt in to LangChain's structured AIMessageChunk -# content (typed reasoning blocks, tool-input deltas) and propagate the -# real tool_call_id to the SSE layer. When OFF, the stream falls back to -# the str-only text path and synthetic "call_" tool-call ids. -# Schema migrations 135/136 ship unconditionally because they are -# forward-compatible. -# SURFSENSE_ENABLE_STREAM_PARITY_V2=false - # Plugins # SURFSENSE_ENABLE_PLUGIN_LOADER=false # Comma-separated allowlist of plugin entry-point names diff --git a/surfsense_backend/app/agents/new_chat/feature_flags.py b/surfsense_backend/app/agents/new_chat/feature_flags.py index b3dc0fa82..3cea051ef 100644 --- a/surfsense_backend/app/agents/new_chat/feature_flags.py +++ b/surfsense_backend/app/agents/new_chat/feature_flags.py @@ -28,7 +28,6 @@ Defaults: SURFSENSE_ENABLE_PERMISSION=true SURFSENSE_ENABLE_DOOM_LOOP=true SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false # adds a per-turn LLM call - SURFSENSE_ENABLE_STREAM_PARITY_V2=true Master kill-switch (overrides everything else): @@ -88,15 +87,6 @@ class AgentFeatureFlags: enable_action_log: bool = True enable_revert_route: bool = True - # Streaming parity v2 — opt in to LangChain's structured - # ``AIMessageChunk`` content (typed reasoning blocks, tool-input - # deltas) and propagate the real ``tool_call_id`` to the SSE layer. - # When OFF the ``stream_new_chat`` task falls back to the str-only - # text path and the synthetic ``call_`` tool-call id (no - # ``langchainToolCallId`` propagation). Schema migrations 135/136 - # ship unconditionally because they're forward-compatible. 
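# Aside: a minimal stand-alone sketch (hypothetical helper, stdlib
# only) of the boolean env parsing that the ``_env_bool`` reads used
# just below in ``from_env`` presumably rely on; the real helper lives
# elsewhere in feature_flags.py and may differ:
import os


def env_bool(name: str, default: bool) -> bool:
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")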
- enable_stream_parity_v2: bool = True - # Plugins enable_plugin_loader: bool = False @@ -169,7 +159,6 @@ class AgentFeatureFlags: enable_kb_planner_runnable=False, enable_action_log=False, enable_revert_route=False, - enable_stream_parity_v2=False, enable_plugin_loader=False, enable_otel=False, enable_agent_cache=False, @@ -208,10 +197,6 @@ class AgentFeatureFlags: # Snapshot / revert enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True), enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True), - # Streaming parity v2 - enable_stream_parity_v2=_env_bool( - "SURFSENSE_ENABLE_STREAM_PARITY_V2", True - ), # Plugins enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False), # Observability diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py index 55129668c..cec0c8a5e 100644 --- a/surfsense_backend/app/services/new_streaming_service.py +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -608,15 +608,14 @@ class VercelStreamingService: Args: tool_call_id: The unique tool call identifier. May be EITHER the synthetic ``call_`` id derived from LangGraph - ``run_id`` (legacy / ``SURFSENSE_ENABLE_STREAM_PARITY_V2`` - OFF, or the unmatched-fallback path under parity_v2) OR - the authoritative LangChain ``tool_call.id`` (parity_v2 - path: when the provider streams ``tool_call_chunks`` we - register the ``index`` and reuse the lc-id as the card - id so live ``tool-input-delta`` events can be routed - without a downstream join). Either way, the same id is - preserved across ``tool-input-start`` / ``-delta`` / - ``-available`` / ``tool-output-available`` for one call. + ``run_id`` (unmatched chunk fallback when no ``index`` was + registered) OR the authoritative LangChain ``tool_call.id`` + (when the provider streams ``tool_call_chunks`` we register + the ``index`` and reuse the lc-id as the card id so live + ``tool-input-delta`` events route without a downstream join). + Either way, the same id is preserved across + ``tool-input-start`` / ``-delta`` / ``-available`` / + ``tool-output-available`` for one call. tool_name: The name of the tool being called. langchain_tool_call_id: Optional authoritative LangChain ``tool_call.id``. When set, surfaces as diff --git a/surfsense_backend/app/tasks/chat/content_builder.py b/surfsense_backend/app/tasks/chat/content_builder.py index 041cab286..32b49e6b5 100644 --- a/surfsense_backend/app/tasks/chat/content_builder.py +++ b/surfsense_backend/app/tasks/chat/content_builder.py @@ -85,8 +85,8 @@ class AssistantContentBuilder: self._current_text_idx: int = -1 self._current_reasoning_idx: int = -1 # ``ui_id``-keyed indexes for tool-call parts. ``ui_id`` is the - # synthetic ``call_`` (legacy) or the LangChain - # ``tool_call.id`` (parity_v2) — same key the streaming layer + # synthetic ``call_`` (chunk fallback) or the LangChain + # ``tool_call.id`` (indexed chunk path) — same key the streaming layer # threads through every ``tool-input-*`` / ``tool-output-*`` event. self._tool_call_idx_by_ui_id: dict[str, int] = {} # Live argsText accumulator (concatenated ``tool-input-delta`` chunks) @@ -181,7 +181,7 @@ class AssistantContentBuilder: """Register a tool-call card. 
Args are filled in by later events.""" if not ui_id: return - # Skip duplicate registration: parity_v2 may emit + # Skip duplicate registration: the stream may emit # ``tool-input-start`` from both ``on_chat_model_stream`` # (when tool_call_chunks register a name) and ``on_tool_start`` # (the canonical path). The FE de-dupes via ``toolCallIndices``; @@ -243,7 +243,7 @@ class AssistantContentBuilder: pretty-printed JSON, sets the full ``args`` dict, and backfills ``langchainToolCallId`` if it wasn't known at ``tool-input-start`` time. Also creates the card if no prior ``tool-input-start`` registered it - (legacy parity_v2-OFF / late-registration paths). + (late-registration when no prior ``tool-input-start``). """ if not ui_id: return diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 1a2f38077..8e135179a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -9,13 +9,11 @@ Supports loading LLM configurations from: - NewLLMConfig database table (positive IDs for user-created configs with prompt settings) """ -import ast import asyncio import contextlib import gc import json import logging -import re import time from collections.abc import AsyncGenerator from dataclasses import dataclass, field @@ -33,7 +31,6 @@ from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.new_chat.errors import BusyError -from app.agents.new_chat.feature_flags import get_flags from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection from app.agents.new_chat.llm_config import ( AgentConfig, @@ -77,6 +74,7 @@ from app.services.chat_session_state_service import ( ) from app.services.connector_service import ConnectorService from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.streaming.graph_stream.event_stream import stream_output from app.utils.content_utils import bootstrap_history_from_db from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap from app.utils.user_message_multimodal import build_human_message_content @@ -729,9 +727,9 @@ def _legacy_match_lc_id( ) -> str | None: """Best-effort match a buffered ``tool_call_chunk`` to a tool name. - Pure extract of the legacy in-line match used at ``on_tool_start`` for - parity_v2-OFF and unmatched (chunk path didn't register an index for - this call) tools. Pops the next id-bearing chunk whose ``name`` + Pure extract of the in-line match used at ``on_tool_start`` when the + chunk path didn't register an index for this call. Pops the next + id-bearing chunk whose ``name`` matches ``tool_name`` (or any id-bearing chunk as a fallback) and returns its id. Mutates ``pending_tool_call_chunks`` and ``lc_tool_call_id_by_run`` in place. @@ -803,1505 +801,22 @@ async def _stream_agent_events( Yields: SSE-formatted strings for each event. 
""" - accumulated_text = "" - current_text_id: str | None = None - thinking_step_counter = 1 if initial_step_id else 0 - tool_step_ids: dict[str, str] = {} - completed_step_ids: set[str] = set() - last_active_step_id: str | None = initial_step_id - last_active_step_title: str = initial_step_title - last_active_step_items: list[str] = initial_step_items or [] - just_finished_tool: bool = False - active_tool_depth: int = 0 # Track nesting: >0 means we're inside a tool - called_update_memory: bool = False + async for sse in stream_output( + agent=agent, + config=config, + input_data=input_data, + streaming_service=streaming_service, + result=result, + step_prefix=step_prefix, + initial_step_id=initial_step_id, + initial_step_title=initial_step_title, + initial_step_items=initial_step_items, + content_builder=content_builder, + runtime_context=runtime_context, + ): + yield sse - # Reasoning-block streaming. We open a reasoning block on the - # first reasoning delta of a step, append deltas as they arrive, and - # close it when text starts (the model has switched to writing its - # answer) or ``on_chat_model_end`` fires for the model node. Reuses - # the same Vercel format-helpers as text-start/delta/end. - current_reasoning_id: str | None = None - - # Streaming-parity v2 feature flag. When OFF we keep the legacy - # shape: str-only content, no reasoning blocks, no - # ``langchainToolCallId`` propagation. The schema migrations - # (135 / 136) ship unconditionally because they're forward-compatible. - parity_v2 = bool(get_flags().enable_stream_parity_v2) - - # Best-effort attach of LangChain ``tool_call_id`` to the synthetic - # ``call_`` card id we already emit. We accumulate - # ``tool_call_chunks`` from ``on_chat_model_stream``, key them by - # name, and pop the next unconsumed entry at ``on_tool_start``. The - # authoritative id is later filled in at ``on_tool_end`` from - # ``ToolMessage.tool_call_id``. Under parity_v2 we ALSO short-circuit - # this list for chunks that already registered into ``index_to_meta`` - # below — so this list is reserved for the parity_v2-OFF / unmatched - # fallback path only and never re-pops a chunk we already streamed. - pending_tool_call_chunks: list[dict[str, Any]] = [] - lc_tool_call_id_by_run: dict[str, str] = {} - file_path_by_run: dict[str, str] = {} - - # parity_v2 only: live tool-call argument streaming. ``index_to_meta`` - # is keyed by the chunk's ``index`` field — LangChain - # ``ToolCallChunk``s for the same call share an index but only the - # first chunk carries id+name (subsequent ones are id=None, - # name=None, args=""). We register an index when both id and - # name are observed on a chunk (per ToolCallChunk semantics they - # arrive together on the first chunk), then route every later chunk - # at that index to the same ``ui_id`` as a ``tool-input-delta``. - # ``ui_tool_call_id_by_run`` maps LangGraph ``run_id`` to the - # ``ui_id`` used for that call's ``tool-input-start`` so the matching - # ``tool-output-available`` (emitted from ``on_tool_end``) lands on - # the same card. - index_to_meta: dict[int, dict[str, str]] = {} - ui_tool_call_id_by_run: dict[str, str] = {} - - # Per-tool-end mutable cache for the LangChain tool_call_id resolved - # at ``on_tool_end``. ``_emit_tool_output`` reads this so every - # ``format_tool_output_available`` call automatically carries the - # authoritative id without duplicating the kwarg at every call site. 
- current_lc_tool_call_id: dict[str, str | None] = {"value": None} - - def _emit_tool_output(call_id: str, output: Any) -> str: - # Drive the builder before formatting the SSE so the in-memory - # ContentPart[] mirror sees the result attached to the same - # card the FE will render. Builder method is a no-op when - # ``content_builder`` is None (anonymous / legacy paths). - if content_builder is not None: - content_builder.on_tool_output_available( - call_id, output, current_lc_tool_call_id["value"] - ) - return streaming_service.format_tool_output_available( - call_id, - output, - langchain_tool_call_id=current_lc_tool_call_id["value"], - ) - - def _emit_thinking_step( - *, - step_id: str, - title: str, - status: str = "in_progress", - items: list[str] | None = None, - ) -> str: - """Format a thinking-step SSE event and notify the builder. - - Single helper used at every ``format_thinking_step`` yield site - in this generator. Drives ``AssistantContentBuilder.on_thinking_step`` - first so the FE-mirror state lands the update before the SSE - carrying the same data leaves the wire — order matches the FE - pipeline (``processSharedStreamEvent`` updates state, then - flushes). Builder call is a no-op when ``content_builder`` is - None (anonymous / legacy paths). - """ - if content_builder is not None: - content_builder.on_thinking_step(step_id, title, status, items) - return streaming_service.format_thinking_step( - step_id=step_id, - title=title, - status=status, - items=items, - ) - - def next_thinking_step_id() -> str: - nonlocal thinking_step_counter - thinking_step_counter += 1 - return f"{step_prefix}-{thinking_step_counter}" - - def complete_current_step() -> str | None: - nonlocal last_active_step_id - if last_active_step_id and last_active_step_id not in completed_step_ids: - completed_step_ids.add(last_active_step_id) - event = _emit_thinking_step( - step_id=last_active_step_id, - title=last_active_step_title, - status="completed", - items=last_active_step_items if last_active_step_items else None, - ) - last_active_step_id = None - return event - return None - - # Per-invocation runtime context (Phase 1.5). When supplied, - # ``KnowledgePriorityMiddleware`` reads ``mentioned_document_ids`` - # from ``runtime.context`` instead of its constructor closure — the - # prerequisite that lets the compiled-agent cache (Phase 1) reuse a - # single graph across turns. Astream_events_kwargs stays empty when - # callers leave ``runtime_context`` as ``None`` to preserve the - # legacy code path bit-for-bit. - astream_kwargs: dict[str, Any] = {"config": config, "version": "v2"} - if runtime_context is not None: - astream_kwargs["context"] = runtime_context - - async for event in agent.astream_events(input_data, **astream_kwargs): - event_type = event.get("event", "") - - if event_type == "on_chat_model_stream": - if active_tool_depth > 0: - continue # Suppress inner-tool LLM tokens from leaking into chat - if "surfsense:internal" in event.get("tags", []): - continue # Suppress middleware-internal LLM tokens (e.g. KB search classification) - chunk = event.get("data", {}).get("chunk") - if not chunk: - continue - parts = _extract_chunk_parts(chunk) - - reasoning_delta = parts["reasoning"] - text_delta = parts["text"] - - # Reasoning streaming. Open a reasoning block on first - # delta; append every subsequent delta until text begins. - # When text starts we close the reasoning block first so the - # frontend sees the natural hand-off. 
Gated behind the - # parity-v2 flag so legacy deployments keep today's shape. - if parity_v2 and reasoning_delta: - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - if current_reasoning_id is None: - completion_event = complete_current_step() - if completion_event: - yield completion_event - if just_finished_tool: - last_active_step_id = None - last_active_step_title = "" - last_active_step_items = [] - just_finished_tool = False - current_reasoning_id = streaming_service.generate_reasoning_id() - yield streaming_service.format_reasoning_start(current_reasoning_id) - if content_builder is not None: - content_builder.on_reasoning_start(current_reasoning_id) - yield streaming_service.format_reasoning_delta( - current_reasoning_id, reasoning_delta - ) - if content_builder is not None: - content_builder.on_reasoning_delta( - current_reasoning_id, reasoning_delta - ) - - if text_delta: - if current_reasoning_id is not None: - yield streaming_service.format_reasoning_end(current_reasoning_id) - if content_builder is not None: - content_builder.on_reasoning_end(current_reasoning_id) - current_reasoning_id = None - if current_text_id is None: - completion_event = complete_current_step() - if completion_event: - yield completion_event - if just_finished_tool: - last_active_step_id = None - last_active_step_title = "" - last_active_step_items = [] - just_finished_tool = False - current_text_id = streaming_service.generate_text_id() - yield streaming_service.format_text_start(current_text_id) - if content_builder is not None: - content_builder.on_text_start(current_text_id) - yield streaming_service.format_text_delta(current_text_id, text_delta) - accumulated_text += text_delta - if content_builder is not None: - content_builder.on_text_delta(current_text_id, text_delta) - - # Live tool-call argument streaming. Runs AFTER text/reasoning - # processing so chunks containing both stay in their natural - # wire order (text → text-end → tool-input-start). Active - # text/reasoning are closed inside the registration branch - # before ``tool-input-start`` so the frontend sees a clean - # part boundary even when providers interleave. - if parity_v2 and parts["tool_call_chunks"]: - for tcc in parts["tool_call_chunks"]: - idx = tcc.get("index") - - # Register this index when we first see id+name - # TOGETHER. Per LangChain ToolCallChunk semantics the - # first chunk for a tool call carries both fields - # together; later chunks have id=None, name=None and - # only ``args``. Requiring BOTH keeps wire - # ``tool-input-start`` always carrying a real - # toolName (assistant-ui's typed tool-part dispatch - # keys off it). - if idx is not None and idx not in index_to_meta: - lc_id = tcc.get("id") - name = tcc.get("name") - if lc_id and name: - ui_id = lc_id - - # Close active text/reasoning so wire - # ordering stays clean even on providers - # that interleave text and tool-call chunks - # within the same stream window. 
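# Aside: a stand-alone sketch of the index-keyed reassembly described
# above (hypothetical helper, stdlib only). Per ToolCallChunk
# semantics the first chunk of a call carries id+name; later chunks
# share the index and carry args text only, so grouping by index
# rebuilds the full argsText:
def assemble_tool_calls(chunks: list[dict]) -> dict[int, dict]:
    calls: dict[int, dict] = {}
    for chunk in chunks:
        idx = chunk.get("index")
        if idx is None:
            continue
        meta = calls.setdefault(idx, {"id": None, "name": None, "args": ""})
        meta["id"] = meta["id"] or chunk.get("id")
        meta["name"] = meta["name"] or chunk.get("name")
        meta["args"] += chunk.get("args") or ""
    return calls


# assemble_tool_calls([
#     {"index": 0, "id": "lc_1", "name": "grep", "args": '{"pat'},
#     {"index": 0, "id": None, "name": None, "args": 'tern": "x"}'},
# ]) -> {0: {"id": "lc_1", "name": "grep", "args": '{"pattern": "x"}'}}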
- if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - if current_reasoning_id is not None: - yield streaming_service.format_reasoning_end( - current_reasoning_id - ) - if content_builder is not None: - content_builder.on_reasoning_end( - current_reasoning_id - ) - current_reasoning_id = None - - index_to_meta[idx] = { - "ui_id": ui_id, - "lc_id": lc_id, - "name": name, - } - yield streaming_service.format_tool_input_start( - ui_id, - name, - langchain_tool_call_id=lc_id, - ) - if content_builder is not None: - content_builder.on_tool_input_start(ui_id, name, lc_id) - - # Emit args delta for any chunk at a registered - # index (including idless continuations). Once an - # index is owned by ``index_to_meta`` we DO NOT - # append to ``pending_tool_call_chunks`` — that list - # is reserved for the parity_v2-OFF / unmatched - # fallback path so it never re-pops chunks already - # consumed here (skip-append). - meta = index_to_meta.get(idx) if idx is not None else None - if meta: - args_chunk = tcc.get("args") or "" - if args_chunk: - yield streaming_service.format_tool_input_delta( - meta["ui_id"], args_chunk - ) - if content_builder is not None: - content_builder.on_tool_input_delta( - meta["ui_id"], args_chunk - ) - else: - pending_tool_call_chunks.append(tcc) - - elif event_type == "on_tool_start": - active_tool_depth += 1 - tool_name = event.get("name", "unknown_tool") - run_id = event.get("run_id", "") - tool_input = event.get("data", {}).get("input", {}) - if tool_name in ("write_file", "edit_file"): - result.write_attempted = True - if isinstance(tool_input, dict): - file_path = tool_input.get("file_path") - if isinstance(file_path, str) and file_path.strip() and run_id: - file_path_by_run[run_id] = file_path.strip() - - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - - if last_active_step_title != "Synthesizing response": - completion_event = complete_current_step() - if completion_event: - yield completion_event - - just_finished_tool = False - tool_step_id = next_thinking_step_id() - tool_step_ids[run_id] = tool_step_id - last_active_step_id = tool_step_id - - if tool_name == "ls": - ls_path = ( - tool_input.get("path", "/") - if isinstance(tool_input, dict) - else str(tool_input) - ) - last_active_step_title = "Listing files" - last_active_step_items = [ls_path] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Listing files", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "read_file": - fp = ( - tool_input.get("file_path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_fp = fp if len(fp) <= 80 else "…" + fp[-77:] - last_active_step_title = "Reading file" - last_active_step_items = [display_fp] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Reading file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "write_file": - fp = ( - tool_input.get("file_path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_fp = fp if len(fp) <= 80 else "…" + fp[-77:] - last_active_step_title = "Writing file" - last_active_step_items = [display_fp] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Writing file", - status="in_progress", - 
items=last_active_step_items, - ) - elif tool_name == "edit_file": - fp = ( - tool_input.get("file_path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_fp = fp if len(fp) <= 80 else "…" + fp[-77:] - last_active_step_title = "Editing file" - last_active_step_items = [display_fp] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Editing file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "glob": - pat = ( - tool_input.get("pattern", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - base_path = ( - tool_input.get("path", "/") if isinstance(tool_input, dict) else "/" - ) - last_active_step_title = "Searching files" - last_active_step_items = [f"{pat} in {base_path}"] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Searching files", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "grep": - pat = ( - tool_input.get("pattern", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - grep_path = ( - tool_input.get("path", "") if isinstance(tool_input, dict) else "" - ) - display_pat = pat[:60] + ("…" if len(pat) > 60 else "") - last_active_step_title = "Searching content" - last_active_step_items = [ - f'"{display_pat}"' + (f" in {grep_path}" if grep_path else "") - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Searching content", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "rm": - rm_path = ( - tool_input.get("path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_path = rm_path if len(rm_path) <= 80 else "…" + rm_path[-77:] - last_active_step_title = "Deleting file" - last_active_step_items = [display_path] if display_path else [] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Deleting file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "rmdir": - rmdir_path = ( - tool_input.get("path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_path = ( - rmdir_path if len(rmdir_path) <= 80 else "…" + rmdir_path[-77:] - ) - last_active_step_title = "Deleting folder" - last_active_step_items = [display_path] if display_path else [] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Deleting folder", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "mkdir": - mkdir_path = ( - tool_input.get("path", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_path = ( - mkdir_path if len(mkdir_path) <= 80 else "…" + mkdir_path[-77:] - ) - last_active_step_title = "Creating folder" - last_active_step_items = [display_path] if display_path else [] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Creating folder", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "move_file": - src = ( - tool_input.get("source_path", "") - if isinstance(tool_input, dict) - else "" - ) - dst = ( - tool_input.get("destination_path", "") - if isinstance(tool_input, dict) - else "" - ) - display_src = src if len(src) <= 60 else "…" + src[-57:] - display_dst = dst if len(dst) <= 60 else "…" + dst[-57:] - last_active_step_title = "Moving file" - last_active_step_items = ( - [f"{display_src} → {display_dst}"] if src or dst else [] - ) - yield _emit_thinking_step( - step_id=tool_step_id, - title="Moving file", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "write_todos": - todos = ( - 
tool_input.get("todos", []) if isinstance(tool_input, dict) else [] - ) - todo_count = len(todos) if isinstance(todos, list) else 0 - last_active_step_title = "Planning tasks" - last_active_step_items = ( - [f"{todo_count} task{'s' if todo_count != 1 else ''}"] - if todo_count - else [] - ) - yield _emit_thinking_step( - step_id=tool_step_id, - title="Planning tasks", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "save_document": - doc_title = ( - tool_input.get("title", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_title = doc_title[:60] + ("…" if len(doc_title) > 60 else "") - last_active_step_title = "Saving document" - last_active_step_items = [display_title] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Saving document", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "generate_image": - prompt = ( - tool_input.get("prompt", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - last_active_step_title = "Generating image" - last_active_step_items = [ - f"Prompt: {prompt[:80]}{'...' if len(prompt) > 80 else ''}" - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Generating image", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "scrape_webpage": - url = ( - tool_input.get("url", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - last_active_step_title = "Scraping webpage" - last_active_step_items = [ - f"URL: {url[:80]}{'...' if len(url) > 80 else ''}" - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Scraping webpage", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "generate_podcast": - podcast_title = ( - tool_input.get("podcast_title", "SurfSense Podcast") - if isinstance(tool_input, dict) - else "SurfSense Podcast" - ) - content_len = len( - tool_input.get("source_content", "") - if isinstance(tool_input, dict) - else "" - ) - last_active_step_title = "Generating podcast" - last_active_step_items = [ - f"Title: {podcast_title}", - f"Content: {content_len:,} characters", - "Preparing audio generation...", - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Generating podcast", - status="in_progress", - items=last_active_step_items, - ) - elif tool_name == "generate_report": - report_topic = ( - tool_input.get("topic", "Report") - if isinstance(tool_input, dict) - else "Report" - ) - is_revision = bool( - isinstance(tool_input, dict) and tool_input.get("parent_report_id") - ) - step_title = "Revising report" if is_revision else "Generating report" - last_active_step_title = step_title - last_active_step_items = [ - f"Topic: {report_topic}", - "Analyzing source content...", - ] - yield _emit_thinking_step( - step_id=tool_step_id, - title=step_title, - status="in_progress", - items=last_active_step_items, - ) - elif tool_name in ("execute", "execute_code"): - cmd = ( - tool_input.get("command", "") - if isinstance(tool_input, dict) - else str(tool_input) - ) - display_cmd = cmd[:80] + ("…" if len(cmd) > 80 else "") - last_active_step_title = "Running command" - last_active_step_items = [f"$ {display_cmd}"] - yield _emit_thinking_step( - step_id=tool_step_id, - title="Running command", - status="in_progress", - items=last_active_step_items, - ) - else: - # Fallback for tools without a curated thinking-step title - # (typically connector tools, MCP-registered tools, or - # newly added tools that haven't been wired up here yet). 
- # Render the snake_cased name as a sentence-cased phrase - # so non-technical users see e.g. "Send gmail email" - # rather than the raw identifier "send_gmail_email". - last_active_step_title = ( - tool_name.replace("_", " ").strip().capitalize() or tool_name - ) - last_active_step_items = [] - yield _emit_thinking_step( - step_id=tool_step_id, - title=last_active_step_title, - status="in_progress", - ) - - # Resolve the card identity. If the chunk-emission loop - # already registered an ``index`` for this tool call (parity_v2 - # path), reuse the same ui_id so the card sees: - # tool-input-start → deltas… → tool-input-available → - # tool-output-available all keyed by lc_id. Otherwise fall - # back to the synthetic ``call_`` id and the legacy - # best-effort match against ``pending_tool_call_chunks``. - matched_meta: dict[str, str] | None = None - if parity_v2: - # FIFO over indices 0,1,2…; first unassigned same-name - # match wins. Handles parallel same-name calls (e.g. two - # write_file calls) deterministically as long as the - # model interleaves on_tool_start in the same order it - # streamed the args. - taken_ui_ids = set(ui_tool_call_id_by_run.values()) - for meta in index_to_meta.values(): - if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: - matched_meta = meta - break - - tool_call_id: str - langchain_tool_call_id: str | None = None - if matched_meta is not None: - tool_call_id = matched_meta["ui_id"] - langchain_tool_call_id = matched_meta["lc_id"] - # ``tool-input-start`` already fired during chunk - # emission — skip the duplicate. No pruning is needed - # because the chunk-emission loop intentionally never - # appends registered-index chunks to - # ``pending_tool_call_chunks`` (skip-append). - if run_id: - lc_tool_call_id_by_run[run_id] = matched_meta["lc_id"] - else: - tool_call_id = ( - f"call_{run_id[:32]}" - if run_id - else streaming_service.generate_tool_call_id() - ) - # Legacy fallback: parity_v2 OFF, or parity_v2 ON but the - # provider didn't stream tool_call_chunks for this call - # (no index registered). Run the existing best-effort - # match BEFORE emitting start so we still attach an - # authoritative ``langchainToolCallId`` when possible. - if parity_v2: - langchain_tool_call_id = _legacy_match_lc_id( - pending_tool_call_chunks, - tool_name, - run_id, - lc_tool_call_id_by_run, - ) - yield streaming_service.format_tool_input_start( - tool_call_id, - tool_name, - langchain_tool_call_id=langchain_tool_call_id, - ) - if content_builder is not None: - content_builder.on_tool_input_start( - tool_call_id, tool_name, langchain_tool_call_id - ) - - if run_id: - ui_tool_call_id_by_run[run_id] = tool_call_id - - # Sanitize tool_input: strip runtime-injected non-serializable - # values (e.g. LangChain ToolRuntime) before sending over SSE. 
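# Aside: a stand-alone sketch of the probe-and-drop filter applied
# just below (hypothetical helper): json.dumps is the serializability
# oracle, so runtime-injected objects that cannot cross the SSE
# boundary are dropped instead of crashing the frame.
import json
from typing import Any


def jsonable_only(values: dict[str, Any]) -> dict[str, Any]:
    safe: dict[str, Any] = {}
    for key, value in values.items():
        try:
            json.dumps(value)
        except (TypeError, ValueError, OverflowError):
            continue
        safe[key] = value
    return safe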
- if isinstance(tool_input, dict): - _safe_input: dict[str, Any] = {} - for _k, _v in tool_input.items(): - try: - json.dumps(_v) - _safe_input[_k] = _v - except (TypeError, ValueError, OverflowError): - pass - else: - _safe_input = {"input": tool_input} - yield streaming_service.format_tool_input_available( - tool_call_id, - tool_name, - _safe_input, - langchain_tool_call_id=langchain_tool_call_id, - ) - if content_builder is not None: - content_builder.on_tool_input_available( - tool_call_id, - tool_name, - _safe_input, - langchain_tool_call_id, - ) - - elif event_type == "on_tool_end": - active_tool_depth = max(0, active_tool_depth - 1) - run_id = event.get("run_id", "") - tool_name = event.get("name", "unknown_tool") - raw_output = event.get("data", {}).get("output", "") - staged_file_path = file_path_by_run.pop(run_id, None) if run_id else None - - if tool_name == "update_memory": - called_update_memory = True - - if hasattr(raw_output, "content"): - content = raw_output.content - if isinstance(content, str): - try: - tool_output = json.loads(content) - except (json.JSONDecodeError, TypeError): - tool_output = {"result": content} - elif isinstance(content, dict): - tool_output = content - else: - tool_output = {"result": str(content)} - elif isinstance(raw_output, dict): - tool_output = raw_output - else: - tool_output = {"result": str(raw_output) if raw_output else "completed"} - - if tool_name in ("write_file", "edit_file"): - if _tool_output_has_error(tool_output): - # Keep successful evidence if a previous write/edit in this turn succeeded. - pass - else: - result.write_succeeded = True - result.verification_succeeded = True - - # Look up the SAME card id used at on_tool_start (either the - # parity_v2 lc-id-derived ui_id or the legacy synthetic - # ``call_``) so the output event always lands on the - # same card as start/delta/available. Fallback preserves the - # legacy synthetic shape for parity_v2-OFF / unknown-run paths. - tool_call_id = ui_tool_call_id_by_run.get( - run_id, - f"call_{run_id[:32]}" if run_id else "call_unknown", - ) - original_step_id = tool_step_ids.get( - run_id, f"{step_prefix}-unknown-{run_id[:8]}" - ) - completed_step_ids.add(original_step_id) - - # Authoritative LangChain tool_call_id from the returned - # ``ToolMessage``. Falls back to whatever we matched - # at ``on_tool_start`` time (kept in ``lc_tool_call_id_by_run``) - # if the output isn't a ToolMessage. The value is stored in - # ``current_lc_tool_call_id`` so ``_emit_tool_output`` - # picks it up for every output emit below. - # - # Emitted in BOTH parity_v2 and legacy modes: the chat tool - # card needs the LangChain id to match against the - # ``data-action-log`` SSE event (keyed by ``lc_tool_call_id``) - # so the inline Revert button can light up. Reading - # ``raw_output.tool_call_id`` is a cheap, non-mutating attribute - # access that is safe regardless of feature-flag state. 
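# Aside: a stand-alone sketch of the id-resolution order described
# above (hypothetical helper): prefer the returned ToolMessage's own
# tool_call_id, else fall back to whatever was matched at
# on_tool_start time.
from typing import Any


def resolve_lc_tool_call_id(
    raw_output: Any, run_id: str, lc_id_by_run: dict[str, str]
) -> str | None:
    authoritative = getattr(raw_output, "tool_call_id", None)
    if isinstance(authoritative, str) and authoritative:
        return authoritative
    return lc_id_by_run.get(run_id)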
- current_lc_tool_call_id["value"] = None - authoritative = getattr(raw_output, "tool_call_id", None) - if isinstance(authoritative, str) and authoritative: - current_lc_tool_call_id["value"] = authoritative - if run_id: - lc_tool_call_id_by_run[run_id] = authoritative - elif run_id and run_id in lc_tool_call_id_by_run: - current_lc_tool_call_id["value"] = lc_tool_call_id_by_run[run_id] - - if tool_name == "read_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Reading file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "write_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Writing file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "edit_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Editing file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "glob": - yield _emit_thinking_step( - step_id=original_step_id, - title="Searching files", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "grep": - yield _emit_thinking_step( - step_id=original_step_id, - title="Searching content", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "rm": - yield _emit_thinking_step( - step_id=original_step_id, - title="Deleting file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "rmdir": - yield _emit_thinking_step( - step_id=original_step_id, - title="Deleting folder", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "mkdir": - yield _emit_thinking_step( - step_id=original_step_id, - title="Creating folder", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "move_file": - yield _emit_thinking_step( - step_id=original_step_id, - title="Moving file", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "write_todos": - yield _emit_thinking_step( - step_id=original_step_id, - title="Planning tasks", - status="completed", - items=last_active_step_items, - ) - elif tool_name == "save_document": - result_str = ( - tool_output.get("result", "") - if isinstance(tool_output, dict) - else str(tool_output) - ) - is_error = "Error" in result_str - completed_items = [ - *last_active_step_items, - result_str[:80] if is_error else "Saved to knowledge base", - ] - yield _emit_thinking_step( - step_id=original_step_id, - title="Saving document", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_image": - if isinstance(tool_output, dict) and not tool_output.get("error"): - completed_items = [ - *last_active_step_items, - "Image generated successfully", - ] - else: - error_msg = ( - tool_output.get("error", "Generation failed") - if isinstance(tool_output, dict) - else "Generation failed" - ) - completed_items = [*last_active_step_items, f"Error: {error_msg}"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Generating image", - status="completed", - items=completed_items, - ) - elif tool_name == "scrape_webpage": - if isinstance(tool_output, dict): - title = tool_output.get("title", "Webpage") - word_count = tool_output.get("word_count", 0) - has_error = "error" in tool_output - if has_error: - completed_items = [ - *last_active_step_items, - f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}", - ] - else: - completed_items = [ - *last_active_step_items, - f"Title: {title[:50]}{'...' 
if len(title) > 50 else ''}", - f"Extracted: {word_count:,} words", - ] - else: - completed_items = [*last_active_step_items, "Content extracted"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Scraping webpage", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_podcast": - podcast_status = ( - tool_output.get("status", "unknown") - if isinstance(tool_output, dict) - else "unknown" - ) - podcast_title = ( - tool_output.get("title", "Podcast") - if isinstance(tool_output, dict) - else "Podcast" - ) - if podcast_status in ("pending", "generating", "processing"): - completed_items = [ - f"Title: {podcast_title}", - "Podcast generation started", - "Processing in background...", - ] - elif podcast_status == "already_generating": - completed_items = [ - f"Title: {podcast_title}", - "Podcast already in progress", - "Please wait for it to complete", - ] - elif podcast_status in ("failed", "error"): - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - completed_items = [ - f"Title: {podcast_title}", - f"Error: {error_msg[:50]}", - ] - elif podcast_status in ("ready", "success"): - completed_items = [ - f"Title: {podcast_title}", - "Podcast ready", - ] - else: - completed_items = last_active_step_items - yield _emit_thinking_step( - step_id=original_step_id, - title="Generating podcast", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_video_presentation": - vp_status = ( - tool_output.get("status", "unknown") - if isinstance(tool_output, dict) - else "unknown" - ) - vp_title = ( - tool_output.get("title", "Presentation") - if isinstance(tool_output, dict) - else "Presentation" - ) - if vp_status in ("pending", "generating"): - completed_items = [ - f"Title: {vp_title}", - "Presentation generation started", - "Processing in background...", - ] - elif vp_status == "failed": - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - completed_items = [ - f"Title: {vp_title}", - f"Error: {error_msg[:50]}", - ] - else: - completed_items = last_active_step_items - yield _emit_thinking_step( - step_id=original_step_id, - title="Generating video presentation", - status="completed", - items=completed_items, - ) - elif tool_name == "generate_report": - report_status = ( - tool_output.get("status", "unknown") - if isinstance(tool_output, dict) - else "unknown" - ) - report_title = ( - tool_output.get("title", "Report") - if isinstance(tool_output, dict) - else "Report" - ) - word_count = ( - tool_output.get("word_count", 0) - if isinstance(tool_output, dict) - else 0 - ) - is_revision = ( - tool_output.get("is_revision", False) - if isinstance(tool_output, dict) - else False - ) - step_title = "Revising report" if is_revision else "Generating report" - - if report_status == "ready": - completed_items = [ - f"Topic: {report_title}", - f"{word_count:,} words", - "Report ready", - ] - elif report_status == "failed": - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - completed_items = [ - f"Topic: {report_title}", - f"Error: {error_msg[:50]}", - ] - else: - completed_items = last_active_step_items - - yield _emit_thinking_step( - step_id=original_step_id, - title=step_title, - status="completed", - items=completed_items, - ) - elif tool_name in ("execute", "execute_code"): - raw_text = ( - tool_output.get("result", "") - if 
isinstance(tool_output, dict) - else str(tool_output) - ) - m = re.match(r"^Exit code:\s*(\d+)", raw_text) - exit_code_val = int(m.group(1)) if m else None - if exit_code_val is not None and exit_code_val == 0: - completed_items = [ - *last_active_step_items, - "Completed successfully", - ] - elif exit_code_val is not None: - completed_items = [ - *last_active_step_items, - f"Exit code: {exit_code_val}", - ] - else: - completed_items = [*last_active_step_items, "Finished"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Running command", - status="completed", - items=completed_items, - ) - elif tool_name == "ls": - if isinstance(tool_output, dict): - ls_output = tool_output.get("result", "") - elif isinstance(tool_output, str): - ls_output = tool_output - else: - ls_output = str(tool_output) if tool_output else "" - file_names: list[str] = [] - if ls_output: - paths: list[str] = [] - try: - parsed = ast.literal_eval(ls_output) - if isinstance(parsed, list): - paths = [str(p) for p in parsed] - except (ValueError, SyntaxError): - paths = [ - line.strip() - for line in ls_output.strip().split("\n") - if line.strip() - ] - for p in paths: - name = p.rstrip("/").split("/")[-1] - if name and len(name) <= 40: - file_names.append(name) - elif name: - file_names.append(name[:37] + "...") - if file_names: - if len(file_names) <= 5: - completed_items = [f"[{name}]" for name in file_names] - else: - completed_items = [f"[{name}]" for name in file_names[:4]] - completed_items.append(f"(+{len(file_names) - 4} more)") - else: - completed_items = ["No files found"] - yield _emit_thinking_step( - step_id=original_step_id, - title="Listing files", - status="completed", - items=completed_items, - ) - else: - # Fallback completion title — see the matching in-progress - # branch above for the wording rationale. 
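# Aside: the fallback-title rule noted above, isolated as a
# stand-alone sketch (hypothetical helper): snake_cased tool names
# render as a sentence-cased phrase.
def fallback_step_title(tool_name: str) -> str:
    return tool_name.replace("_", " ").strip().capitalize() or tool_name


# fallback_step_title("send_gmail_email") -> "Send gmail email"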
- fallback_title = ( - tool_name.replace("_", " ").strip().capitalize() or tool_name - ) - yield _emit_thinking_step( - step_id=original_step_id, - title=fallback_title, - status="completed", - items=last_active_step_items, - ) - - just_finished_tool = True - last_active_step_id = None - last_active_step_title = "" - last_active_step_items = [] - - if tool_name == "generate_podcast": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if isinstance(tool_output, dict) and tool_output.get("status") in ( - "pending", - "generating", - "processing", - ): - yield streaming_service.format_terminal_info( - f"Podcast queued: {tool_output.get('title', 'Podcast')}", - "success", - ) - elif isinstance(tool_output, dict) and tool_output.get("status") in ( - "ready", - "success", - ): - yield streaming_service.format_terminal_info( - f"Podcast generated successfully: {tool_output.get('title', 'Podcast')}", - "success", - ) - elif isinstance(tool_output, dict) and tool_output.get("status") in ( - "failed", - "error", - ): - error_msg = tool_output.get("error", "Unknown error") - yield streaming_service.format_terminal_info( - f"Podcast generation failed: {error_msg}", - "error", - ) - elif tool_name == "generate_video_presentation": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if ( - isinstance(tool_output, dict) - and tool_output.get("status") == "pending" - ): - yield streaming_service.format_terminal_info( - f"Video presentation queued: {tool_output.get('title', 'Presentation')}", - "success", - ) - elif ( - isinstance(tool_output, dict) - and tool_output.get("status") == "failed" - ): - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - yield streaming_service.format_terminal_info( - f"Presentation generation failed: {error_msg}", - "error", - ) - elif tool_name == "generate_image": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if isinstance(tool_output, dict): - if tool_output.get("error"): - yield streaming_service.format_terminal_info( - f"Image generation failed: {tool_output['error'][:60]}", - "error", - ) - else: - yield streaming_service.format_terminal_info( - "Image generated successfully", - "success", - ) - elif tool_name == "scrape_webpage": - if isinstance(tool_output, dict): - display_output = { - k: v for k, v in tool_output.items() if k != "content" - } - if "content" in tool_output: - content = tool_output.get("content", "") - display_output["content_preview"] = ( - content[:500] + "..." if len(content) > 500 else content - ) - yield _emit_tool_output( - tool_call_id, - display_output, - ) - else: - yield _emit_tool_output( - tool_call_id, - {"result": tool_output}, - ) - if isinstance(tool_output, dict) and "error" not in tool_output: - title = tool_output.get("title", "Webpage") - word_count = tool_output.get("word_count", 0) - yield streaming_service.format_terminal_info( - f"Scraped: {title[:40]}{'...' 
if len(title) > 40 else ''} ({word_count:,} words)", - "success", - ) - else: - error_msg = ( - tool_output.get("error", "Failed to scrape") - if isinstance(tool_output, dict) - else "Failed to scrape" - ) - yield streaming_service.format_terminal_info( - f"Scrape failed: {error_msg}", - "error", - ) - elif tool_name in ("write_file", "edit_file"): - resolved_path = _extract_resolved_file_path( - tool_name=tool_name, - tool_output=tool_output, - tool_input={"file_path": staged_file_path} - if staged_file_path - else None, - ) - result_text = _tool_output_to_text(tool_output) - if _tool_output_has_error(tool_output): - yield _emit_tool_output( - tool_call_id, - { - "status": "error", - "error": result_text, - "path": resolved_path, - }, - ) - else: - yield _emit_tool_output( - tool_call_id, - { - "status": "completed", - "path": resolved_path, - "result": result_text, - }, - ) - elif tool_name == "generate_report": - # Stream the full report result so frontend can render the ReportCard - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - # Send appropriate terminal message based on status - if ( - isinstance(tool_output, dict) - and tool_output.get("status") == "ready" - ): - word_count = tool_output.get("word_count", 0) - yield streaming_service.format_terminal_info( - f"Report generated: {tool_output.get('title', 'Report')} ({word_count:,} words)", - "success", - ) - else: - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - yield streaming_service.format_terminal_info( - f"Report generation failed: {error_msg}", - "error", - ) - elif tool_name == "generate_resume": - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - if ( - isinstance(tool_output, dict) - and tool_output.get("status") == "ready" - ): - yield streaming_service.format_terminal_info( - f"Resume generated: {tool_output.get('title', 'Resume')}", - "success", - ) - else: - error_msg = ( - tool_output.get("error", "Unknown error") - if isinstance(tool_output, dict) - else "Unknown error" - ) - yield streaming_service.format_terminal_info( - f"Resume generation failed: {error_msg}", - "error", - ) - elif tool_name in ( - "create_notion_page", - "update_notion_page", - "delete_notion_page", - "create_linear_issue", - "update_linear_issue", - "delete_linear_issue", - "create_google_drive_file", - "delete_google_drive_file", - "create_onedrive_file", - "delete_onedrive_file", - "create_dropbox_file", - "delete_dropbox_file", - "create_gmail_draft", - "update_gmail_draft", - "send_gmail_email", - "trash_gmail_email", - "create_calendar_event", - "update_calendar_event", - "delete_calendar_event", - "create_jira_issue", - "update_jira_issue", - "delete_jira_issue", - "create_confluence_page", - "update_confluence_page", - "delete_confluence_page", - ): - yield _emit_tool_output( - tool_call_id, - tool_output - if isinstance(tool_output, dict) - else {"result": tool_output}, - ) - elif tool_name in ("execute", "execute_code"): - raw_text = ( - tool_output.get("result", "") - if isinstance(tool_output, dict) - else str(tool_output) - ) - exit_code: int | None = None - output_text = raw_text - m = re.match(r"^Exit code:\s*(\d+)", raw_text) - if m: - exit_code = int(m.group(1)) - om = re.search(r"\nOutput:\n([\s\S]*)", raw_text) - output_text = om.group(1) if om else "" - thread_id_str = config.get("configurable", 
{}).get("thread_id", "")
-
-                for sf_match in re.finditer(
-                    r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE
-                ):
-                    fpath = sf_match.group(1).strip()
-                    if fpath and fpath not in result.sandbox_files:
-                        result.sandbox_files.append(fpath)
-
-                yield _emit_tool_output(
-                    tool_call_id,
-                    {
-                        "exit_code": exit_code,
-                        "output": output_text,
-                        "thread_id": thread_id_str,
-                    },
-                )
-            elif tool_name == "web_search":
-                xml = (
-                    tool_output.get("result", str(tool_output))
-                    if isinstance(tool_output, dict)
-                    else str(tool_output)
-                )
-                citations: dict[str, dict[str, str]] = {}
-                for m in re.finditer(
-                    r"<title><!\[CDATA\[(.*?)\]\]></title>\s*<url>(.*?)</url>",
-                    xml,
-                ):
-                    title, url = m.group(1).strip(), m.group(2).strip()
-                    if url.startswith("http") and url not in citations:
-                        citations[url] = {"title": title}
-                for m in re.finditer(
-                    r'<chunk url="(.*?)">([\s\S]*?)</chunk>',
-                    xml,
-                ):
-                    chunk_url, content = m.group(1).strip(), m.group(2).strip()
-                    if (
-                        chunk_url.startswith("http")
-                        and chunk_url in citations
-                        and content
-                    ):
-                        citations[chunk_url]["snippet"] = (
-                            content[:200] + "…" if len(content) > 200 else content
-                        )
-                yield _emit_tool_output(
-                    tool_call_id,
-                    {"status": "completed", "citations": citations},
-                )
-            else:
-                yield _emit_tool_output(
-                    tool_call_id,
-                    {"status": "completed", "result_length": len(str(tool_output))},
-                )
-                yield streaming_service.format_terminal_info(
-                    f"Tool {tool_name} completed", "success"
-                )
-
-        elif event_type == "on_custom_event" and event.get("name") == "report_progress":
-            # Live progress updates from inside the generate_report tool
-            data = event.get("data", {})
-            message = data.get("message", "")
-            if message and last_active_step_id:
-                phase = data.get("phase", "")
-                # Always keep the "Topic: ..." line
-                topic_items = [
-                    item for item in last_active_step_items if item.startswith("Topic:")
-                ]
-
-                if phase in ("revising_section", "adding_section"):
-                    # During section-level ops: keep plan summary + show current op
-                    plan_items = [
-                        item
-                        for item in last_active_step_items
-                        if item.startswith("Topic:")
-                        or item.startswith("Modifying ")
-                        or item.startswith("Adding ")
-                        or item.startswith("Removing ")
-                    ]
-                    # Only keep plan_items that don't end with "..." (not progress lines)
-                    plan_items = [
-                        item for item in plan_items if not item.endswith("...")
-                    ]
-                    last_active_step_items = [*plan_items, message]
-                else:
-                    # Phase transitions: replace everything after topic
-                    last_active_step_items = [*topic_items, message]
-
-                yield _emit_thinking_step(
-                    step_id=last_active_step_id,
-                    title=last_active_step_title,
-                    status="in_progress",
-                    items=last_active_step_items,
-                )
-
-        elif (
-            event_type == "on_custom_event" and event.get("name") == "document_created"
-        ):
-            data = event.get("data", {})
-            if data.get("id"):
-                yield streaming_service.format_data(
-                    "documents-updated",
-                    {
-                        "action": "created",
-                        "document": data,
-                    },
-                )
-
-        elif event_type == "on_custom_event" and event.get("name") == "action_log":
-            # Surface a freshly committed AgentActionLog row so the chat
-            # tool card can render its Revert button immediately.
-            data = event.get("data", {})
-            if data.get("id") is not None:
-                yield streaming_service.format_data("action-log", data)
-
-        elif (
-            event_type == "on_custom_event"
-            and event.get("name") == "action_log_updated"
-        ):
-            # Reversibility flipped in kb_persistence after the SAVEPOINT
-            # for a destructive op (rm/rmdir/move/edit/write) committed.
-            # Frontend uses this to flip the card's Revert
-            # button on without re-fetching the actions list.
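# Aside: a stand-alone sketch of the custom-event to data-* relay the
# branches above implement (hypothetical helper; the real
# streaming-service method may differ). Only id-bearing payloads are
# forwarded; the returned envelope is then framed as an SSE data line.
from typing import Any


def relay_custom_event(kind: str, data: dict[str, Any]) -> dict[str, Any] | None:
    if data.get("id") is None:
        return None  # ignore payloads without a committed row id
    return {"type": f"data-{kind}", "data": data}


# relay_custom_event("action-log-updated", {"id": 7})
# -> {"type": "data-action-log-updated", "data": {"id": 7}}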
- data = event.get("data", {}) - if data.get("id") is not None: - yield streaming_service.format_data("action-log-updated", data) - - elif event_type in ("on_chain_end", "on_agent_end"): - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - current_text_id = None - - if current_text_id is not None: - yield streaming_service.format_text_end(current_text_id) - if content_builder is not None: - content_builder.on_text_end(current_text_id) - - completion_event = complete_current_step() - if completion_event: - yield completion_event + accumulated_text = result.accumulated_text state = await agent.aget_state(config) state_values = getattr(state, "values", {}) or {} @@ -2397,7 +912,6 @@ async def _stream_agent_events( result.commit_gate_reason = "" result.accumulated_text = accumulated_text - result.agent_called_update_memory = called_update_memory _log_file_contract("turn_outcome", result) interrupt_value = _first_interrupt_value(state) diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py index 9142dd914..9a309f9d7 100644 --- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py @@ -5,7 +5,6 @@ from __future__ import annotations from collections.abc import AsyncIterator from typing import Any -from app.agents.new_chat.feature_flags import get_flags from app.tasks.chat.streaming.graph_stream.result import StreamingResult from app.tasks.chat.streaming.relay.event_relay import EventRelay from app.tasks.chat.streaming.relay.state import AgentEventRelayState @@ -30,7 +29,6 @@ async def stream_output( initial_step_id=initial_step_id, initial_step_title=initial_step_title, initial_step_items=initial_step_items, - parity_v2=bool(get_flags().enable_stream_parity_v2), ) astream_kwargs: dict[str, Any] = {"config": config, "version": "v2"} diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py index 861342b32..ef86dae56 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py @@ -33,7 +33,7 @@ def iter_chat_model_stream_frames( reasoning_delta = parts["reasoning"] text_delta = parts["text"] - if state.parity_v2 and reasoning_delta: + if reasoning_delta: if state.current_text_id is not None: yield streaming_service.format_text_end(state.current_text_id) if content_builder is not None: @@ -100,7 +100,7 @@ def iter_chat_model_stream_frames( if content_builder is not None: content_builder.on_text_delta(state.current_text_id, text_delta) - if state.parity_v2 and parts["tool_call_chunks"]: + if parts["tool_call_chunks"]: for tcc in parts["tool_call_chunks"]: idx = tcc.get("index") diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py index c316cc74a..e7d2d7f78 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -77,12 +77,11 @@ def iter_tool_start_frames( yield emit_thinking_step_frame(**frame_kw) matched_meta: dict[str, str] | None = None - if state.parity_v2: - taken_ui_ids = 
set(state.ui_tool_call_id_by_run.values()) - for meta in state.index_to_meta.values(): - if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: - matched_meta = meta - break + taken_ui_ids = set(state.ui_tool_call_id_by_run.values()) + for meta in state.index_to_meta.values(): + if meta["name"] == tool_name and meta["ui_id"] not in taken_ui_ids: + matched_meta = meta + break tool_call_id: str langchain_tool_call_id: str | None = None @@ -97,13 +96,12 @@ def iter_tool_start_frames( if run_id else streaming_service.generate_tool_call_id() ) - if state.parity_v2: - langchain_tool_call_id = match_buffered_langchain_tool_call_id( - state.pending_tool_call_chunks, - tool_name, - run_id, - state.lc_tool_call_id_by_run, - ) + langchain_tool_call_id = match_buffered_langchain_tool_call_id( + state.pending_tool_call_chunks, + tool_name, + run_id, + state.lc_tool_call_id_by_run, + ) yield streaming_service.format_tool_input_start( tool_call_id, tool_name, diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py index e8e35d0b2..7bd996606 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -22,7 +22,6 @@ class AgentEventRelayState: active_tool_depth: int = 0 called_update_memory: bool = False current_reasoning_id: str | None = None - parity_v2: bool = False pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) file_path_by_run: dict[str, str] = field(default_factory=dict) @@ -39,7 +38,6 @@ class AgentEventRelayState: initial_step_id: str | None = None, initial_step_title: str = "", initial_step_items: list[str] | None = None, - parity_v2: bool, ) -> AgentEventRelayState: counter = 1 if initial_step_id else 0 return cls( @@ -47,7 +45,6 @@ class AgentEventRelayState: last_active_step_id=initial_step_id, last_active_step_title=initial_step_title, last_active_step_items=list(initial_step_items or []), - parity_v2=parity_v2, ) def next_thinking_step_id(self, step_prefix: str) -> str: diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py index 6800be2af..099aea882 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py @@ -31,7 +31,6 @@ def _clear_all(monkeypatch: pytest.MonkeyPatch) -> None: "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "SURFSENSE_ENABLE_ACTION_LOG", "SURFSENSE_ENABLE_REVERT_ROUTE", - "SURFSENSE_ENABLE_STREAM_PARITY_V2", "SURFSENSE_ENABLE_PLUGIN_LOADER", "SURFSENSE_ENABLE_OTEL", "SURFSENSE_ENABLE_AGENT_CACHE", @@ -61,7 +60,6 @@ def test_defaults_match_shipped_agent_stack(monkeypatch: pytest.MonkeyPatch) -> assert flags.enable_kb_planner_runnable is True assert flags.enable_action_log is True assert flags.enable_revert_route is True - assert flags.enable_stream_parity_v2 is True assert flags.enable_plugin_loader is False assert flags.enable_otel is False # Phase 2: agent cache is now default-on (the prerequisite tool @@ -127,7 +125,6 @@ def test_each_flag_can_be_set_independently(monkeypatch: pytest.MonkeyPatch) -> "enable_kb_planner_runnable": "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "enable_action_log": "SURFSENSE_ENABLE_ACTION_LOG", "enable_revert_route": "SURFSENSE_ENABLE_REVERT_ROUTE", - "enable_stream_parity_v2": 
"SURFSENSE_ENABLE_STREAM_PARITY_V2", "enable_plugin_loader": "SURFSENSE_ENABLE_PLUGIN_LOADER", "enable_otel": "SURFSENSE_ENABLE_OTEL", } diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py index 892bb7a6a..9ae7defec 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py @@ -137,7 +137,7 @@ def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: def test_agent_event_relay_state_factory_matches_counter_rule() -> None: - s0 = AgentEventRelayState.for_invocation(parity_v2=False) + s0 = AgentEventRelayState.for_invocation() assert s0.thinking_step_counter == 0 assert s0.last_active_step_id is None @@ -145,11 +145,9 @@ def test_agent_event_relay_state_factory_matches_counter_rule() -> None: initial_step_id="thinking-resume-1", initial_step_title="Inherited", initial_step_items=["Topic: X"], - parity_v2=True, ) assert s1.thinking_step_counter == 1 assert s1.last_active_step_id == "thinking-resume-1" - assert s1.parity_v2 is True assert s1.next_thinking_step_id("thinking") == "thinking-2" diff --git a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py index c317eba20..4b1fadd9c 100644 --- a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py +++ b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py @@ -161,7 +161,7 @@ class TestToolHeavyTurn: _assert_jsonb_safe(snap) def test_tool_input_available_without_prior_start_creates_card(self): - # Legacy / parity_v2-OFF path: tool-input-available may be + # Late-registration: tool-input-available may be # emitted without a prior tool-input-start (no streamed # tool_call_chunks). The card should still be created. b = AssistantContentBuilder() @@ -187,7 +187,7 @@ class TestToolHeavyTurn: assert part["result"] == {"matches": 3} def test_tool_input_start_idempotent_for_same_ui_id(self): - # parity_v2: tool-input-start can fire from BOTH the chunk + # tool-input-start can fire from BOTH the chunk # registration path AND the canonical ``on_tool_start`` path. # The second call must not create a duplicate part. b = AssistantContentBuilder() diff --git a/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py b/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py index 60750396c..ada32d168 100644 --- a/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py +++ b/surfsense_backend/tests/unit/tasks/chat/test_tool_input_streaming.py @@ -1,16 +1,13 @@ """Unit tests for live tool-call argument streaming. -Pins the wire format that ``_stream_agent_events`` emits when -``SURFSENSE_ENABLE_STREAM_PARITY_V2=true``: ``tool-input-start`` → -``tool-input-delta``... → ``tool-input-available`` → ``tool-output-available`` -all keyed by the same LangChain ``tool_call.id``. +Pins the wire format that ``_stream_agent_events`` emits: +``tool-input-start`` → ``tool-input-delta``... → ``tool-input-available`` → +``tool-output-available``, keyed consistently with LangChain ``tool_call.id`` +when the model streams indexed chunks. Identity is tracked in ``index_to_meta`` (per-chunk ``index``) and -``ui_tool_call_id_by_run`` (LangGraph ``run_id``); both are private to -``_stream_agent_events`` so we exercise them via the public wire output. 
- -These tests also lock in the legacy / parity_v2-OFF behaviour so the -synthetic ``call_`` shape stays stable for older clients. +``ui_tool_call_id_by_run`` (LangGraph ``run_id``); both are internal to the +streaming layer so we assert on the public SSE payloads. """ from __future__ import annotations @@ -22,8 +19,6 @@ from typing import Any import pytest -import app.tasks.chat.stream_new_chat as stream_module -from app.agents.new_chat.feature_flags import AgentFeatureFlags from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.stream_new_chat import ( StreamResult, @@ -164,24 +159,6 @@ def _tool_end( } -@pytest.fixture -def parity_v2_on(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - stream_module, - "get_flags", - lambda: AgentFeatureFlags(enable_stream_parity_v2=True), - ) - - -@pytest.fixture -def parity_v2_off(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - stream_module, - "get_flags", - lambda: AgentFeatureFlags(enable_stream_parity_v2=False), - ) - - async def _drain( events: list[dict[str, Any]], state: _FakeAgentState | None = None ) -> list[dict[str, Any]]: @@ -253,12 +230,12 @@ class TestLegacyMatch: # --------------------------------------------------------------------------- -# parity_v2 wire format tests. +# Tool input streaming wire format # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_idless_chunk_merging_by_index(parity_v2_on: None) -> None: +async def test_idless_chunk_merging_by_index() -> None: """First chunk carries id+name; later idless chunks at the same ``index`` merge into the SAME ``tool-input-start`` ui id and emit one ``tool-input-delta`` per chunk.""" @@ -302,9 +279,7 @@ async def test_idless_chunk_merging_by_index(parity_v2_on: None) -> None: @pytest.mark.asyncio -async def test_two_interleaved_tool_calls_route_by_index( - parity_v2_on: None, -) -> None: +async def test_two_interleaved_tool_calls_route_by_index() -> None: """Two same-name calls with distinct indices keep their deltas routed to the right card.""" events = [ @@ -344,7 +319,7 @@ async def test_two_interleaved_tool_calls_route_by_index( @pytest.mark.asyncio -async def test_identity_stable_across_lifecycle(parity_v2_on: None) -> None: +async def test_identity_stable_across_lifecycle() -> None: """Whatever id ``tool-input-start`` chose must be the SAME id used on ``tool-input-available`` AND ``tool-output-available``.""" events = [ @@ -367,7 +342,7 @@ async def test_identity_stable_across_lifecycle(parity_v2_on: None) -> None: @pytest.mark.asyncio -async def test_no_duplicate_tool_input_start(parity_v2_on: None) -> None: +async def test_no_duplicate_tool_input_start() -> None: """When the chunk-emission loop already fired ``tool-input-start`` for this run, ``on_tool_start`` MUST NOT emit a second one.""" events = [ @@ -386,9 +361,7 @@ async def test_no_duplicate_tool_input_start(parity_v2_on: None) -> None: @pytest.mark.asyncio -async def test_active_text_closes_before_early_tool_input_start( - parity_v2_on: None, -) -> None: +async def test_active_text_closes_before_early_tool_input_start() -> None: """Streaming a text-delta then a tool-call chunk in subsequent chunks: the wire MUST contain ``text-end`` before the FIRST ``tool-input-start`` (clean part boundary on the frontend).""" @@ -409,9 +382,7 @@ async def test_active_text_closes_before_early_tool_input_start( @pytest.mark.asyncio -async def test_mixed_text_and_tool_chunk_preserve_order( - 
parity_v2_on: None, -) -> None: +async def test_mixed_text_and_tool_chunk_preserve_order() -> None: """One AIMessageChunk that carries BOTH ``text`` content AND ``tool_call_chunks`` should emit the text delta FIRST, then close text, then ``tool-input-start``+``tool-input-delta``.""" @@ -441,45 +412,7 @@ async def test_mixed_text_and_tool_chunk_preserve_order( @pytest.mark.asyncio -async def test_parity_v2_off_preserves_legacy_shape( - parity_v2_off: None, -) -> None: - """When the flag is OFF, no deltas are emitted and the ``toolCallId`` - is ``call_`` (NOT the lc id).""" - events = [ - _model_stream( - tool_call_chunks=[ - {"id": "lc-1", "name": "ls", "args": '{"path":"/"}', "index": 0} - ] - ), - _tool_start(name="ls", run_id="run-A", input_payload={"path": "/"}), - _tool_end(name="ls", run_id="run-A", tool_call_id="lc-1"), - ] - payloads = await _drain(events) - - assert _of_type(payloads, "tool-input-delta") == [] - starts = _of_type(payloads, "tool-input-start") - assert len(starts) == 1 - assert starts[0]["toolCallId"].startswith("call_run-A") - # No ``langchainToolCallId`` propagation on ``tool-input-start`` in - # legacy mode (the start event fires before the ToolMessage is - # available, so we can't extract the authoritative LangChain id yet). - assert "langchainToolCallId" not in starts[0] - output = _of_type(payloads, "tool-output-available") - assert output[0]["toolCallId"].startswith("call_run-A") - # ``tool-output-available`` MUST carry ``langchainToolCallId`` even - # in legacy mode: the chat tool card uses it to backfill the - # LangChain id and join against the ``data-action-log`` SSE event - # (keyed by ``lc_tool_call_id``) so the inline Revert button can - # light up. Sourced from the returned ``ToolMessage.tool_call_id``, - # which is populated regardless of feature-flag state. 
- assert output[0]["langchainToolCallId"] == "lc-1" - - -@pytest.mark.asyncio -async def test_skip_append_prevents_stale_id_reuse( - parity_v2_on: None, -) -> None: +async def test_skip_append_prevents_stale_id_reuse() -> None: """Two same-name tools: the SECOND tool's ``langchainToolCallId`` must NOT come from the first tool's chunk (``pending_tool_call_chunks`` must stay empty for indexed-registered chunks).""" @@ -506,9 +439,7 @@ async def test_skip_append_prevents_stale_id_reuse( @pytest.mark.asyncio -async def test_registration_waits_for_both_id_and_name( - parity_v2_on: None, -) -> None: +async def test_registration_waits_for_both_id_and_name() -> None: """An id-only chunk (no name yet) must NOT emit ``tool-input-start``.""" events = [ _model_stream( @@ -520,12 +451,9 @@ async def test_registration_waits_for_both_id_and_name( @pytest.mark.asyncio -async def test_unmatched_fallback_still_attaches_lc_id( - parity_v2_on: None, -) -> None: - """parity_v2 ON, but the provider didn't include an ``index``: the - legacy fallback path must still emit ``tool-input-start`` with the - matching ``langchainToolCallId``.""" +async def test_unmatched_fallback_still_attaches_lc_id() -> None: + """When the provider omits chunk ``index``, buffered chunks still get a + ``tool-input-start`` with the matching ``langchainToolCallId``.""" events = [ # No index on the chunk → not registered into index_to_meta; # falls through to ``pending_tool_call_chunks`` so the legacy @@ -542,9 +470,7 @@ async def test_unmatched_fallback_still_attaches_lc_id( @pytest.mark.asyncio -async def test_interrupt_request_uses_task_that_contains_interrupt( - parity_v2_on: None, -) -> None: +async def test_interrupt_request_uses_task_that_contains_interrupt() -> None: interrupt_payload = { "type": "calendar_event_create", "action": { diff --git a/surfsense_web/components/assistant-ui/reasoning-message-part.tsx b/surfsense_web/components/assistant-ui/reasoning-message-part.tsx index 70636eab8..6e7aaf048 100644 --- a/surfsense_web/components/assistant-ui/reasoning-message-part.tsx +++ b/surfsense_web/components/assistant-ui/reasoning-message-part.tsx @@ -7,8 +7,8 @@ import { TextShimmerLoader } from "@/components/prompt-kit/loader"; import { cn } from "@/lib/utils"; /** - * Renders the structured `reasoning` part emitted by the backend's - * stream-parity v2 path (A1). + * Renders the structured `reasoning` part emitted by the backend stream + * (typed reasoning deltas from the chat model). * * Behaviour mirrors the existing `ThinkingStepsDisplay`: * - collapsed by default; diff --git a/surfsense_web/components/assistant-ui/tool-fallback.tsx b/surfsense_web/components/assistant-ui/tool-fallback.tsx index 06082c9c7..ba58f4158 100644 --- a/surfsense_web/components/assistant-ui/tool-fallback.tsx +++ b/surfsense_web/components/assistant-ui/tool-fallback.tsx @@ -48,13 +48,11 @@ import { cn } from "@/lib/utils"; * stream, post-stream reversibility flip, and explicit revert clicks. * * Match key (in priority order): - * 1. ``a.tool_call_id === toolCallId`` — direct hit in parity_v2 when - * the model streamed ``tool_call_chunks`` so the card's synthetic - * id IS the LangChain id. - * 2. ``a.tool_call_id === langchainToolCallId`` — legacy mode (or - * parity_v2 with provider-side chunk emission) where the card's - * synthetic id is ``call_`` and the LangChain id is - * backfilled onto the part by ``tool-output-available``. + * 1. 
``a.tool_call_id === toolCallId`` — direct hit when the model
+ *    streamed ``tool_call_chunks`` so the card id matches the LangChain id.
+ * 2. ``a.tool_call_id === langchainToolCallId`` — synthetic card id is
+ *    ``call_<run_id>`` and the LangChain id is backfilled by
+ *    ``tool-output-available``.
  * 3. ``(chat_turn_id, tool_name, position-within-turn)`` — fallback
  *    for cards whose synthetic id is ``call_<run_id>`` AND whose
  *    ``langchainToolCallId`` never got backfilled (provider emitted
@@ -116,7 +114,7 @@ function ToolCardRevertButton({

 	const action = useMemo(() => {
 		// Tier 1 + 2: O(1) Map-backed direct id match. Covers
-		// ~all parity_v2 streams and any legacy stream that backfilled
+		// indexed chunk streams and any stream that backfilled
 		// ``langchainToolCallId`` via ``tool-output-available``.
 		const direct = findByToolCallId(toolCallId) ?? findByToolCallId(langchainToolCallId);
 		if (direct) return direct;
diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts
index 27047ecfe..809e214d1 100644
--- a/surfsense_web/lib/chat/streaming-state.ts
+++ b/surfsense_web/lib/chat/streaming-state.ts
@@ -421,9 +421,8 @@ export type SSEEvent =
 	/**
 	 * Live tool-call argument delta. Concatenated into
 	 * ``argsText`` on the matching ``tool-call`` content part
-	 * by ``appendToolInputDelta``. parity_v2 only — the legacy
-	 * code path emits ``tool-input-available`` without prior
-	 * deltas.
+	 * by ``appendToolInputDelta``. Some providers emit
+	 * ``tool-input-available`` without prior deltas.
 	 */
 	type: "tool-input-delta";
 	toolCallId: string;

From f0f87107f2a9922bb6e4f8d2cfb48764dc098e5a Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 8 May 2026 22:46:58 +0200
Subject: [PATCH 26/58] Track active task span id on the agent event relay
 state.

---
 .../app/tasks/chat/streaming/relay/state.py   | 19 ++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py
index 7bd996606..82525a52f 100644
--- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py
+++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py
@@ -8,7 +8,13 @@ from typing import Any

 @dataclass
 class AgentEventRelayState:
-    """Tracks text, thinking steps, tool depth, and pending tool-call metadata."""
+    """Tracks text, thinking steps, tool depth, and pending tool-call metadata.
+
+    ``active_span_id`` groups steps/tools for one open ``task`` episode.
+    ``active_task_run_id`` is the LangGraph ``run_id`` of that ``task`` so we
+    only clear the span when that run ends (not when child tools end). Handlers
+    will set/clear these via ``task_span`` helpers in a later change.
+    """

     accumulated_text: str = ""
     current_text_id: str | None = None
@@ -30,6 +36,17 @@ class AgentEventRelayState:
     current_lc_tool_call_id: dict[str, str | None] = field(
         default_factory=lambda: {"value": None}
     )
+    # Open ``task`` delegation span (one id shared by nested activity); unset outside.
+    active_span_id: str | None = None
+    active_task_run_id: str | None = None
+    # Span id minted when a ``task`` tool_call_chunk registers (before ``on_tool_start``).
+ pending_task_span_by_lc: dict[str, str] = field(default_factory=dict) + + def span_metadata_if_active(self) -> dict[str, Any] | None: + """``{"spanId": ...}`` when a span is active; ``None`` otherwise.""" + if self.active_span_id: + return {"spanId": self.active_span_id} + return None @classmethod def for_invocation( From f944cdacb753369af8e117e34cdaca39c3c48c73 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:03 +0200 Subject: [PATCH 27/58] Add helpers to open and close task delegation span ids. --- .../tasks/chat/streaming/relay/task_span.py | 74 +++++++++++++++++++ .../tasks/chat/streaming/test_task_span.py | 69 +++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 surfsense_backend/app/tasks/chat/streaming/relay/task_span.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/task_span.py b/surfsense_backend/app/tasks/chat/streaming/relay/task_span.py new file mode 100644 index 000000000..c4cdf24ba --- /dev/null +++ b/surfsense_backend/app/tasks/chat/streaming/relay/task_span.py @@ -0,0 +1,74 @@ +"""Open/close ``active_span_id`` around a delegating ``task`` tool run.""" + +from __future__ import annotations + +import uuid + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState + + +def new_span_id() -> str: + """One delegation-episode id (shared by activity under an open ``task``).""" + return f"spn_{uuid.uuid4().hex}" + + +def _run_key(run_id: str) -> str: + return (run_id or "").strip() + + +def _lc_key(langchain_tool_call_id: str | None) -> str: + return (langchain_tool_call_id or "").strip() + + +def ensure_pending_task_span_for_lc(state: AgentEventRelayState, lc_id: str) -> str: + """Return span id for this LangChain tool call id, storing it in ``pending`` if new. + + Used from ``chat_model_stream`` when the first ``task`` chunk registers so + early ``tool-input-start`` can carry ``metadata.spanId`` before ``on_tool_start``. + """ + key = _lc_key(lc_id) + if not key: + return new_span_id() + existing = state.pending_task_span_by_lc.get(key) + if existing: + return existing + sid = new_span_id() + state.pending_task_span_by_lc[key] = sid + return sid + + +def open_task_span( + state: AgentEventRelayState, + *, + run_id: str, + langchain_tool_call_id: str | None = None, +) -> str: + """Set ``active_span_id`` from pending (same lc) or mint; remember ``active_task_run_id``. + + Call when the ``task`` tool **starts**. Nested ``task`` is not supported: + a second call replaces the previous span without restoring it. 
+ """ + key = _lc_key(langchain_tool_call_id) + sid: str | None = state.pending_task_span_by_lc.pop(key, None) if key else None + if not sid: + sid = new_span_id() + state.active_span_id = sid + state.active_task_run_id = _run_key(run_id) or None + return sid + + +def clear_task_span_if_delegating_task_ended( + state: AgentEventRelayState, + *, + tool_name: str, + run_id: str, +) -> None: + """Clear span state only when this event is the end of the opening ``task`` run.""" + if tool_name != "task": + return + if state.active_task_run_id is None: + return + if state.active_task_run_id != _run_key(run_id): + return + state.active_span_id = None + state.active_task_run_id = None diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py new file mode 100644 index 000000000..349c9879c --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_task_span.py @@ -0,0 +1,69 @@ +"""Unit tests for ``task_span`` open/close helpers.""" + +from __future__ import annotations + +import pytest + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import ( + clear_task_span_if_delegating_task_ended, + ensure_pending_task_span_for_lc, + open_task_span, +) + +pytestmark = pytest.mark.unit + + +def test_open_task_span_sets_span_and_run_id() -> None: + state = AgentEventRelayState.for_invocation() + sid = open_task_span(state, run_id="run-abc") + assert sid.startswith("spn_") + assert state.active_span_id == sid + assert state.active_task_run_id == "run-abc" + assert state.span_metadata_if_active() == {"spanId": sid} + + +def test_clear_ignored_for_non_task_tool() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-1") + sid = state.active_span_id + clear_task_span_if_delegating_task_ended( + state, tool_name="web_search", run_id="run-1" + ) + assert state.active_span_id == sid + assert state.active_task_run_id == "run-1" + + +def test_clear_ignored_when_task_run_id_mismatches() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-open") + clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-other") + assert state.active_span_id is not None + assert state.active_task_run_id == "run-open" + + +def test_clear_on_matching_task_end() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-x") + clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-x") + assert state.active_span_id is None + assert state.active_task_run_id is None + assert state.span_metadata_if_active() is None + + +def test_clear_noop_when_no_open_span() -> None: + state = AgentEventRelayState.for_invocation() + clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-x") + assert state.active_span_id is None + + +def test_pending_then_open_reuses_same_span_id() -> None: + state = AgentEventRelayState.for_invocation() + sid_pending = ensure_pending_task_span_for_lc(state, "lc-task-1") + assert state.pending_task_span_by_lc["lc-task-1"] == sid_pending + sid_active = open_task_span( + state, run_id="run-1", langchain_tool_call_id="lc-task-1" + ) + assert sid_active == sid_pending + assert state.active_span_id == sid_pending + assert "lc-task-1" not in state.pending_task_span_by_lc From 695f9ded2c6401f23e386019eff346df39a3a6f9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:08 
+0200 Subject: [PATCH 28/58] Mint pending span id when the task tool registers from chunks. --- .../chat/streaming/handlers/chat_model_stream.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py index ef86dae56..c3f6d6d59 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/chat_model_stream.py @@ -7,6 +7,7 @@ from typing import Any from app.tasks.chat.streaming.helpers.chunk_parts import extract_chunk_parts from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import ensure_pending_task_span_for_lc from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, ) @@ -41,6 +42,7 @@ def iter_chat_model_stream_frames( state.current_text_id = None if state.current_reasoning_id is None: comp, new_active = complete_active_thinking_step( + state=state, streaming_service=streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, @@ -76,6 +78,7 @@ def iter_chat_model_stream_frames( state.current_reasoning_id = None if state.current_text_id is None: comp, new_active = complete_active_thinking_step( + state=state, streaming_service=streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, @@ -109,6 +112,10 @@ def iter_chat_model_stream_frames( name = tcc.get("name") if lc_id and name: ui_id = lc_id + tool_input_metadata: dict[str, Any] | None = None + if name == "task": + sid = ensure_pending_task_span_for_lc(state, str(lc_id)) + tool_input_metadata = {"spanId": sid} if state.current_text_id is not None: yield streaming_service.format_text_end(state.current_text_id) @@ -132,9 +139,12 @@ def iter_chat_model_stream_frames( ui_id, name, langchain_tool_call_id=lc_id, + metadata=tool_input_metadata, ) if content_builder is not None: - content_builder.on_tool_input_start(ui_id, name, lc_id) + content_builder.on_tool_input_start( + ui_id, name, lc_id, metadata=tool_input_metadata + ) meta = state.index_to_meta.get(idx) if idx is not None else None if meta: From 2c1b219c6cb32573e08bc0852dd2236fc3e325cc Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:32 +0200 Subject: [PATCH 29/58] Open task spans at tool start and tag unmatched tool-input SSE. 
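Opening the span before any tool-input SSE for the run means the fallback
(unmatched) ``tool-input-start`` / ``tool-input-available`` frames can carry
``metadata.spanId``. A rough sketch of the resulting frames for a delegating
``task`` call and one nested child tool; the ids and exact payload keys are
illustrative, and the ``metadata`` kwarg on the streaming service lands later
in this series (PATCH 33):

    data: {"type": "tool-input-start", "toolCallId": "call_run-T_1",
           "toolName": "task", "metadata": {"spanId": "spn_3f2a"}}

    data: {"type": "tool-input-start", "toolCallId": "call_run-C_1",
           "toolName": "web_search", "metadata": {"spanId": "spn_3f2a"}}

Both frames share one spanId, which is what lets the frontend group nested
activity under the open delegation episode.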
--- .../chat/streaming/handlers/tool_start.py | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py index e7d2d7f78..3b3537567 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -11,6 +11,7 @@ from app.tasks.chat.streaming.helpers.tool_call_matching import ( match_buffered_langchain_tool_call_id, ) from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import open_task_span from app.tasks.chat.streaming.relay.thinking_step_completion import ( complete_active_thinking_step, ) @@ -46,6 +47,7 @@ def iter_tool_start_frames( if state.last_active_step_title != "Synthesizing response": comp, new_active = complete_active_thinking_step( + state=state, streaming_service=streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, @@ -62,20 +64,6 @@ def iter_tool_start_frames( state.tool_step_ids[run_id] = tool_step_id state.last_active_step_id = tool_step_id - thinking = resolve_tool_start_thinking(tool_name, tool_input) - state.last_active_step_title = thinking.title - state.last_active_step_items = thinking.items - frame_kw: dict[str, Any] = { - "streaming_service": streaming_service, - "content_builder": content_builder, - "step_id": tool_step_id, - "title": thinking.title, - "status": "in_progress", - } - if thinking.include_items_on_frame: - frame_kw["items"] = thinking.items - yield emit_thinking_step_frame(**frame_kw) - matched_meta: dict[str, str] | None = None taken_ui_ids = set(state.ui_tool_call_id_by_run.values()) for meta in state.index_to_meta.values(): @@ -102,16 +90,46 @@ def iter_tool_start_frames( run_id, state.lc_tool_call_id_by_run, ) + + if tool_name == "task": + open_task_span( + state, + run_id=run_id, + langchain_tool_call_id=langchain_tool_call_id, + ) + + span_md = state.span_metadata_if_active() + + if matched_meta is None: yield streaming_service.format_tool_input_start( tool_call_id, tool_name, langchain_tool_call_id=langchain_tool_call_id, + metadata=span_md, ) if content_builder is not None: content_builder.on_tool_input_start( - tool_call_id, tool_name, langchain_tool_call_id + tool_call_id, + tool_name, + langchain_tool_call_id, + metadata=span_md, ) + thinking = resolve_tool_start_thinking(tool_name, tool_input) + state.last_active_step_title = thinking.title + state.last_active_step_items = thinking.items + frame_kw: dict[str, Any] = { + "streaming_service": streaming_service, + "content_builder": content_builder, + "step_id": tool_step_id, + "title": thinking.title, + "status": "in_progress", + "metadata": span_md, + } + if thinking.include_items_on_frame: + frame_kw["items"] = thinking.items + yield emit_thinking_step_frame(**frame_kw) + if run_id: state.ui_tool_call_id_by_run[run_id] = tool_call_id @@ -130,6 +148,7 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id=langchain_tool_call_id, + metadata=span_md, ) if content_builder is not None: content_builder.on_tool_input_available( @@ -137,4 +156,5 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id, + metadata=span_md, ) From 3ed09bdd90f114bfb370c614bf3e697244288965 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:38 +0200 Subject: [PATCH 30/58] Clear spans after task 
completion and pass span id on tool output. --- .../app/tasks/chat/streaming/handlers/tool_end.py | 7 +++++++ .../app/tasks/chat/streaming/handlers/tool_output_frame.py | 7 ++++++- .../chat/streaming/handlers/tools/emission_context.py | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py index 0bfef25eb..ec7d6551c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -13,6 +13,7 @@ from app.tasks.chat.streaming.handlers.tools import ( ) from app.tasks.chat.streaming.helpers.tool_output import tool_output_has_error from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import clear_task_span_if_delegating_task_ended from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame @@ -91,6 +92,7 @@ def iter_tool_end_frames( title=title, status="completed", items=completed_items, + metadata=state.span_metadata_if_active(), ) state.just_finished_tool = True @@ -108,5 +110,10 @@ def iter_tool_end_frames( stream_result=result, langgraph_config=config, staged_workspace_file_path=staged_file_path, + tool_metadata=state.span_metadata_if_active(), ) yield from iter_tool_completion_emission_frames(emission_ctx) + + clear_task_span_if_delegating_task_ended( + state, tool_name=tool_name, run_id=run_id + ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py index 07244364c..4cd8e3274 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_output_frame.py @@ -12,13 +12,18 @@ def emit_tool_output_available_frame( langchain_id_holder: dict[str, str | None], call_id: str, output: Any, + tool_metadata: dict[str, Any] | None = None, ) -> str: if content_builder is not None: content_builder.on_tool_output_available( - call_id, output, langchain_id_holder["value"] + call_id, + output, + langchain_id_holder["value"], + metadata=tool_metadata, ) return streaming_service.format_tool_output_available( call_id, output, langchain_tool_call_id=langchain_id_holder["value"], + metadata=tool_metadata, ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py index d9ff796c0..baa1d7336 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/emission_context.py @@ -23,6 +23,7 @@ class ToolCompletionEmissionContext: stream_result: Any langgraph_config: dict[str, Any] staged_workspace_file_path: str | None + tool_metadata: dict[str, Any] | None = None def emit_tool_output_card(self, payload: Any) -> str: return emit_tool_output_available_frame( @@ -31,4 +32,5 @@ class ToolCompletionEmissionContext: langchain_id_holder=self.langchain_tool_call_id_holder, call_id=self.tool_call_id, output=payload, + tool_metadata=self.tool_metadata, ) From 1dcb08e925f3d08bcd7936e6ce52b18efd9f9fd8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:46 +0200 Subject: [PATCH 31/58] Attach active span metadata to thinking-step SSE and completion. 
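Net effect on the wire: while a ``task`` span is open, every
``data-thinking-step`` frame, including the completion frame emitted by
``complete_active_thinking_step``, carries the span id. A sketch with
illustrative values (envelope shape per ``format_data``):

    data: {"type": "data-thinking-step", "data": {"id": "thinking-4",
           "title": "Searching the web", "status": "completed",
           "items": [], "metadata": {"spanId": "spn_3f2a"}}}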
--- .../app/tasks/chat/streaming/relay/event_relay.py | 1 + .../tasks/chat/streaming/relay/thinking_step_completion.py | 3 +++ .../app/tasks/chat/streaming/relay/thinking_step_sse.py | 6 +++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py index 872998926..03d6a66e6 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/event_relay.py @@ -115,6 +115,7 @@ class EventRelay: state.current_text_id = None completion_event, new_active = complete_active_thinking_step( + state=state, streaming_service=self.streaming_service, content_builder=content_builder, last_active_step_id=state.last_active_step_id, diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py index a0be71281..ad0930341 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_completion.py @@ -4,11 +4,13 @@ from __future__ import annotations from typing import Any +from .state import AgentEventRelayState from .thinking_step_sse import emit_thinking_step_frame def complete_active_thinking_step( *, + state: AgentEventRelayState, streaming_service: Any, content_builder: Any | None, last_active_step_id: str | None, @@ -26,6 +28,7 @@ def complete_active_thinking_step( title=last_active_step_title, status="completed", items=last_active_step_items if last_active_step_items else None, + metadata=state.span_metadata_if_active(), ) return event, None return None, last_active_step_id diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py index 9e8c08dd5..6737f536b 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/thinking_step_sse.py @@ -13,12 +13,16 @@ def emit_thinking_step_frame( title: str, status: str = "in_progress", items: list[str] | None = None, + metadata: dict[str, Any] | None = None, ) -> str: if content_builder is not None: - content_builder.on_thinking_step(step_id, title, status, items) + content_builder.on_thinking_step( + step_id, title, status, items, metadata=metadata + ) return streaming_service.format_thinking_step( step_id=step_id, title=title, status=status, items=items, + metadata=metadata, ) From f1d80ffe5d9ca9005b4aa4769cd34a418a6fc345 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:50 +0200 Subject: [PATCH 32/58] Forward span metadata from report_progress thinking updates. 
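Report-progress updates re-emit the active thinking step mid-tool, so they
need the same tagging. A minimal sketch of the frame construction this wires
up, using made-up ids and titles; the helper signature is the one extended in
the previous patch:

    frame = emit_thinking_step_frame(
        streaming_service=streaming_service,
        content_builder=None,  # SSE-only sketch; persistence omitted
        step_id="thinking-3",                      # illustrative
        title="Generating report",                 # illustrative
        status="in_progress",
        items=["Topic: Q3 roadmap", "Drafting section 2..."],
        metadata={"spanId": "spn_3f2a"},  # state.span_metadata_if_active()
    )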
--- .../app/tasks/chat/streaming/handlers/custom_event_dispatch.py | 1 + .../app/tasks/chat/streaming/handlers/custom_events.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py index b373919cf..69f4b8a24 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_event_dispatch.py @@ -33,6 +33,7 @@ def iter_custom_event_frames( last_active_step_items=state.last_active_step_items, streaming_service=streaming_service, content_builder=content_builder, + thinking_metadata=state.span_metadata_if_active(), ) if frame: yield frame diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py index 765f1d790..e48e2c493 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/custom_events.py @@ -15,6 +15,7 @@ def handle_report_progress( last_active_step_items: list[str], streaming_service: Any, content_builder: Any | None, + thinking_metadata: dict[str, Any] | None = None, ) -> tuple[str | None, list[str]]: """Update report step items; may emit one thinking SSE frame. @@ -50,6 +51,7 @@ def handle_report_progress( title=last_active_step_title, status="in_progress", items=new_items, + metadata=thinking_metadata, ) return frame, new_items From e802de233330c8aeb913e02ab922fa4a65dc7a00 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:47:58 +0200 Subject: [PATCH 33/58] Include optional metadata on tool and thinking-step SSE payloads. --- .../app/services/new_streaming_service.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py index cec0c8a5e..ba0cb8753 100644 --- a/surfsense_backend/app/services/new_streaming_service.py +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -456,6 +456,8 @@ class VercelStreamingService: title: str, status: str = "in_progress", items: list[str] | None = None, + *, + metadata: dict[str, Any] | None = None, ) -> str: """ Format a thinking step for chain-of-thought display (SurfSense specific). @@ -469,15 +471,15 @@ class VercelStreamingService: Returns: str: SSE formatted thinking step data part """ - return self.format_data( - "thinking-step", - { - "id": step_id, - "title": title, - "status": status, - "items": items or [], - }, - ) + payload: dict[str, Any] = { + "id": step_id, + "title": title, + "status": status, + "items": items or [], + } + if metadata: + payload["metadata"] = metadata + return self.format_data("thinking-step", payload) def format_thread_title_update(self, thread_id: int, title: str) -> str: """ @@ -601,6 +603,7 @@ class VercelStreamingService: tool_name: str, *, langchain_tool_call_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> str: """ Format the start of tool input streaming. 
@@ -635,6 +638,8 @@ class VercelStreamingService: } if langchain_tool_call_id: payload["langchainToolCallId"] = langchain_tool_call_id + if metadata: + payload["metadata"] = metadata return self._format_sse(payload) def format_tool_input_delta(self, tool_call_id: str, input_text_delta: str) -> str: @@ -666,6 +671,7 @@ class VercelStreamingService: input_data: dict[str, Any], *, langchain_tool_call_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> str: """ Format the completion of tool input. @@ -691,6 +697,8 @@ class VercelStreamingService: } if langchain_tool_call_id: payload["langchainToolCallId"] = langchain_tool_call_id + if metadata: + payload["metadata"] = metadata return self._format_sse(payload) def format_tool_output_available( @@ -699,6 +707,7 @@ class VercelStreamingService: output: Any, *, langchain_tool_call_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> str: """ Format tool execution output. @@ -725,6 +734,8 @@ class VercelStreamingService: } if langchain_tool_call_id: payload["langchainToolCallId"] = langchain_tool_call_id + if metadata: + payload["metadata"] = metadata return self._format_sse(payload) # ========================================================================= From 3dbcac4b9d656af92fd7fe376386124411fd53e5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 22:48:07 +0200 Subject: [PATCH 34/58] Merge span metadata into persisted tool-call and thinking parts. --- .../app/tasks/chat/content_builder.py | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/content_builder.py b/surfsense_backend/app/tasks/chat/content_builder.py index 32b49e6b5..2b6dbe649 100644 --- a/surfsense_backend/app/tasks/chat/content_builder.py +++ b/surfsense_backend/app/tasks/chat/content_builder.py @@ -51,6 +51,15 @@ logger = logging.getLogger(__name__) _MEANINGFUL_PART_TYPES: frozenset[str] = frozenset({"text", "reasoning", "tool-call"}) +def _merge_tool_part_metadata(part: dict[str, Any], metadata: dict[str, Any] | None) -> None: + if not metadata: + return + md = part.setdefault("metadata", {}) + for k, v in metadata.items(): + if k not in md: + md[k] = v + + class AssistantContentBuilder: """Server-side projection of ``surfsense_web/lib/chat/streaming-state.ts``. @@ -177,6 +186,8 @@ class AssistantContentBuilder: ui_id: str, tool_name: str, langchain_tool_call_id: str | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Register a tool-call card. Args are filled in by later events.""" if not ui_id: @@ -187,11 +198,11 @@ class AssistantContentBuilder: # (the canonical path). The FE de-dupes via ``toolCallIndices``; # we mirror that here. 
if ui_id in self._tool_call_idx_by_ui_id: - if langchain_tool_call_id: - idx = self._tool_call_idx_by_ui_id[ui_id] - part = self.parts[idx] - if not part.get("langchainToolCallId"): - part["langchainToolCallId"] = langchain_tool_call_id + idx = self._tool_call_idx_by_ui_id[ui_id] + part = self.parts[idx] + if langchain_tool_call_id and not part.get("langchainToolCallId"): + part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(part, metadata) return part: dict[str, Any] = { @@ -202,6 +213,8 @@ class AssistantContentBuilder: } if langchain_tool_call_id: part["langchainToolCallId"] = langchain_tool_call_id + if metadata: + part["metadata"] = dict(metadata) self.parts.append(part) self._tool_call_idx_by_ui_id[ui_id] = len(self.parts) - 1 @@ -235,6 +248,8 @@ class AssistantContentBuilder: tool_name: str, args: dict[str, Any], langchain_tool_call_id: str | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Finalize the tool-call card's input. @@ -264,6 +279,7 @@ class AssistantContentBuilder: part["argsText"] = final_args_text if langchain_tool_call_id and not part.get("langchainToolCallId"): part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(part, metadata) return # No prior tool-input-start: register the card now. @@ -276,6 +292,7 @@ class AssistantContentBuilder: } if langchain_tool_call_id: new_part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(new_part, metadata) self.parts.append(new_part) self._tool_call_idx_by_ui_id[ui_id] = len(self.parts) - 1 @@ -287,6 +304,8 @@ class AssistantContentBuilder: ui_id: str, output: Any, langchain_tool_call_id: str | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Attach the tool's output (``result``) to the matching card. @@ -305,6 +324,7 @@ class AssistantContentBuilder: part["result"] = output if langchain_tool_call_id and not part.get("langchainToolCallId"): part["langchainToolCallId"] = langchain_tool_call_id + _merge_tool_part_metadata(part, metadata) # ------------------------------------------------------------------ # Thinking steps & step separators @@ -316,6 +336,8 @@ class AssistantContentBuilder: title: str, status: str, items: list[str] | None, + *, + metadata: dict[str, Any] | None = None, ) -> None: """Update / insert the singleton ``data-thinking-steps`` part. @@ -328,12 +350,14 @@ class AssistantContentBuilder: if not step_id: return - new_step = { + new_step: dict[str, Any] = { "id": step_id, "title": title or "", "status": status or "in_progress", "items": list(items) if items else [], } + if metadata: + new_step["metadata"] = dict(metadata) # Find existing data-thinking-steps part. existing_idx = -1 @@ -347,6 +371,8 @@ class AssistantContentBuilder: replaced = False for i, step in enumerate(current_steps): if step.get("id") == step_id: + if not metadata and step.get("metadata"): + new_step["metadata"] = dict(step["metadata"]) current_steps[i] = new_step replaced = True break From d136fcd054d7bbd4d51ecd9c4e5368037cc43e0e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:16:44 +0200 Subject: [PATCH 35/58] Add tool_activity_metadata to merge spanId and thinkingStepId for tools. 
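For context, the merged metadata ultimately lands on the persisted
``tool-call`` content part. Roughly, with illustrative values (shape per the
``AssistantContentBuilder`` docstring):

    {
        "type": "tool-call",
        "toolCallId": "call_run-A_1",
        "toolName": "web_search",
        "args": {"query": "q3 roadmap"},
        "langchainToolCallId": "lc-1",
        "metadata": {"spanId": "spn_3f2a", "thinkingStepId": "thinking-5"}
    }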
--- .../app/tasks/chat/streaming/relay/state.py | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py index 82525a52f..27898403d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -10,10 +10,16 @@ from typing import Any class AgentEventRelayState: """Tracks text, thinking steps, tool depth, and pending tool-call metadata. - ``active_span_id`` groups steps/tools for one open ``task`` episode. - ``active_task_run_id`` is the LangGraph ``run_id`` of that ``task`` so we - only clear the span when that run ends (not when child tools end). Handlers - will set/clear these via ``task_span`` helpers in a later change. + **Task span (`spanId`)** — ``active_span_id`` groups steps and tools for one + open delegating ``task`` episode. ``active_task_run_id`` is the LangGraph + ``run_id`` of that ``task`` so the span clears only when that run ends, not + when child tools end. Open/close uses ``relay.task_span`` helpers. + + **Tool ↔ thinking link (`thinkingStepId`)** — Each tool run gets a thinking-row + id (``tool_step_ids[run_id]``, emitted as ``data-thinking-step`` ``data.id``). + ``tool_activity_metadata`` supplies ``metadata`` for ``tool-input-start`` / + ``tool-input-available`` (``handlers.tool_start``) and + ``tool-output-available`` (``handlers.tool_end``). """ accumulated_text: str = "" @@ -48,6 +54,29 @@ class AgentEventRelayState: return {"spanId": self.active_span_id} return None + def tool_activity_metadata( + self, *, thinking_step_id: str | None + ) -> dict[str, Any] | None: + """Build ``metadata`` for tool SSE and ``tool-call`` persistence. + + Contract (keys omitted when not applicable): + + - ``spanId`` (str): present while a task-delegation span is active + (same value as ``span_metadata_if_active()``). + - ``thinkingStepId`` (str): equals the thinking-step row ``id`` for this + tool (``data-thinking-step`` payload ``data.id`` on the wire). + + Returns ``None`` if neither applies. Whitespace-only + ``thinking_step_id`` is ignored. + """ + out: dict[str, Any] = {} + if self.active_span_id: + out["spanId"] = self.active_span_id + tid = (thinking_step_id or "").strip() + if tid: + out["thinkingStepId"] = tid + return out if out else None + @classmethod def for_invocation( cls, From 007a0a30ec7c2850316f5503729a5f8c67312074 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:16:56 +0200 Subject: [PATCH 36/58] Cover tool_activity_metadata for span-only, step-only, and combined cases. 
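To run just this module (assuming pytest is invoked from
``surfsense_backend/`` with the repo's usual marker config):

    pytest tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py -q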
--- .../streaming/test_tool_activity_metadata.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py new file mode 100644 index 000000000..c2e68dacd --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_tool_activity_metadata.py @@ -0,0 +1,42 @@ +"""Unit tests for ``AgentEventRelayState.tool_activity_metadata``.""" + +from __future__ import annotations + +import pytest + +from app.tasks.chat.streaming.relay.state import AgentEventRelayState +from app.tasks.chat.streaming.relay.task_span import open_task_span + +pytestmark = pytest.mark.unit + + +def test_returns_none_when_no_span_and_no_thinking_step() -> None: + state = AgentEventRelayState.for_invocation() + assert state.tool_activity_metadata(thinking_step_id=None) is None + assert state.tool_activity_metadata(thinking_step_id="") is None + assert state.tool_activity_metadata(thinking_step_id=" ") is None + + +def test_thinking_step_id_only() -> None: + state = AgentEventRelayState.for_invocation() + assert state.tool_activity_metadata(thinking_step_id="thinking-3") == { + "thinkingStepId": "thinking-3", + } + + +def test_span_only_when_active() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-x") + assert state.tool_activity_metadata(thinking_step_id=None) == { + "spanId": state.active_span_id, + } + + +def test_merges_span_and_thinking_step_when_both_set() -> None: + state = AgentEventRelayState.for_invocation() + open_task_span(state, run_id="run-x") + md = state.tool_activity_metadata(thinking_step_id="thinking-7") + assert md == { + "spanId": state.active_span_id, + "thinkingStepId": "thinking-7", + } From a309e830d34dedfb07e3a263bc7e34ec30fa5335 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:17:01 +0200 Subject: [PATCH 37/58] Document thinkingStepId on tool-call parts and first-key metadata merge. --- surfsense_backend/app/tasks/chat/content_builder.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/content_builder.py b/surfsense_backend/app/tasks/chat/content_builder.py index 2b6dbe649..f0804159a 100644 --- a/surfsense_backend/app/tasks/chat/content_builder.py +++ b/surfsense_backend/app/tasks/chat/content_builder.py @@ -52,6 +52,12 @@ _MEANINGFUL_PART_TYPES: frozenset[str] = frozenset({"text", "reasoning", "tool-c def _merge_tool_part_metadata(part: dict[str, Any], metadata: dict[str, Any] | None) -> None: + """Shallow-merge ``metadata`` into ``part["metadata"]``; first key wins. + + Used for tool-call linkage (``spanId``, ``thinkingStepId``, …): a later + event must not overwrite an existing key so chunk order vs ``on_tool_start`` + stays stable. + """ if not metadata: return md = part.setdefault("metadata", {}) @@ -70,6 +76,7 @@ class AssistantContentBuilder: | { type: "reasoning"; text: string } | { type: "tool-call"; toolCallId: str; toolName: str; args: dict; result?: any; argsText?: str; langchainToolCallId?: str; + metadata?: { spanId?: str; thinkingStepId?: str; ... 
}; state?: "aborted" } | { type: "data-thinking-steps"; data: { steps: ThinkingStepData[] } } | { type: "data-step-separator"; data: { stepIndex: int } } @@ -189,7 +196,11 @@ class AssistantContentBuilder: *, metadata: dict[str, Any] | None = None, ) -> None: - """Register a tool-call card. Args are filled in by later events.""" + """Register a tool-call card. Args are filled in by later events. + + Optional ``metadata`` (``spanId``, ``thinkingStepId``, …) is stored on the + part; duplicate ``tool-input-start`` calls merge with first-key-wins. + """ if not ui_id: return # Skip duplicate registration: the stream may emit From 32092c0b65914a046a0f6a02e76bfa6a561af71b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:17:05 +0200 Subject: [PATCH 38/58] Pass thinkingStepId through tool-input start and available metadata. --- .../app/tasks/chat/streaming/handlers/tool_start.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py index 3b3537567..e0cac307c 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_start.py @@ -99,20 +99,21 @@ def iter_tool_start_frames( ) span_md = state.span_metadata_if_active() + tool_md = state.tool_activity_metadata(thinking_step_id=tool_step_id) if matched_meta is None: yield streaming_service.format_tool_input_start( tool_call_id, tool_name, langchain_tool_call_id=langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) if content_builder is not None: content_builder.on_tool_input_start( tool_call_id, tool_name, langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) thinking = resolve_tool_start_thinking(tool_name, tool_input) @@ -148,7 +149,7 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id=langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) if content_builder is not None: content_builder.on_tool_input_available( @@ -156,5 +157,5 @@ def iter_tool_start_frames( tool_name, _safe_input, langchain_tool_call_id, - metadata=span_md, + metadata=tool_md, ) From 1761b60c16397310145cb8739b93cc105c5576e5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 May 2026 23:17:12 +0200 Subject: [PATCH 39/58] Carry thinkingStepId on tool output and extend builder and parity tests. 
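The builder tests below pin the output-side frame. Roughly, with illustrative
ids (exact keys come from ``format_tool_output_available``):

    data: {"type": "tool-output-available", "toolCallId": "call_run-A_1",
           "output": {"status": "completed"}, "langchainToolCallId": "lc-1",
           "metadata": {"spanId": "spn_3f2a", "thinkingStepId": "thinking-5"}}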
--- .../tasks/chat/streaming/handlers/tool_end.py | 4 +- .../chat/streaming/test_stage_2_parity.py | 3 + .../unit/tasks/chat/test_content_builder.py | 150 ++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py index ec7d6551c..421c67a6d 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -110,7 +110,9 @@ def iter_tool_end_frames( stream_result=result, langgraph_config=config, staged_workspace_file_path=staged_file_path, - tool_metadata=state.span_metadata_if_active(), + tool_metadata=state.tool_activity_metadata( + thinking_step_id=original_step_id, + ), ) yield from iter_tool_completion_emission_frames(emission_ctx) diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py index 9ae7defec..3ee1ab622 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_2_parity.py @@ -111,8 +111,10 @@ def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: svc = MagicMock() svc.format_thinking_step.return_value = "done-frame" completed: set[str] = set() + relay_state = AgentEventRelayState.for_invocation() frame, new_id = complete_active_thinking_step( + state=relay_state, streaming_service=svc, content_builder=None, last_active_step_id="thinking-1", @@ -125,6 +127,7 @@ def test_complete_active_thinking_step_mirrors_closure_semantics() -> None: assert "thinking-1" in completed frame2, id2 = complete_active_thinking_step( + state=relay_state, streaming_service=svc, content_builder=None, last_active_step_id="thinking-1", diff --git a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py index 4b1fadd9c..9d3eb6fa4 100644 --- a/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py +++ b/surfsense_backend/tests/unit/tasks/chat/test_content_builder.py @@ -15,6 +15,7 @@ import json import pytest +from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.content_builder import AssistantContentBuilder pytestmark = pytest.mark.unit @@ -231,6 +232,155 @@ class TestToolHeavyTurn: ) +# --------------------------------------------------------------------------- +# Task-span metadata on tool-call parts (JSONB persistence) +# --------------------------------------------------------------------------- + + +class TestToolCallSpanMetadata: + def test_input_available_merges_new_metadata_keys_after_start(self): + b = AssistantContentBuilder() + b.on_tool_input_start( + "call_t", "task", "lc_t", metadata={"spanId": "spn_1"} + ) + b.on_tool_input_available( + "call_t", + "task", + {"goal": "x"}, + "lc_t", + metadata={"traceId": "tr_1"}, + ) + part = b.snapshot()[0] + assert part["metadata"]["spanId"] == "spn_1" + assert part["metadata"]["traceId"] == "tr_1" + _assert_jsonb_safe(b.snapshot()) + + def test_input_available_does_not_overwrite_existing_metadata_keys(self): + b = AssistantContentBuilder() + b.on_tool_input_start( + "call_t", "task", "lc_t", metadata={"spanId": "spn_keep"} + ) + b.on_tool_input_available( + "call_t", "task", {}, "lc_t", metadata={"spanId": "spn_other"} + ) + assert b.snapshot()[0]["metadata"]["spanId"] == "spn_keep" + 
+ def test_late_tool_input_available_carries_metadata(self): + b = AssistantContentBuilder() + b.on_tool_input_available( + "call_l", + "grep", + {"pattern": "TODO"}, + None, + metadata={"spanId": "spn_l"}, + ) + part = b.snapshot()[0] + assert part["metadata"] == {"spanId": "spn_l"} + _assert_jsonb_safe(b.snapshot()) + + def test_output_available_merges_without_clobbering_span_id(self): + b = AssistantContentBuilder() + b.on_tool_input_start("call_t", "ls", "lc", metadata={"spanId": "spn_x"}) + b.on_tool_input_available("call_t", "ls", {"path": "/"}, "lc") + b.on_tool_output_available( + "call_t", + {"ok": True}, + "lc", + metadata={"spanId": "spn_y", "extra": 1}, + ) + md = b.snapshot()[0]["metadata"] + assert md["spanId"] == "spn_x" + assert md["extra"] == 1 + + def test_output_available_adds_thinking_step_id_without_clobbering_span(self): + b = AssistantContentBuilder() + b.on_tool_input_start( + "call_t", + "ls", + "lc", + metadata={"spanId": "spn_x", "thinkingStepId": "thinking-3"}, + ) + b.on_tool_input_available("call_t", "ls", {"path": "/"}, "lc") + b.on_tool_output_available( + "call_t", + {"ok": True}, + "lc", + metadata={"spanId": "spn_x", "thinkingStepId": "thinking-3"}, + ) + md = b.snapshot()[0]["metadata"] + assert md["spanId"] == "spn_x" + assert md["thinkingStepId"] == "thinking-3" + + def test_output_available_with_none_metadata_preserves_prior(self): + b = AssistantContentBuilder() + b.on_tool_input_start("c", "ls", "lc", metadata={"spanId": "spn_1"}) + b.on_tool_input_available("c", "ls", {}, "lc") + b.on_tool_output_available("c", {"r": 1}, "lc", metadata=None) + assert b.snapshot()[0]["metadata"] == {"spanId": "spn_1"} + + def test_available_adds_thinking_step_id_after_chunk_only_start(self): + """Mirrors chunk ``tool-input-start`` then ``on_tool_start`` ``available``.""" + b = AssistantContentBuilder() + b.on_tool_input_start("lc_1", "ls", "lc_1", metadata={"spanId": "spn_a"}) + b.on_tool_input_available( + "lc_1", + "ls", + {"path": "/"}, + "lc_1", + metadata={"spanId": "spn_a", "thinkingStepId": "thinking-2"}, + ) + md = b.snapshot()[0]["metadata"] + assert md["spanId"] == "spn_a" + assert md["thinkingStepId"] == "thinking-2" + + +class TestVercelStreamingServiceToolMetadataWire: + """SSE payloads include optional ``metadata`` for FE grouping.""" + + @staticmethod + def _parse_sse_data_line(raw: str) -> dict: + assert raw.startswith("data: ") + payload = raw.split("data: ", 1)[1].split("\n\n", 1)[0].strip() + return json.loads(payload) + + def test_tool_input_available_includes_metadata_when_set(self): + svc = VercelStreamingService() + raw = svc.format_tool_input_available( + "id1", + "task", + {"a": 1}, + langchain_tool_call_id="lc1", + metadata={"spanId": "spn_w", "thinkingStepId": "thinking-4"}, + ) + body = self._parse_sse_data_line(raw) + assert body["type"] == "tool-input-available" + assert body["metadata"] == { + "spanId": "spn_w", + "thinkingStepId": "thinking-4", + } + + def test_tool_output_available_includes_metadata_when_set(self): + svc = VercelStreamingService() + raw = svc.format_tool_output_available( + "id1", + {"status": "completed"}, + langchain_tool_call_id="lc1", + metadata={"spanId": "spn_o", "thinkingStepId": "thinking-9"}, + ) + body = self._parse_sse_data_line(raw) + assert body["type"] == "tool-output-available" + assert body["metadata"] == { + "spanId": "spn_o", + "thinkingStepId": "thinking-9", + } + + def test_tool_input_available_omits_metadata_key_when_none(self): + svc = VercelStreamingService() + raw = 
svc.format_tool_input_available("id1", "ls", {}) + body = self._parse_sse_data_line(raw) + assert "metadata" not in body + + # --------------------------------------------------------------------------- # Thinking steps & separators # --------------------------------------------------------------------------- From 47e64d1861a85636c8e8bde2268d6f34bf6bee2d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:39:43 +0200 Subject: [PATCH 40/58] Persist optional relay metadata on tool-call and thinking-step state. --- surfsense_web/lib/chat/streaming-state.ts | 43 ++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts index 809e214d1..ee3160a61 100644 --- a/surfsense_web/lib/chat/streaming-state.ts +++ b/surfsense_web/lib/chat/streaming-state.ts @@ -5,6 +5,11 @@ export interface ThinkingStepData { title: string; status: "pending" | "in_progress" | "completed"; items: string[]; + /** + * Optional relay fields from ``data-thinking-step`` when present on the wire + * (e.g. ``spanId``). Populated in a later slice; equality helpers ignore until wired. + */ + metadata?: Record; } export type ContentPart = @@ -42,6 +47,11 @@ export type ContentPart = * ``data-action-log`` events. */ langchainToolCallId?: string; + /** + * Relay correlation from tool SSE (e.g. ``spanId``, ``thinkingStepId``). + * Merged by ``mergeToolPartMetadata`` when events carry ``metadata``. + */ + metadata?: Record; } | { type: "data-thinking-steps"; @@ -252,6 +262,23 @@ function _toolPasses(gate: ToolUIGate, toolName: string): boolean { return gate === "all" || gate.has(toolName); } +/** + * Shallow-merge relay ``metadata`` into a tool-call part (SSE → content part). + * Keys already set on ``into`` are left unchanged so chunk vs canonical tool + * events cannot reorder or overwrite ``spanId`` / ``thinkingStepId``. + * Matches server ``AssistantContentBuilder`` merge semantics. + */ +function mergeToolPartMetadata( + into: Record, + incoming: Record | undefined +): void { + if (!incoming) return; + for (const [k, v] of Object.entries(incoming)) { + if (k === "__proto__" || k === "constructor") continue; + if (!(k in into)) into[k] = v; + } +} + export function addToolCall( state: ContentPartsState, toolsWithUI: ToolUIGate, @@ -259,15 +286,19 @@ export function addToolCall( toolName: string, args: Record, force = false, - langchainToolCallId?: string + langchainToolCallId?: string, + metadata?: Record ): void { if (force || _toolPasses(toolsWithUI, toolName)) { + const relayMeta: Record = {}; + mergeToolPartMetadata(relayMeta, metadata); state.contentParts.push({ type: "tool-call", toolCallId, toolName, args, ...(langchainToolCallId ? { langchainToolCallId } : {}), + ...(Object.keys(relayMeta).length > 0 ? { metadata: relayMeta } : {}), }); state.toolCallIndices.set(toolCallId, state.contentParts.length - 1); state.currentTextPartIndex = -1; @@ -304,6 +335,7 @@ export function updateToolCall( argsText?: string; result?: unknown; langchainToolCallId?: string; + metadata?: Record; } ): void { const index = state.toolCallIndices.get(toolCallId); @@ -323,6 +355,11 @@ export function updateToolCall( if (update.langchainToolCallId && !tc.langchainToolCallId) { tc.langchainToolCallId = update.langchainToolCallId; } + if (update.metadata && Object.keys(update.metadata).length > 0) { + const md = (tc.metadata ?? 
{}) as Record; + mergeToolPartMetadata(md, update.metadata); + tc.metadata = md; + } } } @@ -416,6 +453,8 @@ export type SSEEvent = toolName: string; /** Authoritative LangChain ``tool_call.id``. Optional. */ langchainToolCallId?: string; + /** Optional JSON object from tool SSE (same keys as persisted tool-call metadata). */ + metadata?: Record; } | { /** @@ -434,6 +473,7 @@ export type SSEEvent = toolName: string; input: Record; langchainToolCallId?: string; + metadata?: Record; } | { type: "tool-output-available"; @@ -443,6 +483,7 @@ export type SSEEvent = * ``ToolMessage.tool_call_id`` at on_tool_end. Backfills cards * that didn't get the id at tool-input-start time. */ langchainToolCallId?: string; + metadata?: Record; } | { type: "data-thinking-step"; data: ThinkingStepData } | { type: "data-thread-title-update"; data: { threadId: number; title: string } } From 39084b3075ef0dd322d2cfab037fe1b0fbd4dc0f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:39:50 +0200 Subject: [PATCH 41/58] Forward tool SSE metadata into the streaming content parts. --- surfsense_web/components/free-chat/free-chat-page.tsx | 8 ++++++-- surfsense_web/lib/chat/stream-pipeline.ts | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/surfsense_web/components/free-chat/free-chat-page.tsx b/surfsense_web/components/free-chat/free-chat-page.tsx index 080d9a2b6..b2f959f2e 100644 --- a/surfsense_web/components/free-chat/free-chat-page.tsx +++ b/surfsense_web/components/free-chat/free-chat-page.tsx @@ -228,7 +228,8 @@ export function FreeChatPage() { parsed.toolName, {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); forceFlush(); break; @@ -245,6 +246,7 @@ export function FreeChatPage() { args: parsed.input || {}, argsText: finalArgsText, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); } else { addToolCall( @@ -254,7 +256,8 @@ export function FreeChatPage() { parsed.toolName, parsed.input || {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); updateToolCall(contentPartsState, parsed.toolCallId, { argsText: finalArgsText, @@ -268,6 +271,7 @@ export function FreeChatPage() { updateToolCall(contentPartsState, parsed.toolCallId, { result: parsed.output, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); forceFlush(); break; diff --git a/surfsense_web/lib/chat/stream-pipeline.ts b/surfsense_web/lib/chat/stream-pipeline.ts index c76781083..a0f90afeb 100644 --- a/surfsense_web/lib/chat/stream-pipeline.ts +++ b/surfsense_web/lib/chat/stream-pipeline.ts @@ -112,7 +112,8 @@ export function processSharedStreamEvent( parsed.toolName, {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); forceFlush(); return true; @@ -131,6 +132,7 @@ export function processSharedStreamEvent( args: parsed.input || {}, argsText: finalArgsText, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); } else { addToolCall( @@ -140,7 +142,8 @@ export function processSharedStreamEvent( parsed.toolName, parsed.input || {}, false, - parsed.langchainToolCallId + parsed.langchainToolCallId, + parsed.metadata ); // addToolCall doesn't accept argsText today; backfill via // updateToolCall so the new card renders pretty-printed JSON. 
@@ -156,6 +159,7 @@ export function processSharedStreamEvent( updateToolCall(contentPartsState, parsed.toolCallId, { result: parsed.output, langchainToolCallId: parsed.langchainToolCallId, + metadata: parsed.metadata, }); markInterruptsCompleted(contentParts); context.onToolOutputAvailable?.(parsed, { contentPartsState, toolCallIndices }); From e7c5204b0248d7ceb6262478a4419eb7e0fe6f58 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:39:59 +0200 Subject: [PATCH 42/58] Indent tool cards under an active delegating task span. --- .../assistant-ui/assistant-message.tsx | 88 ++++++++++--------- .../components/assistant-ui/tool-fallback.tsx | 22 +++++ .../components/public-chat/public-thread.tsx | 29 +++--- .../lib/chat/delegation-span-indent.ts | 19 ++++ 4 files changed, 106 insertions(+), 52 deletions(-) create mode 100644 surfsense_web/lib/chat/delegation-span-indent.ts diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 7bccc22ee..a21ade74a 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -4,6 +4,7 @@ import { AuiIf, ErrorPrimitive, MessagePrimitive, + type ToolCallMessagePartComponent, useAui, useAuiState, } from "@assistant-ui/react"; @@ -36,7 +37,7 @@ import { MarkdownText } from "@/components/assistant-ui/markdown-text"; import { ReasoningMessagePart } from "@/components/assistant-ui/reasoning-message-part"; import { RevertTurnButton } from "@/components/assistant-ui/revert-turn-button"; import { useTokenUsage } from "@/components/assistant-ui/token-usage-context"; -import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; +import { ToolFallback, withDelegationSpanIndent } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { CommentPanelContainer } from "@/components/chat-comments/comment-panel-container/comment-panel-container"; import { CommentSheet } from "@/components/chat-comments/comment-sheet/comment-sheet"; @@ -505,48 +506,55 @@ const MessageInfoDropdown: FC = () => { // Wrap each tool-ui card with ``withBundleStep`` so multi-card HITL bundles // page through them and stage decisions instead of firing one resume per card. +// ``withDelegationSpanIndent`` wraps every entry (including Fallback) so delegated +// subagent tools don't bypass span indentation via a named ``by_name`` UI. 
+const bundleTool = (Component: ToolCallMessagePartComponent) => + withBundleStep(withDelegationSpanIndent(Component)); + +const NullToolUi: ToolCallMessagePartComponent = () => null; + const TOOLS_BY_NAME = { - generate_report: withBundleStep(GenerateReportToolUI), - generate_resume: withBundleStep(GenerateResumeToolUI), - generate_podcast: withBundleStep(GeneratePodcastToolUI), - generate_video_presentation: withBundleStep(GenerateVideoPresentationToolUI), - display_image: withBundleStep(GenerateImageToolUI), - generate_image: withBundleStep(GenerateImageToolUI), - update_memory: withBundleStep(UpdateMemoryToolUI), - execute: withBundleStep(SandboxExecuteToolUI), - execute_code: withBundleStep(SandboxExecuteToolUI), - create_notion_page: withBundleStep(CreateNotionPageToolUI), - update_notion_page: withBundleStep(UpdateNotionPageToolUI), - delete_notion_page: withBundleStep(DeleteNotionPageToolUI), - create_linear_issue: withBundleStep(CreateLinearIssueToolUI), - update_linear_issue: withBundleStep(UpdateLinearIssueToolUI), - delete_linear_issue: withBundleStep(DeleteLinearIssueToolUI), - create_google_drive_file: withBundleStep(CreateGoogleDriveFileToolUI), - delete_google_drive_file: withBundleStep(DeleteGoogleDriveFileToolUI), - create_onedrive_file: withBundleStep(CreateOneDriveFileToolUI), - delete_onedrive_file: withBundleStep(DeleteOneDriveFileToolUI), - create_dropbox_file: withBundleStep(CreateDropboxFileToolUI), - delete_dropbox_file: withBundleStep(DeleteDropboxFileToolUI), - create_calendar_event: withBundleStep(CreateCalendarEventToolUI), - update_calendar_event: withBundleStep(UpdateCalendarEventToolUI), - delete_calendar_event: withBundleStep(DeleteCalendarEventToolUI), - create_gmail_draft: withBundleStep(CreateGmailDraftToolUI), - update_gmail_draft: withBundleStep(UpdateGmailDraftToolUI), - send_gmail_email: withBundleStep(SendGmailEmailToolUI), - trash_gmail_email: withBundleStep(TrashGmailEmailToolUI), - create_jira_issue: withBundleStep(CreateJiraIssueToolUI), - update_jira_issue: withBundleStep(UpdateJiraIssueToolUI), - delete_jira_issue: withBundleStep(DeleteJiraIssueToolUI), - create_confluence_page: withBundleStep(CreateConfluencePageToolUI), - update_confluence_page: withBundleStep(UpdateConfluencePageToolUI), - delete_confluence_page: withBundleStep(DeleteConfluencePageToolUI), - web_search: () => null, - link_preview: () => null, - multi_link_preview: () => null, - scrape_webpage: () => null, + generate_report: bundleTool(GenerateReportToolUI), + generate_resume: bundleTool(GenerateResumeToolUI), + generate_podcast: bundleTool(GeneratePodcastToolUI), + generate_video_presentation: bundleTool(GenerateVideoPresentationToolUI), + display_image: bundleTool(GenerateImageToolUI), + generate_image: bundleTool(GenerateImageToolUI), + update_memory: bundleTool(UpdateMemoryToolUI), + execute: bundleTool(SandboxExecuteToolUI), + execute_code: bundleTool(SandboxExecuteToolUI), + create_notion_page: bundleTool(CreateNotionPageToolUI), + update_notion_page: bundleTool(UpdateNotionPageToolUI), + delete_notion_page: bundleTool(DeleteNotionPageToolUI), + create_linear_issue: bundleTool(CreateLinearIssueToolUI), + update_linear_issue: bundleTool(UpdateLinearIssueToolUI), + delete_linear_issue: bundleTool(DeleteLinearIssueToolUI), + create_google_drive_file: bundleTool(CreateGoogleDriveFileToolUI), + delete_google_drive_file: bundleTool(DeleteGoogleDriveFileToolUI), + create_onedrive_file: bundleTool(CreateOneDriveFileToolUI), + delete_onedrive_file: 
bundleTool(DeleteOneDriveFileToolUI), + create_dropbox_file: bundleTool(CreateDropboxFileToolUI), + delete_dropbox_file: bundleTool(DeleteDropboxFileToolUI), + create_calendar_event: bundleTool(CreateCalendarEventToolUI), + update_calendar_event: bundleTool(UpdateCalendarEventToolUI), + delete_calendar_event: bundleTool(DeleteCalendarEventToolUI), + create_gmail_draft: bundleTool(CreateGmailDraftToolUI), + update_gmail_draft: bundleTool(UpdateGmailDraftToolUI), + send_gmail_email: bundleTool(SendGmailEmailToolUI), + trash_gmail_email: bundleTool(TrashGmailEmailToolUI), + create_jira_issue: bundleTool(CreateJiraIssueToolUI), + update_jira_issue: bundleTool(UpdateJiraIssueToolUI), + delete_jira_issue: bundleTool(DeleteJiraIssueToolUI), + create_confluence_page: bundleTool(CreateConfluencePageToolUI), + update_confluence_page: bundleTool(UpdateConfluencePageToolUI), + delete_confluence_page: bundleTool(DeleteConfluencePageToolUI), + web_search: NullToolUi, + link_preview: NullToolUi, + multi_link_preview: NullToolUi, + scrape_webpage: NullToolUi, } as const; -const TOOLS_FALLBACK = withBundleStep(ToolFallback); +const TOOLS_FALLBACK = bundleTool(ToolFallback); const AssistantMessageInner: FC = () => { const isMobile = !useMediaQuery("(min-width: 768px)"); diff --git a/surfsense_web/components/assistant-ui/tool-fallback.tsx b/surfsense_web/components/assistant-ui/tool-fallback.tsx index ba58f4158..ec93b1018 100644 --- a/surfsense_web/components/assistant-ui/tool-fallback.tsx +++ b/surfsense_web/components/assistant-ui/tool-fallback.tsx @@ -31,6 +31,10 @@ import { Spinner } from "@/components/ui/spinner"; import { getToolDisplayName } from "@/contracts/enums/toolIcons"; import { markActionRevertedInCache, useAgentActionsQuery } from "@/hooks/use-agent-actions-query"; import { agentActionsApiService } from "@/lib/apis/agent-actions-api.service"; +import { + DELEGATION_SPAN_INDENT_CLASS, + shouldIndentToolCallForDelegationSpan, +} from "@/lib/chat/delegation-span-indent"; import { AppError } from "@/lib/error"; import { isInterruptResult } from "@/lib/hitl"; import { cn } from "@/lib/utils"; @@ -499,6 +503,24 @@ const DefaultToolFallbackInner: ToolCallMessagePartComponent = (props) => { ); }; +/** + * Wrap any tool-call UI so cards under an active delegating ``task`` span indent. + * Applied to named tool components as well as ``ToolFallback`` — only ``ToolFallback`` + * would miss delegated tools otherwise. + */ +export function withDelegationSpanIndent( + Component: ToolCallMessagePartComponent +): ToolCallMessagePartComponent { + const Wrapped: ToolCallMessagePartComponent = (props) => { + const metadata = (props as { metadata?: Record }).metadata; + const indent = shouldIndentToolCallForDelegationSpan(props.toolName, metadata); + const inner = ; + return indent ?
<div className={DELEGATION_SPAN_INDENT_CLASS}>{inner}</div>
: inner; + }; + Wrapped.displayName = `withDelegationSpanIndent(${Component.displayName ?? Component.name ?? "ToolUI"})`; + return Wrapped; +} + export const ToolFallback: ToolCallMessagePartComponent = (props) => { if (isInterruptResult(props.result)) { if (isDoomLoopInterrupt(props.result)) { diff --git a/surfsense_web/components/public-chat/public-thread.tsx b/surfsense_web/components/public-chat/public-thread.tsx index 750b7410e..2075d82b8 100644 --- a/surfsense_web/components/public-chat/public-thread.tsx +++ b/surfsense_web/components/public-chat/public-thread.tsx @@ -5,6 +5,7 @@ import { AuiIf, MessagePrimitive, ThreadPrimitive, + type ToolCallMessagePartComponent, useAuiState, } from "@assistant-ui/react"; import { CheckIcon, CopyIcon } from "lucide-react"; @@ -14,7 +15,7 @@ import { type FC, type ReactNode, useState } from "react"; import { CitationMetadataProvider } from "@/components/assistant-ui/citation-metadata-context"; import { MarkdownText } from "@/components/assistant-ui/markdown-text"; import { ReasoningMessagePart } from "@/components/assistant-ui/reasoning-message-part"; -import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; +import { ToolFallback, withDelegationSpanIndent } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { GenerateImageToolUI } from "@/components/tool-ui/generate-image"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; @@ -29,6 +30,8 @@ const GenerateVideoPresentationToolUI = dynamic( { ssr: false } ); +const NullToolUi: ToolCallMessagePartComponent = () => null; + interface PublicThreadProps { footer?: ReactNode; } @@ -162,18 +165,20 @@ const PublicAssistantMessage: FC = () => { Reasoning: ReasoningMessagePart, tools: { by_name: { - generate_podcast: GeneratePodcastToolUI, - generate_report: GenerateReportToolUI, - generate_resume: GenerateResumeToolUI, - generate_video_presentation: GenerateVideoPresentationToolUI, - display_image: GenerateImageToolUI, - generate_image: GenerateImageToolUI, - web_search: () => null, - link_preview: () => null, - multi_link_preview: () => null, - scrape_webpage: () => null, + generate_podcast: withDelegationSpanIndent(GeneratePodcastToolUI), + generate_report: withDelegationSpanIndent(GenerateReportToolUI), + generate_resume: withDelegationSpanIndent(GenerateResumeToolUI), + generate_video_presentation: withDelegationSpanIndent( + GenerateVideoPresentationToolUI + ), + display_image: withDelegationSpanIndent(GenerateImageToolUI), + generate_image: withDelegationSpanIndent(GenerateImageToolUI), + web_search: NullToolUi, + link_preview: NullToolUi, + multi_link_preview: NullToolUi, + scrape_webpage: NullToolUi, }, - Fallback: ToolFallback, + Fallback: withDelegationSpanIndent(ToolFallback), }, }} /> diff --git a/surfsense_web/lib/chat/delegation-span-indent.ts b/surfsense_web/lib/chat/delegation-span-indent.ts new file mode 100644 index 000000000..99e292eaf --- /dev/null +++ b/surfsense_web/lib/chat/delegation-span-indent.ts @@ -0,0 +1,19 @@ +/** + * Indent tool-call cards that belong to an open delegating ``task`` episode. + * + * The backend only stamps ``metadata.spanId`` on tool SSE / persisted parts + * while a ``task`` is active (see ``AgentEventRelayState.tool_activity_metadata``), + * so its presence is sufficient. The opening ``task`` row itself carries the + * same span id but stays flush — it is the header of the delegation. 
+ */ + +export function shouldIndentToolCallForDelegationSpan( + toolName: string, + metadata: Record | undefined +): boolean { + if (toolName === "task") return false; + const v = metadata?.spanId; + return typeof v === "string" && v.trim().length > 0; +} + +export const DELEGATION_SPAN_INDENT_CLASS = "pl-3 sm:ml-4"; From 4b2c9f07cd4966cd64a2d9a795a8556477cd3b91 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 00:40:06 +0200 Subject: [PATCH 43/58] Group delegated thinking steps under their task parent and show subagent name. --- .../assistant-ui/thinking-steps.tsx | 247 +++++++++++++++--- 1 file changed, 213 insertions(+), 34 deletions(-) diff --git a/surfsense_web/components/assistant-ui/thinking-steps.tsx b/surfsense_web/components/assistant-ui/thinking-steps.tsx index df1cef12c..6c3832bff 100644 --- a/surfsense_web/components/assistant-ui/thinking-steps.tsx +++ b/surfsense_web/components/assistant-ui/thinking-steps.tsx @@ -1,7 +1,7 @@ import { makeAssistantDataUI, useAuiState } from "@assistant-ui/react"; import { ChevronRightIcon } from "lucide-react"; import type { FC } from "react"; -import { useCallback, useEffect, useState } from "react"; +import { useCallback, useEffect, useMemo, useState } from "react"; import { ChainOfThoughtItem } from "@/components/prompt-kit/chain-of-thought"; import { TextShimmerLoader } from "@/components/prompt-kit/loader"; import { cn } from "@/lib/utils"; @@ -11,15 +11,170 @@ export interface ThinkingStep { title: string; items: string[]; status: "pending" | "in_progress" | "completed"; + /** + * Optional relay metadata forwarded from ``data-thinking-step`` SSE + * (e.g. ``spanId`` set by ``AgentEventRelayState.span_metadata_if_active``). + * Steps under an open delegating ``task`` carry ``metadata.spanId`` and are + * grouped under the preceding parent (``task`` step) as indented children. + */ + metadata?: Record; } /** - * Chain of thought display component - single collapsible dropdown design + * Per-step info joined from the assistant message ``tool-call`` parts via + * the shared ``metadata.thinkingStepId`` correlation + * (set on the server in ``AgentEventRelayState.tool_activity_metadata``). */ -export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: boolean }> = ({ - steps, - isThreadRunning = true, -}) => { +interface StepToolInfo { + toolName: string; + args: Record; +} + +export type ThinkingStepToolInfoMap = ReadonlyMap; + +/** + * Build ``thinkingStepId → {toolName, args}`` from message content. Used to + * - identify the opening ``task`` step (parent header, never indents) without + * relying on the human-readable title; + * - render the parent's display title from ``args.subagent_type`` instead of + * the generic "Task" copy. + */ +export function buildThinkingStepToolInfo( + content: readonly unknown[] | undefined +): ThinkingStepToolInfoMap { + const m = new Map(); + if (!content) return m; + for (const part of content) { + if (!part || typeof part !== "object") continue; + const o = part as { + type?: string; + toolName?: string; + args?: Record; + metadata?: Record; + }; + if (o.type !== "tool-call" || !o.toolName) continue; + const tid = o.metadata?.thinkingStepId; + if (typeof tid === "string" && tid.trim().length > 0) { + m.set(tid, { toolName: o.toolName, args: o.args ?? {} }); + } + } + return m; +} + +function asNonEmptyString(v: unknown): string | undefined { + return typeof v === "string" && v.trim().length > 0 ? 
v.trim() : undefined; +} + +function titleCaseSubagent(raw: string): string { + // "notion" → "Notion", "doc_research" → "Doc Research". + return raw + .split(/[\s_-]+/) + .filter(Boolean) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(" "); +} + +/** + * Display title for a step. For the opening ``task`` step we substitute the + * subagent type from the matching tool-call args (e.g. ``"Notion"`` instead of + * the generic ``"Task"``). Falls back to the step's own title if the tool-call + * hasn't streamed in yet. + */ +function resolveDisplayTitle(step: ThinkingStep, info: StepToolInfo | undefined): string { + if (info?.toolName === "task") { + const subagent = asNonEmptyString(info.args?.subagent_type); + if (subagent) return titleCaseSubagent(subagent); + } + return step.title; +} + +function isDelegatedChild(step: ThinkingStep, info: StepToolInfo | undefined): boolean { + const sid = asNonEmptyString(step.metadata?.spanId); + if (!sid) return false; + // The opening ``task`` step also carries ``spanId`` (it owns the span) but + // must render as the parent header. Prefer the joined ``toolName`` (set by + // ``buildThinkingStepToolInfo`` from ``tool-call.metadata.thinkingStepId``). + // Fall back to the title heuristic when no tool-call is matched — happens + // for messages persisted before ``thinkingStepId`` shipped, and briefly + // during streaming if the ``tool-input-start`` frame hasn't been processed + // yet for some reason. + if (info) return info.toolName !== "task"; + return step.title !== "Task"; +} + +interface StepGroup { + parent: ThinkingStep; + children: ThinkingStep[]; +} + +/** + * Group consecutive delegated child steps under the preceding parent step. + * If the very first step is a child (no parent yet seen), it's promoted to a + * parent so it still renders — defensive only, real flows always start with a + * parent step. + */ +const EMPTY_STEP_TOOL_INFO: ThinkingStepToolInfoMap = new Map(); + +function groupSteps( + steps: readonly ThinkingStep[], + stepToolInfo: ThinkingStepToolInfoMap +): StepGroup[] { + const groups: StepGroup[] = []; + for (const step of steps) { + if (isDelegatedChild(step, stepToolInfo.get(step.id)) && groups.length > 0) { + groups[groups.length - 1].children.push(step); + } else { + groups.push({ parent: step, children: [] }); + } + } + return groups; +} + +const StepBody: FC<{ + step: ThinkingStep; + status: "pending" | "in_progress" | "completed"; + displayTitle: string; +}> = ({ step, status, displayTitle }) => ( +
+
+ {displayTitle} +
+ + {step.items && step.items.length > 0 && ( +
+ {step.items.map((item) => ( + + {item} + + ))} +
+ )} +
+); + +/** + * Chain of thought display component - single collapsible dropdown design. + * + * ``stepToolInfo`` joins each step (by ``thinkingStepId``) to its ``tool-call`` + * part so we can: + * - replace the generic ``"Task"`` title with the real subagent name + * (``args.subagent_type``) on the parent header; + * - decide parent-vs-child purely from the matched ``toolName`` instead of + * relying on the displayed title. + */ +export const ThinkingStepsDisplay: FC<{ + steps: ThinkingStep[]; + isThreadRunning?: boolean; + stepToolInfo?: ThinkingStepToolInfoMap; +}> = ({ steps, isThreadRunning = true, stepToolInfo }) => { + const toolInfo = stepToolInfo ?? EMPTY_STEP_TOOL_INFO; const getEffectiveStatus = useCallback( (step: ThinkingStep): "pending" | "in_progress" | "completed" => { if (step.status === "in_progress" && !isThreadRunning) { @@ -31,6 +186,9 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: ); const inProgressStep = steps.find((s) => getEffectiveStatus(s) === "in_progress"); + const inProgressDisplayTitle = inProgressStep + ? resolveDisplayTitle(inProgressStep, toolInfo.get(inProgressStep.id)) + : undefined; const allCompleted = steps.length > 0 && !isThreadRunning && @@ -49,14 +207,16 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: } }, [allCompleted, isProcessing]); + const groups = useMemo(() => groupSteps(steps, toolInfo), [steps, toolInfo]); + if (steps.length === 0) return null; const getHeaderText = () => { if (allCompleted) { return "Reviewed"; } - if (inProgressStep) { - return inProgressStep.title; + if (inProgressDisplayTitle) { + return inProgressDisplayTitle; } if (isProcessing) { return "Processing"; @@ -94,18 +254,26 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: >
- {steps.map((step, index) => { - const effectiveStatus = getEffectiveStatus(step); - const isLast = index === steps.length - 1; + {groups.map((group, groupIndex) => { + const isLastGroup = groupIndex === groups.length - 1; + const parentStatus = getEffectiveStatus(group.parent); + const parentInfo = toolInfo.get(group.parent.id); + const parentTitle = resolveDisplayTitle(group.parent, parentInfo); + const hasChildren = group.children.length > 0; + // Parent dots are connected by a vertical line that runs through + // any indented children (their column has no dot, so the line + // passes cleanly behind them) and overshoots by ~15px to reach + // the next group's dot center (top-[15px]). + const showParentLine = !isLastGroup; return ( -
-
- {!isLast && ( -
+
+
+ {showParentLine && ( +
)}
- {effectiveStatus === "in_progress" ? ( + {parentStatus === "in_progress" ? ( @@ -117,24 +285,25 @@ export const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?:
-
- {step.title} -
+ - {step.items && step.items.length > 0 && ( -
- {step.items.map((item) => ( - - {item} - - ))} + {hasChildren && ( +
+ {group.children.map((child) => { + const childInfo = toolInfo.get(child.id); + return ( + + ); + })}
)}
@@ -158,13 +327,23 @@ function ThinkingStepsDataRenderer({ data }: { name: string; data: unknown }) { const isThreadRunning = useAuiState(({ thread }) => thread.isRunning); const isLastMessage = useAuiState(({ message }) => message?.isLast ?? false); const isMessageStreaming = isThreadRunning && isLastMessage; + const content = useAuiState(({ message }) => message?.content); + + const stepToolInfo = useMemo( + () => buildThinkingStepToolInfo(Array.isArray(content) ? content : undefined), + [content] + ); const steps = (data as { steps: ThinkingStep[] } | null)?.steps ?? []; if (steps.length === 0) return null; return (
- +
); } From a8417e3c451090da6d05e54ef152b3ff3c72757a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 14:37:06 +0200 Subject: [PATCH 44/58] Render HITL approval cards inline in the thinking-steps timeline. --- .../assistant-ui/assistant-message.tsx | 17 ++- .../assistant-ui/thinking-steps.tsx | 101 ++++++++++++++++-- surfsense_web/lib/hitl/index.ts | 6 ++ surfsense_web/lib/hitl/render-target.tsx | 48 +++++++++ 4 files changed, 158 insertions(+), 14 deletions(-) create mode 100644 surfsense_web/lib/hitl/render-target.tsx diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index a21ade74a..549141779 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -59,6 +59,7 @@ import { DropdownMenuLabel } from "@/components/ui/dropdown-menu"; import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; +import { withHitlInTimeline } from "@/lib/hitl"; import { getProviderIcon } from "@/lib/provider-icons"; import { cn } from "@/lib/utils"; @@ -508,12 +509,22 @@ const MessageInfoDropdown: FC = () => { // page through them and stage decisions instead of firing one resume per card. // ``withDelegationSpanIndent`` wraps every entry (including Fallback) so delegated // subagent tools don't bypass span indentation via a named ``by_name`` UI. +// ``withHitlInTimeline`` is the OUTERMOST wrapper so a body render with an +// interrupt result returns ``null`` immediately — no inner wrappers paint +// — while a timeline render (under ``HitlRenderTargetProvider value="timeline"`` +// inside ``ThinkingStepsDisplay``) passes through to the real component. const bundleTool = (Component: ToolCallMessagePartComponent) => - withBundleStep(withDelegationSpanIndent(Component)); + withHitlInTimeline(withBundleStep(withDelegationSpanIndent(Component))); const NullToolUi: ToolCallMessagePartComponent = () => null; -const TOOLS_BY_NAME = { +/** + * Tool-call UI registry. Exported so ``ThinkingStepsDisplay`` can mount + * the SAME wrapped components inline under a step row when the card's + * result is an HITL interrupt. The wrappers handle ``ToolCallIdProvider`` + * and bundle paging consistently across both render targets. 
+ */ +export const TOOLS_BY_NAME = { generate_report: bundleTool(GenerateReportToolUI), generate_resume: bundleTool(GenerateResumeToolUI), generate_podcast: bundleTool(GeneratePodcastToolUI), @@ -554,7 +565,7 @@ const TOOLS_BY_NAME = { scrape_webpage: NullToolUi, } as const; -const TOOLS_FALLBACK = bundleTool(ToolFallback); +export const TOOLS_FALLBACK = bundleTool(ToolFallback); const AssistantMessageInner: FC = () => { const isMobile = !useMediaQuery("(min-width: 768px)"); diff --git a/surfsense_web/components/assistant-ui/thinking-steps.tsx b/surfsense_web/components/assistant-ui/thinking-steps.tsx index 6c3832bff..46d33a9fa 100644 --- a/surfsense_web/components/assistant-ui/thinking-steps.tsx +++ b/surfsense_web/components/assistant-ui/thinking-steps.tsx @@ -1,9 +1,15 @@ -import { makeAssistantDataUI, useAuiState } from "@assistant-ui/react"; +import { + makeAssistantDataUI, + type ToolCallMessagePartComponent, + useAuiState, +} from "@assistant-ui/react"; import { ChevronRightIcon } from "lucide-react"; import type { FC } from "react"; import { useCallback, useEffect, useMemo, useState } from "react"; +import { TOOLS_BY_NAME, TOOLS_FALLBACK } from "@/components/assistant-ui/assistant-message"; import { ChainOfThoughtItem } from "@/components/prompt-kit/chain-of-thought"; import { TextShimmerLoader } from "@/components/prompt-kit/loader"; +import { HitlRenderTargetProvider, isInterruptResult } from "@/lib/hitl"; import { cn } from "@/lib/utils"; export interface ThinkingStep { @@ -24,20 +30,33 @@ export interface ThinkingStep { * Per-step info joined from the assistant message ``tool-call`` parts via * the shared ``metadata.thinkingStepId`` correlation * (set on the server in ``AgentEventRelayState.tool_activity_metadata``). + * + * Carries enough of the part to: + * - identify the opening ``task`` step and substitute the subagent display + * name on the parent header (uses ``toolName`` and ``args``); + * - render the matching tool component inline under the step row when the + * card's result is an HITL interrupt (uses ``toolCallId``, ``argsText``, + * ``result``, ``langchainToolCallId``). */ interface StepToolInfo { + toolCallId: string; toolName: string; args: Record; + argsText?: string; + result?: unknown; + langchainToolCallId?: string; } export type ThinkingStepToolInfoMap = ReadonlyMap; /** - * Build ``thinkingStepId → {toolName, args}`` from message content. Used to + * Build ``thinkingStepId → StepToolInfo`` from message content. Used to * - identify the opening ``task`` step (parent header, never indents) without * relying on the human-readable title; * - render the parent's display title from ``args.subagent_type`` instead of - * the generic "Task" copy. + * the generic "Task" copy; + * - mount the matching tool-call card inline under a step row when the + * result is an HITL interrupt (see ``TimelineHitlCard``). */ export function buildThinkingStepToolInfo( content: readonly unknown[] | undefined @@ -48,14 +67,25 @@ export function buildThinkingStepToolInfo( if (!part || typeof part !== "object") continue; const o = part as { type?: string; + toolCallId?: string; toolName?: string; args?: Record; + argsText?: string; + result?: unknown; + langchainToolCallId?: string; metadata?: Record; }; - if (o.type !== "tool-call" || !o.toolName) continue; + if (o.type !== "tool-call" || !o.toolName || !o.toolCallId) continue; const tid = o.metadata?.thinkingStepId; if (typeof tid === "string" && tid.trim().length > 0) { - m.set(tid, { toolName: o.toolName, args: o.args ?? 
{} }); + m.set(tid, { + toolCallId: o.toolCallId, + toolName: o.toolName, + args: o.args ?? {}, + argsText: o.argsText, + result: o.result, + langchainToolCallId: o.langchainToolCallId, + }); } } return m; @@ -159,6 +189,47 @@ const StepBody: FC<{
); +/** + * Mount the same tool-call UI used in the message body, but inside the + * chain-of-thought timeline. The body copy returns ``null`` (see + * ``withHitlInTimeline`` in ``lib/hitl/render-target``), so the card + * effectively moves from the body to the timeline for the lifetime of the + * interrupt (pending → processing → complete / rejected). + * + * ``metadata`` is intentionally omitted from the props we forward — the + * step row already provides any indentation it needs, so we don't want + * ``withDelegationSpanIndent`` to add a second indent + border on top. + * + * ``status`` is a placeholder (HITL UIs read only ``args`` + ``result``) + * so we don't need to mirror assistant-ui's runtime status object here. + */ +const TimelineHitlCard: FC<{ info: StepToolInfo }> = ({ info }) => { + const Comp = + (TOOLS_BY_NAME as Record)[info.toolName] ?? + TOOLS_FALLBACK; + const props = { + toolCallId: info.toolCallId, + toolName: info.toolName, + args: info.args, + argsText: info.argsText, + result: info.result, + langchainToolCallId: info.langchainToolCallId, + status: { type: "complete" } as const, + }; + return ( + + {/* biome-ignore lint/suspicious/noExplicitAny: ToolCallMessagePartProps requires + runtime-only fields (addResult, resume, MessagePartState) we don't have when + re-rendering manually; HITL components only read args + result. */} + + + ); +}; + +function hitlInterruptInfo(info: StepToolInfo | undefined): StepToolInfo | undefined { + return info && isInterruptResult(info.result) ? info : undefined; +} + /** * Chain of thought display component - single collapsible dropdown design. * @@ -291,17 +362,25 @@ export const ThinkingStepsDisplay: FC<{ displayTitle={parentTitle} /> + {(() => { + const hitl = hitlInterruptInfo(parentInfo); + return hitl ? : null; + })()} + {hasChildren && (
{group.children.map((child) => { const childInfo = toolInfo.get(child.id); + const childHitl = hitlInterruptInfo(childInfo); return ( - +
+
+ {childHitl && <TimelineHitlCard info={childHitl} />}
); })}
diff --git a/surfsense_web/lib/hitl/index.ts b/surfsense_web/lib/hitl/index.ts index 4bb15e8b5..a2f218d5d 100644 --- a/surfsense_web/lib/hitl/index.ts +++ b/surfsense_web/lib/hitl/index.ts @@ -6,6 +6,12 @@ export { useHitlBundle, useToolCallIdContext, } from "./bundle-context"; +export { + type HitlRenderTarget, + HitlRenderTargetProvider, + useHitlRenderTarget, + withHitlInTimeline, +} from "./render-target"; export type { HitlDecision, InterruptActionRequest, diff --git a/surfsense_web/lib/hitl/render-target.tsx b/surfsense_web/lib/hitl/render-target.tsx new file mode 100644 index 000000000..cbfdbf2be --- /dev/null +++ b/surfsense_web/lib/hitl/render-target.tsx @@ -0,0 +1,48 @@ +"use client"; + +import type { ToolCallMessagePartComponent } from "@assistant-ui/react"; +import { createContext, useContext } from "react"; +import { isInterruptResult } from "./types"; + +/** + * Where this tool-call card is currently rendering. + * + * - ``"body"`` (default) — assistant-ui's ``MessagePrimitive.Parts`` renders + * the card inside the message bubble. + * - ``"timeline"`` — ``ThinkingStepsDisplay`` renders the SAME component + * inline under the matching step row so the HITL approval lives in the + * chain-of-thought instead of as a standalone card in the message body. + * + * The two render targets share one component implementation; the context + * lets the body render skip itself when the timeline copy will show the + * card, avoiding a double-render. + */ +export type HitlRenderTarget = "body" | "timeline"; + +const HitlRenderTargetContext = createContext("body"); + +export const HitlRenderTargetProvider = HitlRenderTargetContext.Provider; + +export function useHitlRenderTarget(): HitlRenderTarget { + return useContext(HitlRenderTargetContext); +} + +/** + * Hide the body render of a tool-call whose result is a HITL interrupt. + * The same component is mounted again inside ``ThinkingStepsDisplay`` + * with ``HitlRenderTargetProvider value="timeline"`` — that copy renders + * normally, so the card "moves" from the message body to the timeline. + * + * Pure pass-through for non-HITL results AND for the timeline render. + */ +export function withHitlInTimeline( + Component: ToolCallMessagePartComponent +): ToolCallMessagePartComponent { + const Wrapped: ToolCallMessagePartComponent = (props) => { + const target = useHitlRenderTarget(); + if (target === "body" && isInterruptResult(props.result)) return null; + return ; + }; + Wrapped.displayName = `withHitlInTimeline(${Component.displayName ?? Component.name ?? "ToolUI"})`; + return Wrapped; +} From 5c1f5edd751e1e6916dff83102226d43f5ee0180 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 14:39:44 +0200 Subject: [PATCH 45/58] Add chat-messages feature module architecture doc. --- .../features/chat-messages/ARCHITECTURE.md | 483 ++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 surfsense_web/features/chat-messages/ARCHITECTURE.md diff --git a/surfsense_web/features/chat-messages/ARCHITECTURE.md b/surfsense_web/features/chat-messages/ARCHITECTURE.md new file mode 100644 index 000000000..030374aaf --- /dev/null +++ b/surfsense_web/features/chat-messages/ARCHITECTURE.md @@ -0,0 +1,483 @@ +# `features/chat-messages/` — Architecture + +> **Scope.** This module owns everything between an assistant message +> arriving and its rendering inside the chat UI: the timeline (the +> agent's process — reasoning + every tool call), and the HITL +> primitives that per-tool components compose to render approval views. 
+> +> It does **NOT** own: the thread shell, the composer, the streaming +> pipeline, the message frame (`assistant-message.tsx`, +> `user-message.tsx`, markdown renderer, citations), the comments +> sidebar, or any of the 63 individual tool-ui integration files +> under `components/tool-ui/`. + +--- + +## 1. Mental model + +Every assistant message has two regions: + +| Region | What it shows | +|---|---| +| **Timeline** | The agent's *process*. Reasoning, every tool call, grouped by delegation `spanId` into a tree. Each tool call is rendered by its registered component, which selects its own view (running, awaiting approval, success, error, etc.) by discriminating its `result` data. | +| **Body** | The agent's *product*. Markdown text, citations, native reasoning blocks, and value-add deliverables (image viewer, chart, canvas). Connector tool cards do NOT render here. | + +**Principle: timeline = process, body = product. No overlap.** + +A tool's UI lives in the body **if and only if** it produces a deliverable +the user wants to interact with — view, scrub, copy, share. If the UI +just shows that the tool ran and what it did, it lives in the timeline. + +``` +┌─ Assistant Message ─────────────────────────────────────────┐ +│ │ +│ ╔═════════════════════════════════════════════════════╗ │ +│ ║ TIMELINE (process) ║ │ +│ ║ ║ │ +│ ║ ▸ task: NotionAgent [running] ║ │ +│ ║ ▸ search_workspace [completed] ║ │ +│ ║ ▸ update_page ← rendered by ║ │ +│ ║ (Notion-styled approval UpdateNotion- ║ │ +│ ║ card OR Notion-styled PageToolUI; ║ │ +│ ║ success/error card, the component ║ │ +│ ║ per its own data picks the view ║ │ +│ ║ discrimination) from result) ║ │ +│ ║ ▸ summarize [completed] ║ │ +│ ╚═════════════════════════════════════════════════════╝ │ +│ │ +│ ╔═════════════════════════════════════════════════════╗ │ +│ ║ BODY (product) ║ │ +│ ║ ║ │ +│ ║ Markdown text, citations, value-add deliverables ║ │ +│ ║ only. Connector tool cards do NOT render here. ║ │ +│ ╚═════════════════════════════════════════════════════╝ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## 2. The single data model + +The timeline reads ONE data structure: a `TimelineItem[]`. There are +no parallel structures for "thinking steps", "tool calls", "HITL +bundles", etc. Every visible piece of agent activity is a `TimelineItem`. + +### 2.1 The discriminated union (outer discrimination) + +Two kinds. The timeline does **outer discrimination** — it chooses +reasoning view vs tool-call mounting based on `kind`. + +```ts +type ItemStatus = + | "pending" | "running" | "completed" | "cancelled" | "error"; + +interface BaseItem { + id: string; + spanId?: string; // groups items into delegation tree (parent task + children) + status: ItemStatus; +} + +interface ReasoningItem extends BaseItem { + kind: "reasoning"; + text: string; +} + +interface ToolCallItem extends BaseItem { + kind: "tool-call"; + toolName: string; + args: Record; + argsText?: string; + result?: unknown; // per-tool component discriminates this internally + langchainToolCallId?: string; +} + +type TimelineItem = ReasoningItem | ToolCallItem; + +interface TimelineGroup { + parent: TimelineItem; + children: TimelineItem[]; +} +``` + +**`ToolCallItem` has no `approval` field, no `phase`, no `view`.** All of +that is derived inside the per-tool component from the result data. 
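Worth pinning down before the per-tool story: how a flat `TimelineItem[]` becomes `TimelineGroup[]`. Below is a minimal sketch of the `spanId` grouping owned by `timeline/grouping.ts` (§5), under the semantics the §8 tests assert: the first item carrying a `spanId` opens the group, later items with the same `spanId` nest under it, and items with no (or an unseen) `spanId` — including orphaned children — are promoted to parents. `groupTimelineItems` is a hypothetical name; the shipped function may differ.

```ts
function groupTimelineItems(items: readonly TimelineItem[]): TimelineGroup[] {
  const groups: TimelineGroup[] = [];
  const parentBySpan = new Map<string, TimelineGroup>();
  for (const item of items) {
    const open = item.spanId ? parentBySpan.get(item.spanId) : undefined;
    if (open) {
      // Same spanId as an already-open group: nest under that parent.
      open.children.push(item);
      continue;
    }
    // No spanId, or a spanId seen for the first time (the delegating `task`
    // itself, which owns the span), or an orphaned child: top-level parent.
    const group: TimelineGroup = { parent: item, children: [] };
    groups.push(group);
    if (item.spanId) parentBySpan.set(item.spanId, group);
  }
  return groups;
}
```

Because lookup is keyed by `spanId` rather than adjacency, interleaved top-level items (e.g. reasoning between delegated tool calls) don't break an open group.
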
+ +### 2.2 Inner discrimination (per-tool component) + +Each tool registers a component that receives the tool-call data and +decides what to render based on its own result-shape discriminators: + +```tsx +const UpdateNotionPageToolUI: TimelineToolComponent = (props) => { + if (isInterruptResult(props.result)) return ; + if (isAuthErrorResult(props.result)) return ; + if (isErrorResult(props.result)) return ; + if (isInfoResult(props.result)) return ; + if (isSuccessResult(props.result)) return ; + return ; +}; +``` + +The discriminators (`isInterruptResult`, `isAuthErrorResult`, etc.) +are **types, not centralized infrastructure**. The component owns +the dispatch. The timeline knows none of this. + +### 2.3 The pure builder + +```ts +function buildTimeline( + content: MessageContent[], + thinkingSteps: ThinkingStep[], +): TimelineGroup[] +``` + +Builds the timeline from existing message content + thinking-step data +parts. Pure function. Sets `kind` and `status` on each item; preserves +`result` verbatim for per-tool discrimination. + +### 2.4 The dispatch (timeline-level) + +Two cases. Exhaustive switch. No runtime guards in the timeline renderer. + +```tsx +function TimelineItemView({ item }: { item: TimelineItem }) { + switch (item.kind) { + case "reasoning": return ; + case "tool-call": return ; + } +} + +function ToolCallItemView({ item }: { item: ToolCallItem }) { + const ToolBody = getToolComponent(item.toolName) ?? FallbackToolBody; + return ; +} +``` + +**No card frame, header, body slot, approval area, or result panel +at the timeline level.** Each tool component owns its own visual +presentation. This matches how every existing tool-ui component +already works — they each render their own rounded card with their +own header. + +--- + +## 3. The timeline's tool-component contract + +Tool components mounted by the timeline implement a subset of +assistant-ui's `ToolCallMessagePartProps` — only the fields the +timeline can supply: + +```ts +interface TimelineToolProps { + toolCallId: string; + toolName: string; + args: Record; + argsText?: string; + result?: unknown; + langchainToolCallId?: string; + status: ItemStatus; // simple enum, not assistant-ui's complex status object +} + +type TimelineToolComponent = (props: TimelineToolProps) => ReactNode; +``` + +Notably absent (compared to `ToolCallMessagePartProps`): +- `addResult`, `resume` — runtime-only, not needed; HITL decisions + flow through `useHitlDecision` (a hook) which talks to the runtime + directly. +- The complex `status: ToolCallMessagePartState["status"]` object — + replaced by our simple `ItemStatus` enum. + +The 15 existing HITL-aware tool-ui components only use the subset +above. They are **retyped** to `TimelineToolComponent` in the cutover +commit (mechanical: `ToolCallMessagePartComponent` → `TimelineToolComponent`). + +--- + +## 4. Rendering topology — how the body opts out + +The body uses assistant-ui's `MessagePrimitive.Parts` and registers a +**no-op fallback** for tool calls so they don't render here: + +```tsx + null, // every other tool-call: render nothing in the body + }} +/> +``` + +`BODY_TOOLS` starts empty (no value-add deliverables exist yet) and +grows as we identify them. Every tool not in `BODY_TOOLS` renders +nothing in the body. + +The timeline reads message content via `useAuiState(({ message }) => +message?.content)` and runs `buildTimeline` to produce the items it +renders. Tool-call data IS in the message; the body just chooses not +to render it. + +**Result:** zero dual placement. 
Zero suppression HOC. Zero +render-target context. Zero coordination. + +--- + +## 5. Slice layout + +``` +features/chat-messages/ +├── ARCHITECTURE.md +│ +├── timeline/ ← the process surface +│ ├── types.ts (TimelineItem union, ToolCallItem, ItemStatus, TimelineGroup) +│ ├── build-timeline.ts (pure: content + thinkingSteps → groups) +│ ├── grouping.ts (pure: group items by spanId) +│ ├── subagent-rename.ts (pure: parent task title from args.subagent_type) +│ ├── tool-registry/ (PRIVATE to timeline; only timeline mounts tools) +│ │ ├── types.ts (TimelineToolComponent, TimelineToolProps) +│ │ ├── registry.ts (TOOLS_BY_NAME from components/tool-ui/*) +│ │ ├── adapt-props.ts (pure: ToolCallItem → TimelineToolProps) +│ │ ├── fallback/ +│ │ │ ├── fallback-tool-body.tsx (TimelineToolComponent for unregistered tools — discriminates internally) +│ │ │ ├── default-fallback-card.tsx (the non-HITL fallback view: status icon + collapsible + JSON) +│ │ │ ├── revert-button.tsx (revert affordance — used by default-fallback-card) +│ │ │ ├── use-tool-action.ts (action lookup hook for revert) +│ │ │ └── index.ts +│ │ └── index.ts +│ ├── items/ +│ │ ├── reasoning-item.tsx (renders kind: "reasoning") +│ │ ├── tool-call-item.tsx (lookup component + mount with adapted props — ~10 lines) +│ │ └── index.ts +│ ├── timeline.tsx (groups + iteration + 2-case dispatch) +│ ├── data-renderer.tsx (assistant-ui adapter; exports TimelineDataUI) +│ └── index.ts +│ +├── hitl/ ← pure HITL primitives +│ ├── types.ts (InterruptResult, HitlPhase, HitlDecision, isInterruptResult) +│ ├── use-hitl-decision.ts (hook: dispatch approve/edit/reject — used by every approval card) +│ ├── use-hitl-phase.ts (hook: tracks pending → processing → approved/rejected/edited) +│ ├── approval-cards/ (the FALLBACK-mounted approval views; per-tool components import from here OR build their own) +│ │ ├── generic-approval.tsx (default approval UI — what FallbackToolBody mounts for interrupt results) +│ │ ├── doom-loop-approval.tsx (special-case approval UI + isDoomLoopInterrupt) +│ │ └── index.ts +│ ├── edit-panel/ +│ │ ├── edit-panel.atom.ts (Jotai atoms for the panel state) +│ │ ├── edit-panel.tsx (root: atom wiring + desktop/mobile switch only) +│ │ ├── fields/ +│ │ │ ├── email-tags-field.tsx (EmailsTagField + parse/format helpers) +│ │ │ ├── calendar-field.tsx (DateTimePickerField + parse/format helpers) +│ │ │ ├── extra-fields.tsx (ExtraField switch renderer) +│ │ │ └── index.ts (private barrel) +│ │ └── index.ts +│ └── index.ts +│ +└── (no body slice yet — body just registers `tools={{ fallback: () => null }}`) +``` + +### 5.1 Notable absences + +| Was | Status | Reason | +|---|---|---| +| `tool-cards/` slice | **Folded into `timeline/`** | Tool-call rendering happens in the timeline; the tool-registry is private to timeline. | +| `bundleTool` composer | **Deleted** | Body opts out via `fallback: () => null`. No HOCs to compose. | +| `withDelegationSpanIndent` HOC | **Deleted** | Tree indent is owned by the timeline's group renderer. | +| `withBundleStep` + `HitlBundleProvider` | **Deleted** | Multi-approval is just N inline renderings; no coordination needed. | +| `withHitlInTimeline` + `HitlRenderTargetProvider` | **Deleted** | Tool cards never render in body; no dual-placement to suppress. | +| `pickApprovalCard` central dispatcher | **Deleted** | Each tool component picks its own view via internal discrimination. The fallback has its OWN internal dispatcher (interrupt → generic-approval; doom-loop → doom-loop-approval). 
| +| `getHitlToolComponent` registry | **Deleted** | The tool-registry is just a `Record`; lookup is `TOOLS_BY_NAME[name]`. | +| Centralized `approval-area.tsx` in timeline | **Deleted** | The approval is a view the per-tool component renders, not an area the timeline composes. | +| `ApprovalState` on `ToolCallItem` | **Deleted** | Phase is local UI state inside per-tool approval cards (via `useHitlPhase`). The timeline doesn't track it. | +| `ThinkingStepToolInfoMap` Map join | **Deleted** | The unified `TimelineItem` union eliminates the join. | + +--- + +## 6. Public surfaces + +### `timeline/index.ts` + +```ts +export { TimelineDataUI }; // the assistant-ui registration +export { Timeline }; // exposed for tests +export type { TimelineItem, ReasoningItem, ToolCallItem, TimelineGroup, ItemStatus }; +export type { TimelineToolComponent, TimelineToolProps }; +``` + +### `hitl/index.ts` + +```ts +export type { InterruptResult, InterruptActionRequest, InterruptReviewConfig, HitlDecision, HitlPhase }; +export { isInterruptResult }; + +export { useHitlDecision }; +export { useHitlPhase }; + +export { GenericHitlApprovalToolUI }; // for tool-ui integrations that want to compose on top +export { DoomLoopApprovalToolUI, isDoomLoopInterrupt }; + +export { HitlEditPanel, MobileHitlEditPanel }; +export { openHitlEditPanelAtom, closeHitlEditPanelAtom, hitlEditPanelAtom }; +export type { ExtraField }; +``` + +The 63 `components/tool-ui/*` integrations consume `hitl/`'s public +surface (types, hooks, edit-panel atom, optionally the fallback +approval cards). Nothing else. + +--- + +## 7. Layering & SRP rules + +### 7.1 The "what knows about what" rule + +| Component | Knows about | +|---|---| +| `timeline/` | Itself + `hitl/` (via the fallback) + `components/tool-ui/*` (via the registry) | +| `timeline/tool-registry/` | The `TimelineToolComponent` contract, `components/tool-ui/*`, and `hitl/` (for the fallback's approval views) | +| `hitl/` | Itself only — no knowledge of timeline, tool-call types, registry | +| `components/tool-ui/*` | `hitl/` only (for HITL primitives + optional fallback approval cards); never reaches into `timeline/` | +| Body (`assistant-message.tsx`) | The `BODY_TOOLS` registry and `TimelineDataUI` from `timeline/index.ts` | + +`hitl/` does **NOT** import from `timeline/`. The dependency arrow is one-way. + +### 7.2 Render policy belongs to the surface, not the primitive + +- `hitl/` exposes hooks, types, and the fallback approval cards. +- `timeline/` decides WHEN and WHERE tool components mount (inside + `tool-call-item.tsx`). +- A `hitl/` primitive must never assume it's being rendered in the + timeline, the body, or anywhere else. It receives props, renders + UI, returns. No environment sniffing, no context. +- Per-tool components in `components/tool-ui/*` decide WHICH view to + render based on result-shape discriminators. The timeline does not + know these discriminators exist. + +### 7.3 Single Responsibility + +Rules in priority order: + +1. **One responsibility per file.** Need "and" to describe it? Split it. +2. **One responsibility per function.** Same. +3. **Line count is a smell, not a budget.** ~250 lines = pause and + ask "still one responsibility?"; ~500 lines = strong presumption + of split needed unless explicitly justified at the top of the file. 
Notable splits driven by SRP during the port:

- `hitl-edit-panel.tsx` (current 405 lines, 4 responsibilities) → 5
  files: `edit-panel.tsx` (root + layout switch), `email-tags-field.tsx`,
  `calendar-field.tsx`, `extra-fields.tsx`, `edit-panel.atom.ts`.
- `tool-fallback.tsx` (current 533 lines, 3 responsibilities) → split
  across `fallback-tool-body.tsx`, `default-fallback-card.tsx`,
  `revert-button.tsx`, `use-tool-action.ts`.
- `thinking-steps.tsx` (current 434 lines, 5 responsibilities) →
  folded into the new `timeline/` slice across `types.ts`,
  `build-timeline.ts`, `grouping.ts`, `subagent-rename.ts`,
  `timeline.tsx`, `items/*`, `data-renderer.tsx`.

---

## 8. Tested behaviors

Unit tests live next to the file they cover (`*.test.ts(x)`).

- `timeline/build-timeline.test.ts` — content + thinkingSteps → correct items, correct kind, correct status, correct ordering. `result` preserved verbatim.
- `timeline/grouping.test.ts` — items group correctly by spanId; first item with a spanId is the parent; orphaned children are promoted defensively.
- `timeline/subagent-rename.test.ts` — `task` step's display title resolves to `args.subagent_type` (title-cased); falls back to "Task" when subagent type is missing.
- `timeline/tool-registry/registry.test.ts` — `TOOLS_BY_NAME` includes every named tool; `FallbackToolBody` is returned for unknown names; the fallback dispatches correctly (interrupt → generic, doom-loop → doom-loop, otherwise → default fallback).
- `timeline/tool-registry/adapt-props.test.ts` — `ToolCallItem` → `TimelineToolProps` mapping is lossless; status mapping is correct.
- `hitl/use-hitl-phase.test.ts` — phase transitions through pending → processing → approved/rejected/edited correctly.
- `hitl/approval-cards/doom-loop-approval.test.tsx` — `isDoomLoopInterrupt` matches doom-loop-shape interrupts only.

Smoke test after cutover:

- Assistant message renders; markdown + citations work in body.
- All connector tool calls render in timeline only (none in body).
- Reasoning steps render in timeline.
- Single HITL flow (Notion update): approve, edit, reject — each transitions through the phases correctly.
- Multiple pending HITL cards: each renders inline at its position; deciding one doesn't affect the others.
- Doom-loop approval renders the special card.
- Revert button works on completed default-fallback cards and survives reload.
- Subagent name renaming on `task` parent step.

---

## 9. Migration plan (strangler fig, single atomic cutover)

### Phase A — Build the new slice in parallel

In dependency order: `hitl/` first (leaf), then `timeline/`. The
existing code (`thinking-steps.tsx`, `tool-fallback.tsx`,
`assistant-message.tsx`'s tool registry, etc.) remains fully
functional throughout Phase A.

1. Port `hitl/` primitives. Apply SRP splits (edit panel into 5 files).
   `hitl/approval-cards/{generic,doom-loop}-approval.tsx` are ported
   as standalone components — they're what the fallback mounts and
   what per-tool integrations may compose on top of.
2. Build the `timeline/` slice. Implement `buildTimeline` from scratch
   (do NOT copy thinking-steps logic verbatim — design the pure
   function around the new union); the grouping core is sketched
   after this list. Build the `tool-registry/` with the
   `TimelineToolComponent` contract; the registry imports from
   `components/tool-ui/*` (no file moves yet).
3. Add unit tests as listed in §8.
4. Verify: tsc clean, biome clean, no consumer file touched, no
   linter regressions.
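
Phase A step 2's grouping core, sketched. A minimal, self-contained
illustration under assumed shapes (`id`/`spanId` fields only) — the
real `grouping.ts` operates on the full `TimelineItem` union:

```ts
// grouping.ts sketch — item shape pared down to what grouping reads.
type Item = { id: string; spanId?: string };
type Group = { parent: Item; children: Item[] };

export function groupBySpanId(items: Item[]): Group[] {
	const groups: Group[] = [];
	const bySpan = new Map<string, Group>();

	for (const item of items) {
		if (!item.spanId) {
			// No span: a standalone group of one.
			groups.push({ parent: item, children: [] });
			continue;
		}
		const open = bySpan.get(item.spanId);
		if (open) {
			open.children.push(item);
			continue;
		}
		// First item seen with a spanId becomes the parent; a child that
		// arrives before its parent is thereby promoted defensively (§8).
		const group: Group = { parent: item, children: [] };
		bySpan.set(item.spanId, group);
		groups.push(group);
	}
	return groups;
}
```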
+ +### Phase B — Atomic cutover (single commit) + +| File | Change | +|---|---| +| `components/assistant-ui/assistant-message.tsx` | Replace `TOOLS_BY_NAME`/`TOOLS_FALLBACK` definitions with `BODY_TOOLS` (initially empty) + `tools={{ fallback: () => null }}`. Replace `ThinkingStepsDataUI` registration with `TimelineDataUI`. | +| `components/public-chat/public-thread.tsx` | Same registry + data UI swap. | +| `app/dashboard/.../new-chat/page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. Drop `HitlBundleProvider` (no longer needed). | +| `components/free-chat/free-chat-page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. | +| `components/public-chat/public-chat-view.tsx` | Same. | +| `components/layout/ui/right-panel/RightPanel.tsx` | Switch `HitlEditPanel` import to `@/features/chat-messages/hitl`. | +| The 15 `components/tool-ui/*` HITL-aware integration files | (a) Switch HITL imports from `@/lib/hitl`, `@/hooks/use-hitl-phase`, `@/atoms/chat/hitl-edit-panel.atom` → `@/features/chat-messages/hitl`. (b) Retype from `ToolCallMessagePartComponent` → `TimelineToolComponent` (mechanical type rename). | + +### Phase C — Delete legacy + +After cutover passes smoke tests: + +- `components/assistant-ui/thinking-steps.tsx` +- `components/assistant-ui/tool-fallback.tsx` +- `lib/chat/delegation-span-indent.ts` +- `lib/hitl/` (entire folder) +- `components/hitl-bundle-pager/` (entire folder) +- `components/tool-ui/generic-hitl-approval.tsx` +- `components/tool-ui/doom-loop-approval.tsx` +- `components/hitl-edit-panel/` (entire folder) +- `hooks/use-hitl-phase.ts` +- `atoms/chat/hitl-edit-panel.atom.ts` + +Verify: no orphan files, no dead imports, no test regressions. + +--- + +## 10. Out of scope (and one consumer relationship) + +### 10.1 The 63 `components/tool-ui/*` integrations + +These are **first-class consumers** of `hitl/` and the +`TimelineToolComponent` contract. They are imported by +`timeline/tool-registry/registry.ts` to build `TOOLS_BY_NAME`. They +never reach into `timeline/` themselves. + +They stay where they are. Future option to move them is a separate, +mechanical follow-up refactor. + +### 10.2 Not touched by this refactor + +- The composer (input bar, mention picker, prompt picker, tool toggles). +- The streaming pipeline (`lib/chat/streaming-state.ts`, `stream-pipeline.ts`, `thread-persistence.ts`). +- The chat-comments sidebar. +- The message frame (`assistant-message.tsx`, `user-message.tsx`, `markdown-text.tsx`, `inline-citation.tsx`) beyond swapping the registry imports. + +If any of these become a blocker for the refactor (e.g. the streaming +pipeline needs a metadata field that doesn't exist), surface it +explicitly and decide whether to expand scope before touching it. From d9ad9ca5cbd55e178566387e9435ea1e7ea41539 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 18:31:16 +0200 Subject: [PATCH 46/58] chat-messages: refresh feature module architecture doc. 
---
 .../features/chat-messages/ARCHITECTURE.md   | 120 +++++++++++++++---
 1 file changed, 105 insertions(+), 15 deletions(-)

diff --git a/surfsense_web/features/chat-messages/ARCHITECTURE.md b/surfsense_web/features/chat-messages/ARCHITECTURE.md
index 030374aaf..5b1dedd3b 100644
--- a/surfsense_web/features/chat-messages/ARCHITECTURE.md
+++ b/surfsense_web/features/chat-messages/ARCHITECTURE.md
@@ -184,8 +184,10 @@ type TimelineToolComponent = (props: TimelineToolProps) => ReactNode;
 Notably absent (compared to `ToolCallMessagePartProps`):
 
 - `addResult`, `resume` — runtime-only, not needed; HITL decisions
-  flow through `useHitlDecision` (a hook) which talks to the runtime
-  directly.
+  flow through `useHitlDecision`, which either stages in the active
+  bundle (N≥2) or fires the `hitl-decision` window event the page
+  listens for (N=1). The hook reads `useToolCallIdContext()` to know
+  which call is dispatching.
 - The complex `status: ToolCallMessagePartState["status"]` object —
   replaced by our simple `ItemStatus` enum.
@@ -224,7 +226,81 @@
 renders. Tool-call data IS in the message; the body just chooses not
 to render it.
 
 **Result:** zero dual placement. Zero suppression HOC. Zero
-render-target context. Zero coordination.
+render-target context. Zero pager HOC.
+
+---
+
+## 4a. Multi-approval coordination (the bundle + pager)
+
+When N HITL interrupts are pending in the same assistant turn (e.g. an
+agent fires multiple gated tool calls in parallel), the LangGraph
+runtime expects **one resume call with N decisions in order**. Per-card
+independent submission isn't possible without backend changes.
+
+The slice handles this with a single React state container,
+`HitlBundleProvider`, mounted once at the thread root by the page that
+owns the runtime (currently `app/dashboard/.../new-chat/page.tsx` and
+`components/free-chat/free-chat-page.tsx`):
+
+```tsx
+<HitlBundleProvider>
+  {/* Thread + Timeline + approval cards mount inside */}
+</HitlBundleProvider>
+```
+
+Per-card flow:
+
+1. `tool-call-item.tsx` wraps each mounted tool component in
+   `ToolCallIdProvider` so `useHitlDecision`
+   knows which call is dispatching.
+2. The user clicks approve/edit/reject on a card.
+3. `useHitlDecision().dispatch([decision])` runs:
+   - **Bundle active (N≥2):** stages the decision under this card's
+     `toolCallId` and fires a `hitl-stage` event so the card's local
+     result mirror updates immediately (UX continuity — no re-prompt
+     if user navigates back via the pager).
+   - **No bundle (N=1):** dispatches the `hitl-decision` event
+     directly — single-decision fast path.
+4. When all N decisions are staged, the user clicks "Submit decisions"
+   on the pager chrome. `bundle.submit()` dispatches the `hitl-decision`
+   event with the full ordered array. The page's listener calls
+   `runtime.resume({ resume: orderedDecisions })` once.
+
+### Pager UX (kept, not deleted)
+
+When a bundle is active (N≥2), only ONE approval card is visible at a
+time — the current step. Other bundle members are hidden until the user
+navigates to them. A small pager chrome (prev/next + "Step X / N" +
+"Submit decisions" button) renders once at the end of the timeline.
+
+Where the responsibilities live:
+
+- **`tool-call-item.tsx`** (timeline) — checks `useHitlBundle()`. If
+  the item is in an active bundle but not the current step, returns
+  null. Otherwise wraps the per-tool component in `ToolCallIdProvider`
+  and mounts it.
+- **`timeline.tsx`** — renders `<PagerChrome />` once at the bottom,
+  conditional on `useHitlBundle()` being non-null.
+- **`hitl/bundle/pager-chrome.tsx`** — pure presentational component; + reads bundle state, renders nav + Submit. No knowledge of the timeline. + +This is **the only Provider in the slice.** It's a state container, not +a behavior HOC: nothing wraps individual cards. The hide-if-not-current +decision is made at the single mount point (`tool-call-item.tsx`), not +distributed across N HOC wrappers. + +What was deleted vs kept here: + +- **Deleted:** `withBundleStep` HOC. Its two responsibilities (hide + non-current cards; render pager after current card) split into the + two correct places: `tool-call-item.tsx` and `timeline.tsx` + respectively. No HOC to compose. +- **Kept (ported as-is to the slice):** `HitlBundleProvider`, + `useHitlBundle`, `ToolCallIdProvider`, `useToolCallIdContext`, + `BundleSubmit`, `HitlBundleAPI`, `PagerChrome`. --- @@ -258,10 +334,14 @@ features/chat-messages/ │ ├── data-renderer.tsx (assistant-ui adapter; exports TimelineDataUI) │ └── index.ts │ -├── hitl/ ← pure HITL primitives +├── hitl/ ← HITL primitives + bundle state container │ ├── types.ts (InterruptResult, HitlPhase, HitlDecision, isInterruptResult) -│ ├── use-hitl-decision.ts (hook: dispatch approve/edit/reject — used by every approval card) -│ ├── use-hitl-phase.ts (hook: tracks pending → processing → approved/rejected/edited) +│ ├── bundle/ (the ONLY Provider in the slice — coordinates N→1 submission + pager UX) +│ │ ├── bundle-context.tsx (HitlBundleProvider, useHitlBundle, ToolCallIdProvider, useToolCallIdContext, BundleSubmit, HitlBundleAPI) +│ │ ├── pager-chrome.tsx (prev/next/submit chrome — mounted once by timeline.tsx when bundle active) +│ │ └── index.ts +│ ├── use-hitl-decision.ts (hook: stages in bundle when N≥2, direct-dispatches when N=1; used by every approval card) +│ ├── use-hitl-phase.ts (hook: tracks pending → processing → complete/rejected) │ ├── approval-cards/ (the FALLBACK-mounted approval views; per-tool components import from here OR build their own) │ │ ├── generic-approval.tsx (default approval UI — what FallbackToolBody mounts for interrupt results) │ │ ├── doom-loop-approval.tsx (special-case approval UI + isDoomLoopInterrupt) @@ -287,7 +367,8 @@ features/chat-messages/ | `tool-cards/` slice | **Folded into `timeline/`** | Tool-call rendering happens in the timeline; the tool-registry is private to timeline. | | `bundleTool` composer | **Deleted** | Body opts out via `fallback: () => null`. No HOCs to compose. | | `withDelegationSpanIndent` HOC | **Deleted** | Tree indent is owned by the timeline's group renderer. | -| `withBundleStep` + `HitlBundleProvider` | **Deleted** | Multi-approval is just N inline renderings; no coordination needed. | +| `withBundleStep` HOC | **Deleted** | Two responsibilities split into the right places: hide-if-not-current → `tool-call-item.tsx`; render pager after current card → `timeline.tsx`. No HOC. | +| `HitlBundleProvider` + `useHitlBundle` + `PagerChrome` | **Kept** (state container + presentational chrome, not HOCs) | Backend constraint: parallel interrupts need ONE ordered resume call. Provider collects N decisions, pager is the user's submit affordance. | | `withHitlInTimeline` + `HitlRenderTargetProvider` | **Deleted** | Tool cards never render in body; no dual-placement to suppress. | | `pickApprovalCard` central dispatcher | **Deleted** | Each tool component picks its own view via internal discrimination. The fallback has its OWN internal dispatcher (interrupt → generic-approval; doom-loop → doom-loop-approval). 
| | `getHitlToolComponent` registry | **Deleted** | The tool-registry is just a `Record`; lookup is `TOOLS_BY_NAME[name]`. | @@ -317,6 +398,10 @@ export { isInterruptResult }; export { useHitlDecision }; export { useHitlPhase }; +export { HitlBundleProvider, ToolCallIdProvider, useHitlBundle, useToolCallIdContext }; +export { PagerChrome }; +export type { BundleSubmit, HitlBundleAPI }; + export { GenericHitlApprovalToolUI }; // for tool-ui integrations that want to compose on top export { DoomLoopApprovalToolUI, isDoomLoopInterrupt }; @@ -384,14 +469,19 @@ Notable splits driven by SRP during the port: ## 8. Tested behaviors -Unit tests live next to the file they cover (`*.test.ts(x)`). +> **Status:** No test runner is set up in `surfsense_web` yet. The pure +> functions below are *intended* to be unit-tested but tests are +> deferred to **Phase D** (post-cutover follow-up: install vitest, +> write the suites, update this section). -- `timeline/build-timeline.test.ts` — content + thinkingSteps → correct items, correct kind, correct status, correct ordering. `result` preserved verbatim. -- `timeline/grouping.test.ts` — items group correctly by spanId; first item with a spanId is the parent; orphaned children are promoted defensively. -- `timeline/subagent-rename.test.ts` — `task` step's display title resolves to `args.subagent_type` (title-cased); falls back to "Task" when subagent type is missing. +Planned tests once vitest is in: + +- `timeline/build-timeline.test.ts` — content + thinkingSteps → correct items, correct kind, correct status, correct ordering. `result` preserved verbatim. Orphan tool-calls (no `thinkingStepId`) appended at end. +- `timeline/grouping.test.ts` — items group correctly by spanId; first item with a spanId is the parent; orphaned children become parents defensively. +- `timeline/subagent-rename.test.ts` — `task` tool-call's display title resolves to `args.subagent_type` (title-cased); falls back to `getToolDisplayName("task")` when subagent type is missing. - `timeline/tool-registry/registry.test.ts` — `TOOLS_BY_NAME` includes every named tool; `FallbackToolBody` is returned for unknown names; the fallback dispatches correctly (interrupt → generic, doom-loop → doom-loop, otherwise → default fallback). - `timeline/tool-registry/adapt-props.test.ts` — `ToolCallItem` → `TimelineToolProps` mapping is lossless; status mapping is correct. -- `hitl/use-hitl-phase.test.ts` — phase transitions through pending → processing → approved/rejected/edited correctly. +- `hitl/use-hitl-phase.test.ts` — phase transitions through pending → processing → complete/rejected correctly. - `hitl/approval-cards/doom-loop-approval.test.tsx` — `isDoomLoopInterrupt` matches doom-loop-shape interrupts only. Smoke test after cutover: @@ -434,7 +524,7 @@ functional throughout Phase A. |---|---| | `components/assistant-ui/assistant-message.tsx` | Replace `TOOLS_BY_NAME`/`TOOLS_FALLBACK` definitions with `BODY_TOOLS` (initially empty) + `tools={{ fallback: () => null }}`. Replace `ThinkingStepsDataUI` registration with `TimelineDataUI`. | | `components/public-chat/public-thread.tsx` | Same registry + data UI swap. | -| `app/dashboard/.../new-chat/page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. Drop `HitlBundleProvider` (no longer needed). | +| `app/dashboard/.../new-chat/page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. Switch `HitlBundleProvider` import from `@/lib/hitl` → `@/features/chat-messages/hitl` (keep the wrap; just new path). 
| | `components/free-chat/free-chat-page.tsx` | Switch `ThinkingStepsDataUI` → `TimelineDataUI`. | | `components/public-chat/public-chat-view.tsx` | Same. | | `components/layout/ui/right-panel/RightPanel.tsx` | Switch `HitlEditPanel` import to `@/features/chat-messages/hitl`. | @@ -447,8 +537,8 @@ After cutover passes smoke tests: - `components/assistant-ui/thinking-steps.tsx` - `components/assistant-ui/tool-fallback.tsx` - `lib/chat/delegation-span-indent.ts` -- `lib/hitl/` (entire folder) -- `components/hitl-bundle-pager/` (entire folder) +- `lib/hitl/` (entire folder — replaced by `features/chat-messages/hitl/{types.ts,bundle/,use-hitl-decision.ts}`) +- `components/hitl-bundle-pager/` (entire folder — `PagerChrome` ported to `hitl/bundle/pager-chrome.tsx`; `withBundleStep` deleted, responsibilities split into `tool-call-item.tsx` + `timeline.tsx`) - `components/tool-ui/generic-hitl-approval.tsx` - `components/tool-ui/doom-loop-approval.tsx` - `components/hitl-edit-panel/` (entire folder) From 9e451a59072619ecfee560b4041d8077178ec681 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sat, 9 May 2026 18:31:23 +0200 Subject: [PATCH 47/58] chat-messages: add hitl module with types, hooks, bundle, approval cards, and edit panel. --- .../approval-cards/doom-loop-approval.tsx | 191 +++++++++++++ .../hitl/approval-cards/generic-approval.tsx | 261 ++++++++++++++++++ .../hitl/approval-cards/index.ts | 2 + .../hitl/bundle/bundle-context.tsx | 157 +++++++++++ .../chat-messages/hitl/bundle/index.ts | 8 + .../hitl/bundle/pager-chrome.tsx | 65 +++++ .../hitl/edit-panel/edit-panel.atom.ts | 82 ++++++ .../hitl/edit-panel/edit-panel.tsx | 203 ++++++++++++++ .../hitl/edit-panel/fields/calendar-field.tsx | 112 ++++++++ .../edit-panel/fields/email-tags-field.tsx | 86 ++++++ .../hitl/edit-panel/fields/extra-fields.tsx | 74 +++++ .../hitl/edit-panel/fields/index.ts | 3 + .../chat-messages/hitl/edit-panel/index.ts | 7 + .../features/chat-messages/hitl/index.ts | 31 +++ .../features/chat-messages/hitl/types.ts | 51 ++++ .../chat-messages/hitl/use-hitl-decision.ts | 45 +++ .../chat-messages/hitl/use-hitl-phase.ts | 66 +++++ 17 files changed, 1444 insertions(+) create mode 100644 surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/approval-cards/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/bundle/bundle-context.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/bundle/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/bundle/pager-chrome.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/edit-panel.atom.ts create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/edit-panel.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/calendar-field.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/email-tags-field.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/extra-fields.tsx create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/fields/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/edit-panel/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/index.ts create mode 100644 surfsense_web/features/chat-messages/hitl/types.ts create mode 100644 
surfsense_web/features/chat-messages/hitl/use-hitl-decision.ts
 create mode 100644 surfsense_web/features/chat-messages/hitl/use-hitl-phase.ts

diff --git a/surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx b/surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx
new file mode 100644
index 000000000..5b2b0e385
--- /dev/null
+++ b/surfsense_web/features/chat-messages/hitl/approval-cards/doom-loop-approval.tsx
@@ -0,0 +1,191 @@
+"use client";
+
+import { CornerDownLeftIcon, OctagonAlert } from "lucide-react";
+import { useCallback, useEffect, useMemo } from "react";
+import { TextShimmerLoader } from "@/components/prompt-kit/loader";
+import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Separator } from "@/components/ui/separator";
+import type { HitlApprovalCard, HitlDecision, InterruptResult } from "../types";
+import { isInterruptResult } from "../types";
+import { useHitlDecision } from "../use-hitl-decision";
+import { useHitlPhase } from "../use-hitl-phase";
+
+/**
+ * Specialized HITL card for ``DoomLoopMiddleware`` interrupts. The
+ * backend signals these by setting ``context.permission === "doom_loop"``
+ * on the ``permission_ask`` interrupt.
+ *
+ * The card replaces the generic "approve/reject" framing with a
+ * "continue/stop" affordance that better matches the user's mental
+ * model: the agent is stuck repeating itself, not asking permission
+ * for a destructive action.
+ */
+function DoomLoopCardView({
+	toolName,
+	args,
+	interruptData,
+	onDecision,
+}: {
+	toolName: string;
+	args: Record<string, unknown>;
+	interruptData: InterruptResult;
+	onDecision: (decision: HitlDecision) => void;
+}) {
+	const { phase, setProcessing, setRejected } = useHitlPhase(interruptData);
+
+	const context = (interruptData.context ?? {}) as Record<string, unknown>;
+	const threshold = typeof context.threshold === "number" ? context.threshold : 3;
+	const stuckTool = (typeof context.tool === "string" && context.tool) || toolName;
+	const recentSignatures = Array.isArray(context.recent_signatures)
+		? (context.recent_signatures as string[])
+		: [];
+	const displayName = stuckTool.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
+
+	const argPreview = useMemo(() => {
+		if (!args || Object.keys(args).length === 0) return null;
+		try {
+			const json = JSON.stringify(args, null, 2);
+			return json.length > 600 ? `${json.slice(0, 600)}…` : json;
+		} catch {
+			return null;
+		}
+	}, [args]);
+
+	const handleContinue = useCallback(() => {
+		if (phase !== "pending") return;
+		setProcessing();
+		onDecision({ type: "approve" });
+	}, [phase, setProcessing, onDecision]);
+
+	const handleStop = useCallback(() => {
+		if (phase !== "pending") return;
+		setRejected();
+		onDecision({ type: "reject", message: "Doom loop: user requested stop." });
+	}, [phase, setRejected, onDecision]);
+
+	useEffect(() => {
+		const handler = (e: KeyboardEvent) => {
+			if (phase !== "pending") return;
+			if (e.key === "Enter" && !e.shiftKey && !e.ctrlKey && !e.metaKey) {
+				e.preventDefault();
+				handleStop();
+			}
+		};
+		window.addEventListener("keydown", handler);
+		return () => window.removeEventListener("keydown", handler);
+	}, [phase, handleStop]);
+
+	const isResolved = phase === "complete" || phase === "rejected";
+
+	return (
+		<Alert>
+			<OctagonAlert />
+			<AlertTitle>
+				{phase === "rejected"
+					? "Stopped"
+					: phase === "processing"
+						? "Continuing…"
+						: phase === "complete"
+							? "Continued"
+							: "I might be stuck"}
+			</AlertTitle>
+			{!isResolved && (
+				<Badge>doom-loop</Badge>
+			)}
+			<AlertDescription>
+				{phase === "processing" ? (
+					<TextShimmerLoader />
+				) : phase === "rejected" ? (
+					<p>I stopped retrying {displayName} as you asked.</p>
+				) : phase === "complete" ? (
+					<p>Continuing to call {displayName} as you asked.</p>
+				) : (
+					<p>
+						I called {displayName} {threshold} times in a row
+						with similar arguments. Should I keep going or stop and rethink?
+					</p>
+				)}
+
+				{argPreview && phase === "pending" && (
+					<>
+						<Separator />
+						<div>
+							<span>Last arguments</span>
+							<pre>{argPreview}</pre>
+						</div>
+					</>
+				)}
+
+				{recentSignatures.length > 0 && phase === "pending" && (
+					<details>
+						<summary>Show repeated signatures ({recentSignatures.length})</summary>
+						<div>
+							{recentSignatures.map((sig) => (
+								<div key={sig}>• {sig}</div>
+							))}
+						</div>
+					</details>
+				)}
+
+				{phase === "pending" && (
+					<div>
+						<Button onClick={handleStop}>
+							Stop <CornerDownLeftIcon />
+						</Button>
+						<Button onClick={handleContinue}>Keep going</Button>
+					</div>
+				)}
+			</AlertDescription>
+		</Alert>
+	);
+}
+
+/**
+ * Discriminator: returns true iff the result is a ``permission_ask``
+ * interrupt with ``context.permission === "doom_loop"``. The fallback
+ * uses this BEFORE mounting an approval card to choose between
+ * ``DoomLoopApproval`` and ``GenericHitlApproval``.
+ */
+export function isDoomLoopInterrupt(result: unknown): boolean {
+	if (!isInterruptResult(result)) return false;
+	const ctx = (result.context ?? {}) as Record<string, unknown>;
+	return ctx.permission === "doom_loop";
+}
+
+/**
+ * Specialized doom-loop approval mounted by ``FallbackToolBody`` when
+ * ``isDoomLoopInterrupt(result)`` is true. Caller is responsible for
+ * the discrimination; this card receives a known ``InterruptResult``.
+ */
+export const DoomLoopApproval: HitlApprovalCard = ({ toolName, args, result }) => {
+	const { dispatch } = useHitlDecision();
+	return (
+		<DoomLoopCardView
+			toolName={toolName}
+			args={args}
+			interruptData={result}
+			onDecision={(decision) => dispatch([decision])}
+		/>
+	);
+};
diff --git a/surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx b/surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx
new file mode 100644
index 000000000..c8b35dbe0
--- /dev/null
+++ b/surfsense_web/features/chat-messages/hitl/approval-cards/generic-approval.tsx
@@ -0,0 +1,261 @@
+"use client";
+
+import { CornerDownLeftIcon, Pencil } from "lucide-react";
+import { useCallback, useEffect, useMemo, useState } from "react";
+import { toast } from "sonner";
+import { TextShimmerLoader } from "@/components/prompt-kit/loader";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Textarea } from "@/components/ui/textarea";
+import { getToolDisplayName } from "@/contracts/enums/toolIcons";
+import { connectorsApiService } from "@/lib/apis/connectors-api.service";
+import type { HitlApprovalCard, HitlDecision, InterruptResult } from "../types";
+import { useHitlDecision } from "../use-hitl-decision";
+import { useHitlPhase } from "../use-hitl-phase";
+
+function ParamEditor({
+	params,
+	onChange,
+	disabled,
+}: {
+	params: Record<string, unknown>;
+	onChange: (updated: Record<string, unknown>) => void;
+	disabled: boolean;
+}) {
+	const entries = Object.entries(params);
+	if (entries.length === 0) return null;
+
+	return (
+		<div>
+			{entries.map(([key, value]) => {
+				const strValue = value == null ? "" : String(value);
+				const isLong = strValue.length > 120;
+				const fieldId = `hitl-param-${key}`;
+
+				return (
+					<div key={key}>
+						<label htmlFor={fieldId}>{key}</label>
+						{isLong ? (