refactor(chat): drop alternate streaming entry path; use graph_stream

This commit is contained in:
CREDO23 2026-05-07 19:25:20 +02:00
parent 52895e37e9
commit 7e07092f67
23 changed files with 61 additions and 1278 deletions

View file

@ -1,3 +1,3 @@
"""Chat streaming orchestrator and event relay."""
"""Chat streaming helpers (e.g. LangGraph → SSE relay under ``graph_stream``)."""
from __future__ import annotations

View file

@ -1,92 +0,0 @@
"""Agent setup helpers for orchestrated chat streaming."""
from __future__ import annotations
import contextlib
import logging
from collections.abc import Callable
from typing import Any
_PREFLIGHT_TIMEOUT_SEC: float = 2.5
_PREFLIGHT_MAX_TOKENS: int = 1
async def preflight_llm(
    llm: Any,
    *,
    is_provider_rate_limited: Callable[[BaseException], bool],
) -> None:
    """Fire a one-token completion probe so immediate provider 429s surface now.

    Args:
        llm: Client-like object; ``model``, ``api_key`` and ``api_base`` are
            read via ``getattr`` when present.
        is_provider_rate_limited: Predicate classifying an exception as a
            provider rate limit. Such errors are re-raised; every other
            failure is logged at DEBUG and suppressed.
    """
    from litellm import acompletion

    model = getattr(llm, "model", None)
    # No concrete model pinned yet ("auto" or missing) -> nothing to probe.
    if not model or model == "auto":
        return
    try:
        await acompletion(
            model=model,
            messages=[{"role": "user", "content": "ping"}],
            api_key=getattr(llm, "api_key", None),
            api_base=getattr(llm, "api_base", None),
            max_tokens=_PREFLIGHT_MAX_TOKENS,
            timeout=_PREFLIGHT_TIMEOUT_SEC,
            stream=False,
            metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]},
        )
    except Exception as exc:
        # Only genuine rate limits abort the caller; anything else is noise
        # here -- the real request will surface real errors on its own.
        if is_provider_rate_limited(exc):
            raise
        logging.getLogger(__name__).debug(
            "auto_pin_preflight non_rate_limit_error model=%s err=%s",
            model,
            exc,
        )
async def build_main_agent_for_thread(
agent_factory: Any,
*,
llm: Any,
search_space_id: int,
db_session: Any,
connector_service: Any,
checkpointer: Any,
user_id: str | None,
thread_id: int | None,
agent_config: Any,
firecrawl_api_key: str | None,
thread_visibility: Any,
filesystem_selection: Any,
disabled_tools: list[str] | None = None,
mentioned_document_ids: list[int] | None = None,
) -> Any:
"""Run one canonical agent-build call for a single thread."""
return await agent_factory(
llm=llm,
search_space_id=search_space_id,
db_session=db_session,
connector_service=connector_service,
checkpointer=checkpointer,
user_id=user_id,
thread_id=thread_id,
agent_config=agent_config,
firecrawl_api_key=firecrawl_api_key,
thread_visibility=thread_visibility,
filesystem_selection=filesystem_selection,
disabled_tools=disabled_tools,
mentioned_document_ids=mentioned_document_ids,
)
async def settle_speculative_agent_build(task: Any) -> None:
    """Await a discarded speculative build, ignoring its result or failure.

    Suppresses ``BaseException`` on purpose: even cancellation of a
    speculative build must never propagate into the main streaming path.
    """
    try:
        await task
    except BaseException:  # noqa: BLE001 -- deliberate, see docstring
        pass
__all__ = [
"build_main_agent_for_thread",
"preflight_llm",
"settle_speculative_agent_build",
]

View file

@ -0,0 +1,21 @@
"""LangGraph ``astream_events`` → SSE (``stream_output`` + ``StreamingResult``).
Imports are lazy to avoid a circular import with ``relay.event_relay``.
"""
from __future__ import annotations
__all__ = ["StreamingResult", "stream_output"]
def __getattr__(name: str):
if name == "stream_output":
from app.tasks.chat.streaming.graph_stream.event_stream import stream_output
return stream_output
if name == "StreamingResult":
from app.tasks.chat.streaming.graph_stream.result import StreamingResult
return StreamingResult
msg = f"module {__name__!r} has no attribute {name!r}"
raise AttributeError(msg)

View file

@ -1,4 +1,4 @@
"""Run LangGraph event streams through the EventRelay."""
"""Run LangGraph event streams through ``EventRelay``."""
from __future__ import annotations
@ -6,7 +6,7 @@ from collections.abc import AsyncIterator
from typing import Any
from app.agents.new_chat.feature_flags import get_flags
from app.tasks.chat.streaming.orchestration.output import StreamingResult
from app.tasks.chat.streaming.graph_stream.result import StreamingResult
from app.tasks.chat.streaming.relay.event_relay import EventRelay
from app.tasks.chat.streaming.relay.state import AgentEventRelayState

View file

@ -1,4 +1,4 @@
"""Output facts collected while streaming one orchestrated agent turn."""
"""Mutable facts collected while relaying one agent stream (``stream_output``)."""
from __future__ import annotations
@ -26,4 +26,3 @@ class StreamingResult:
commit_gate_reason: str = ""
assistant_message_id: int | None = None
content_builder: Any | None = field(default=None, repr=False)

View file

@ -1,11 +0,0 @@
"""Composable orchestration pieces for chat streaming."""
from app.tasks.chat.streaming.orchestration.event_stream import stream_output
from app.tasks.chat.streaming.orchestration.input import StreamingContext
from app.tasks.chat.streaming.orchestration.output import StreamingResult
__all__ = [
"StreamingContext",
"StreamingResult",
"stream_output",
]

View file

@ -1,23 +0,0 @@
"""Inputs for orchestrator-owned streaming execution."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
@dataclass(frozen=True)
class StreamingContext:
    """Container for dependencies required by ``stream_output``."""

    # Agent whose event stream is relayed (built per thread by the builders).
    agent: Any
    # Run config passed to the agent, e.g. {"configurable": ..., "recursion_limit": ...}.
    config: dict[str, Any]
    # Initial input: a messages/state dict for new turns, or a resume Command.
    input_data: Any
    # SSE formatter used to serialize frames (VercelStreamingService in the builders).
    streaming_service: Any
    # Prefix for generated thinking-step ids ("thinking" / "thinking-resume").
    step_prefix: str = "thinking"
    # Optional pre-seeded first thinking step (id/title/bullet items).
    initial_step_id: str | None = None
    initial_step_title: str = ""
    initial_step_items: list[str] | None = None
    # Optional assistant-content builder forwarded to stream_output.
    content_builder: Any | None = None
    # Per-turn runtime context (SurfSenseContextSchema in the builders).
    runtime_context: Any = None

View file

@ -1,261 +0,0 @@
"""Top-level chat streaming entrypoints.
"""
from __future__ import annotations
from collections.abc import AsyncGenerator
from typing import Any, Literal
from app.agents.new_chat.filesystem_selection import FilesystemSelection
from app.db import ChatVisibility
from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
from app.tasks.chat.streaming.orchestration.streaming_context import (
build_chat_streaming_context,
build_regenerate_streaming_context,
build_resume_streaming_context,
)
from app.tasks.chat.streaming.orchestration.event_stream import stream_output
from app.tasks.chat.streaming.orchestration.input import StreamingContext
from app.tasks.chat.streaming.orchestration.output import StreamingResult
def _build_streaming_result(
    *,
    chat_id: int,
    request_id: str | None,
    filesystem_selection: FilesystemSelection | None,
    suffix: str,
) -> StreamingResult:
    """Create the per-turn ``StreamingResult`` shared by all entrypoints.

    ``suffix`` tags the turn id with the originating entrypoint
    ("orchestrator", "orchestrator-resume", ...). Without a filesystem
    selection the mode/platform default to "cloud"/"web".
    """
    if filesystem_selection is None:
        fs_mode = "cloud"
        platform = "web"
    else:
        fs_mode = filesystem_selection.mode.value
        platform = filesystem_selection.client_platform.value
    return StreamingResult(
        request_id=request_id,
        turn_id=f"{chat_id}:{suffix}",
        filesystem_mode=fs_mode,
        client_platform=platform,
    )
async def _stream_output_with_streaming_context(
    *,
    streaming_context: StreamingContext,
    result: StreamingResult,
) -> AsyncGenerator[str, None]:
    """Relay ``stream_output`` frames for one pre-built streaming context."""
    ctx = streaming_context
    frames = stream_output(
        agent=ctx.agent,
        config=ctx.config,
        input_data=ctx.input_data,
        streaming_service=ctx.streaming_service,
        result=result,
        step_prefix=ctx.step_prefix,
        initial_step_id=ctx.initial_step_id,
        initial_step_title=ctx.initial_step_title,
        initial_step_items=ctx.initial_step_items,
        content_builder=ctx.content_builder,
        runtime_context=ctx.runtime_context,
    )
    async for frame in frames:
        yield frame
async def stream_chat(
    *,
    user_query: str,
    search_space_id: int,
    chat_id: int,
    user_id: str | None = None,
    llm_config_id: int = -1,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    mentioned_documents: list[dict[str, Any]] | None = None,
    checkpoint_id: str | None = None,
    needs_history_bootstrap: bool = False,
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    user_image_data_urls: list[str] | None = None,
    streaming_context: StreamingContext | None = None,
) -> AsyncGenerator[str, None]:
    """Stream a new chat turn through the current production pipeline.

    Builds a ``StreamingContext`` from the route-level inputs unless the
    caller supplied one. If a context is available the turn runs through the
    orchestrated ``stream_output`` path; otherwise it falls back to the
    legacy ``stream_new_chat`` generator. Yields serialized SSE frames.

    NOTE: ``mentioned_documents`` is only forwarded on the legacy fallback
    path; the orchestrated builder does not receive it.
    """
    if streaming_context is None:
        # Context build returns None on failure (e.g. missing LLM config),
        # which routes this turn to the legacy fallback below.
        streaming_context = await build_chat_streaming_context(
            user_query=user_query,
            search_space_id=search_space_id,
            chat_id=chat_id,
            user_id=user_id,
            llm_config_id=llm_config_id,
            mentioned_document_ids=mentioned_document_ids,
            mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
            checkpoint_id=checkpoint_id,
            needs_history_bootstrap=needs_history_bootstrap,
            thread_visibility=thread_visibility,
            current_user_display_name=current_user_display_name,
            disabled_tools=disabled_tools,
            filesystem_selection=filesystem_selection,
            request_id=request_id,
            user_image_data_urls=user_image_data_urls,
        )
    if streaming_context is not None:
        result = _build_streaming_result(
            chat_id=chat_id,
            request_id=request_id,
            filesystem_selection=filesystem_selection,
            suffix="orchestrator",
        )
        async for frame in _stream_output_with_streaming_context(
            streaming_context=streaming_context,
            result=result,
        ):
            yield frame
        return
    # Fallback: the orchestrated context could not be built.
    async for chunk in stream_new_chat(
        user_query=user_query,
        search_space_id=search_space_id,
        chat_id=chat_id,
        user_id=user_id,
        llm_config_id=llm_config_id,
        mentioned_document_ids=mentioned_document_ids,
        mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
        mentioned_documents=mentioned_documents,
        checkpoint_id=checkpoint_id,
        needs_history_bootstrap=needs_history_bootstrap,
        thread_visibility=thread_visibility,
        current_user_display_name=current_user_display_name,
        disabled_tools=disabled_tools,
        filesystem_selection=filesystem_selection,
        request_id=request_id,
        user_image_data_urls=user_image_data_urls,
    ):
        yield chunk
async def stream_resume(
    *,
    chat_id: int,
    search_space_id: int,
    decisions: list[dict],
    user_id: str | None = None,
    llm_config_id: int = -1,
    thread_visibility: ChatVisibility | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    disabled_tools: list[str] | None = None,
    streaming_context: StreamingContext | None = None,
) -> AsyncGenerator[str, None]:
    """Resume an interrupted chat turn through the current production pipeline.

    ``decisions`` carries the user's answers to the pending interrupt(s).
    A ``StreamingContext`` is built unless supplied; when the build fails
    (returns ``None``) the turn falls back to the legacy
    ``stream_resume_chat`` generator. Yields serialized SSE frames.
    """
    if streaming_context is None:
        # Build failure (None) routes this resume to the legacy fallback.
        streaming_context = await build_resume_streaming_context(
            chat_id=chat_id,
            search_space_id=search_space_id,
            decisions=decisions,
            user_id=user_id,
            llm_config_id=llm_config_id,
            thread_visibility=thread_visibility,
            filesystem_selection=filesystem_selection,
            request_id=request_id,
            disabled_tools=disabled_tools,
        )
    if streaming_context is not None:
        result = _build_streaming_result(
            chat_id=chat_id,
            request_id=request_id,
            filesystem_selection=filesystem_selection,
            suffix="orchestrator-resume",
        )
        async for frame in _stream_output_with_streaming_context(
            streaming_context=streaming_context,
            result=result,
        ):
            yield frame
        return
    # Fallback: legacy resume pipeline.
    async for chunk in stream_resume_chat(
        chat_id=chat_id,
        search_space_id=search_space_id,
        decisions=decisions,
        user_id=user_id,
        llm_config_id=llm_config_id,
        thread_visibility=thread_visibility,
        filesystem_selection=filesystem_selection,
        request_id=request_id,
        disabled_tools=disabled_tools,
    ):
        yield chunk
async def stream_regenerate(
    *,
    user_query: str,
    search_space_id: int,
    chat_id: int,
    user_id: str | None = None,
    llm_config_id: int = -1,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    mentioned_documents: list[dict[str, Any]] | None = None,
    checkpoint_id: str | None = None,
    needs_history_bootstrap: bool = False,
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    user_image_data_urls: list[str] | None = None,
    flow: Literal["new", "regenerate"] = "regenerate",
    streaming_context: StreamingContext | None = None,
) -> AsyncGenerator[str, None]:
    """Regenerate an assistant turn through the current production pipeline.

    Mirrors ``stream_chat`` but builds a regenerate-flavored context and tags
    the turn id with "orchestrator-regenerate". Yields serialized SSE frames.

    NOTE: ``flow`` and ``mentioned_documents`` are only forwarded on the
    legacy ``stream_new_chat`` fallback path; the orchestrated builder does
    not receive them.
    """
    if streaming_context is None:
        # Build failure (None) routes this turn to the legacy fallback.
        streaming_context = await build_regenerate_streaming_context(
            user_query=user_query,
            search_space_id=search_space_id,
            chat_id=chat_id,
            user_id=user_id,
            llm_config_id=llm_config_id,
            mentioned_document_ids=mentioned_document_ids,
            mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
            checkpoint_id=checkpoint_id,
            needs_history_bootstrap=needs_history_bootstrap,
            thread_visibility=thread_visibility,
            current_user_display_name=current_user_display_name,
            disabled_tools=disabled_tools,
            filesystem_selection=filesystem_selection,
            request_id=request_id,
            user_image_data_urls=user_image_data_urls,
        )
    if streaming_context is not None:
        result = _build_streaming_result(
            chat_id=chat_id,
            request_id=request_id,
            filesystem_selection=filesystem_selection,
            suffix="orchestrator-regenerate",
        )
        async for frame in _stream_output_with_streaming_context(
            streaming_context=streaming_context,
            result=result,
        ):
            yield frame
        return
    # Fallback: legacy pipeline; this is where flow/mentioned_documents apply.
    async for chunk in stream_new_chat(
        user_query=user_query,
        search_space_id=search_space_id,
        chat_id=chat_id,
        user_id=user_id,
        llm_config_id=llm_config_id,
        mentioned_document_ids=mentioned_document_ids,
        mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
        mentioned_documents=mentioned_documents,
        checkpoint_id=checkpoint_id,
        needs_history_bootstrap=needs_history_bootstrap,
        thread_visibility=thread_visibility,
        current_user_display_name=current_user_display_name,
        disabled_tools=disabled_tools,
        filesystem_selection=filesystem_selection,
        request_id=request_id,
        user_image_data_urls=user_image_data_urls,
        flow=flow,
    ):
        yield chunk

View file

@ -1,18 +0,0 @@
"""Streaming context builders per orchestrator entrypoint."""
from app.tasks.chat.streaming.orchestration.streaming_context.chat import (
build_chat_streaming_context,
)
from app.tasks.chat.streaming.orchestration.streaming_context.regenerate import (
build_regenerate_streaming_context,
)
from app.tasks.chat.streaming.orchestration.streaming_context.resume import (
build_resume_streaming_context,
)
__all__ = [
"build_chat_streaming_context",
"build_regenerate_streaming_context",
"build_resume_streaming_context",
]

View file

@ -1,258 +0,0 @@
"""Build ``StreamingContext`` for chat streaming."""
from __future__ import annotations
import logging
import time
from typing import Any
from langchain_core.messages import HumanMessage
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
from app.agents.new_chat.checkpointer import get_checkpointer
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.filesystem_selection import FilesystemSelection
from app.agents.new_chat.llm_config import (
AgentConfig,
create_chat_litellm_from_agent_config,
create_chat_litellm_from_config,
load_agent_config,
load_global_llm_config_by_id,
)
from app.db import (
ChatVisibility,
NewChatThread,
Report,
SearchSourceConnectorType,
SurfsenseDocsDocument,
async_session_maker,
)
from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id
from app.services.connector_service import ConnectorService
from app.services.new_streaming_service import VercelStreamingService
from app.tasks.chat.stream_new_chat import format_mentioned_surfsense_docs_as_context
from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread
from app.tasks.chat.streaming.orchestration.input import StreamingContext
from app.utils.content_utils import bootstrap_history_from_db
from app.utils.user_message_multimodal import build_human_message_content
logger = logging.getLogger(__name__)
async def build_chat_streaming_context(
    *,
    user_query: str,
    search_space_id: int,
    chat_id: int,
    user_id: str | None = None,
    llm_config_id: int = -1,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    checkpoint_id: str | None = None,
    needs_history_bootstrap: bool = False,
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    user_image_data_urls: list[str] | None = None,
) -> StreamingContext | None:
    """Build context for ``stream_output`` from route-level chat inputs.

    Owns a DB session for the whole build: resolves the (possibly
    auto-pinned) LLM config, constructs the thread's main agent, assembles
    the turn's input messages (history bootstrap, mentioned docs,
    recent-report context), and returns a ready-to-run ``StreamingContext``.

    Returns:
        A ``StreamingContext``, or ``None`` on any failure so the caller can
        fall back to the legacy streaming pipeline.
    """
    session = async_session_maker()
    try:
        # Keep the caller's id for error logging; llm_config_id is rebound
        # below to the resolved/pinned id.
        requested_llm_config_id = llm_config_id
        llm_config_id = (
            await resolve_or_get_pinned_llm_config_id(
                session,
                thread_id=chat_id,
                search_space_id=search_space_id,
                user_id=user_id,
                selected_llm_config_id=llm_config_id,
                requires_image_input=bool(user_image_data_urls),
            )
        ).resolved_llm_config_id
        llm: Any
        agent_config: AgentConfig | None
        if llm_config_id >= 0:
            # Non-negative ids are per-search-space configs stored in the DB.
            agent_config = await load_agent_config(
                session=session,
                config_id=llm_config_id,
                search_space_id=search_space_id,
            )
            if not agent_config:
                logger.warning("streaming context build failed: missing config %s", llm_config_id)
                return None
            llm = create_chat_litellm_from_agent_config(agent_config)
        else:
            # Negative ids address global (YAML-defined) LLM configs.
            loaded_llm_config = load_global_llm_config_by_id(llm_config_id)
            if not loaded_llm_config:
                logger.warning(
                    "streaming context build failed: missing global config %s",
                    llm_config_id,
                )
                return None
            llm = create_chat_litellm_from_config(loaded_llm_config)
            agent_config = AgentConfig.from_yaml_config(loaded_llm_config)
        connector_service = ConnectorService(session, search_space_id=search_space_id)
        # A configured web-crawler connector can supply a Firecrawl API key
        # that is passed through to the agent build.
        firecrawl_api_key = None
        webcrawler_connector = await connector_service.get_connector_by_type(
            SearchSourceConnectorType.WEBCRAWLER_CONNECTOR,
            search_space_id,
        )
        if webcrawler_connector and webcrawler_connector.config:
            firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")
        checkpointer = await get_checkpointer()
        visibility = thread_visibility or ChatVisibility.PRIVATE
        from app.config import config as app_config

        # Feature flag selects the multi-agent factory over the default one.
        agent_factory = (
            create_multi_agent_chat_deep_agent
            if bool(app_config.MULTI_AGENT_CHAT_ENABLED)
            else create_surfsense_deep_agent
        )
        agent = await build_main_agent_for_thread(
            agent_factory,
            llm=llm,
            search_space_id=search_space_id,
            db_session=session,
            connector_service=connector_service,
            checkpointer=checkpointer,
            user_id=user_id,
            thread_id=chat_id,
            agent_config=agent_config,
            firecrawl_api_key=firecrawl_api_key,
            thread_visibility=visibility,
            filesystem_selection=filesystem_selection,
            disabled_tools=disabled_tools,
            mentioned_document_ids=mentioned_document_ids,
        )
        langchain_messages = []
        if needs_history_bootstrap:
            # Rebuild prior turns from the DB, then clear the thread's
            # bootstrap flag so later turns skip this step.
            langchain_messages = await bootstrap_history_from_db(
                session,
                chat_id,
                thread_visibility=visibility,
            )
            thread_result = await session.execute(
                select(NewChatThread).filter(NewChatThread.id == chat_id)
            )
            thread = thread_result.scalars().first()
            if thread:
                thread.needs_history_bootstrap = False
                await session.commit()
        # Load @-mentioned SurfSense docs (with their chunks) for inline context.
        mentioned_surfsense_docs: list[SurfsenseDocsDocument] = []
        if mentioned_surfsense_doc_ids:
            result = await session.execute(
                select(SurfsenseDocsDocument)
                .options(selectinload(SurfsenseDocsDocument.chunks))
                .filter(SurfsenseDocsDocument.id.in_(mentioned_surfsense_doc_ids))
            )
            mentioned_surfsense_docs = list(result.scalars().all())
        # Up to three most recent non-empty reports in this thread, so the
        # agent can link follow-up requests to an existing parent_report_id.
        recent_reports_result = await session.execute(
            select(Report)
            .filter(Report.thread_id == chat_id, Report.content.isnot(None))
            .order_by(Report.id.desc())
            .limit(3)
        )
        recent_reports = list(recent_reports_result.scalars().all())
        # Assemble the final user query: optional doc/report context blocks
        # wrap the raw query; shared threads get a speaker prefix.
        final_query = user_query
        context_parts = []
        if mentioned_surfsense_docs:
            context_parts.append(
                format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
            )
        if recent_reports:
            report_lines = [
                f' - report_id={r.id}, title="{r.title}", style="{r.report_style or "detailed"}"'
                for r in recent_reports
            ]
            reports_listing = "\n".join(report_lines)
            context_parts.append(
                "<report_context>\n"
                "Previously generated reports in this conversation:\n"
                f"{reports_listing}\n\n"
                "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of these reports, "
                "set parent_report_id to the relevant report_id above.\n"
                "If the user wants a completely NEW report on a different topic, "
                "leave parent_report_id unset.\n"
                "</report_context>"
            )
        if context_parts:
            joined_context = "\n\n".join(context_parts)
            final_query = f"{joined_context}\n\n<user_query>{user_query}</user_query>"
        if visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name:
            final_query = f"**[{current_user_display_name}]:** {final_query}"
        # Attach any user images alongside the text content.
        human_content = build_human_message_content(
            final_query,
            list(user_image_data_urls or ()),
        )
        langchain_messages.append(HumanMessage(content=human_content))
        turn_id = f"{chat_id}:{int(time.time() * 1000)}"
        input_state = {
            "messages": langchain_messages,
            "search_space_id": search_space_id,
            "request_id": request_id or "unknown",
            "turn_id": turn_id,
        }
        configurable = {
            "thread_id": str(chat_id),
            "request_id": request_id or "unknown",
            "turn_id": turn_id,
        }
        if checkpoint_id:
            configurable["checkpoint_id"] = checkpoint_id
        config = {"configurable": configurable, "recursion_limit": 10_000}
        # Seed the first thinking step with a short excerpt of the query.
        initial_title = (
            "Analyzing referenced content"
            if mentioned_surfsense_docs
            else "Understanding your request"
        )
        action_verb = "Analyzing" if mentioned_surfsense_docs else "Processing"
        query_excerpt = user_query[:80] + ("..." if len(user_query) > 80 else "")
        query_part = query_excerpt if query_excerpt.strip() else "(message)"
        initial_items = [f"{action_verb}: {query_part}"]
        runtime_context = SurfSenseContextSchema(
            search_space_id=search_space_id,
            mentioned_document_ids=list(mentioned_document_ids or []),
            request_id=request_id,
            turn_id=turn_id,
        )
        await session.commit()
        return StreamingContext(
            agent=agent,
            config=config,
            input_data=input_state,
            streaming_service=VercelStreamingService(),
            step_prefix="thinking",
            initial_step_id="thinking-1",
            initial_step_title=initial_title,
            initial_step_items=initial_items,
            content_builder=None,
            runtime_context=runtime_context,
        )
    except Exception:
        # Any failure is logged and converted to None so the caller can fall
        # back to the legacy pipeline instead of erroring the request.
        logger.exception(
            "Failed to build chat streaming context (llm_config_id=%s requested=%s)",
            llm_config_id,
            requested_llm_config_id,
        )
        return None
    finally:
        await session.close()

View file

@ -1,49 +0,0 @@
"""Build ``StreamingContext`` for regenerate streaming."""
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemSelection
from app.db import ChatVisibility
from app.tasks.chat.streaming.orchestration.input import StreamingContext
from app.tasks.chat.streaming.orchestration.streaming_context.chat import (
build_chat_streaming_context,
)
async def build_regenerate_streaming_context(
    *,
    user_query: str,
    search_space_id: int,
    chat_id: int,
    user_id: str | None = None,
    llm_config_id: int = -1,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    checkpoint_id: str | None = None,
    needs_history_bootstrap: bool = False,
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    user_image_data_urls: list[str] | None = None,
) -> StreamingContext | None:
    """Build context for ``stream_regenerate`` execution.

    Regeneration reuses the chat builder unchanged; this wrapper exists only
    so each entrypoint has its own named builder.
    """
    forwarded = {
        "user_query": user_query,
        "search_space_id": search_space_id,
        "chat_id": chat_id,
        "user_id": user_id,
        "llm_config_id": llm_config_id,
        "mentioned_document_ids": mentioned_document_ids,
        "mentioned_surfsense_doc_ids": mentioned_surfsense_doc_ids,
        "checkpoint_id": checkpoint_id,
        "needs_history_bootstrap": needs_history_bootstrap,
        "thread_visibility": thread_visibility,
        "current_user_display_name": current_user_display_name,
        "disabled_tools": disabled_tools,
        "filesystem_selection": filesystem_selection,
        "request_id": request_id,
        "user_image_data_urls": user_image_data_urls,
    }
    return await build_chat_streaming_context(**forwarded)

View file

@ -1,154 +0,0 @@
"""Build ``StreamingContext`` for resume streaming."""
from __future__ import annotations
import logging
import time
from typing import Any
from langgraph.types import Command
from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
from app.agents.new_chat.checkpointer import get_checkpointer
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.filesystem_selection import FilesystemSelection
from app.agents.new_chat.llm_config import (
AgentConfig,
create_chat_litellm_from_agent_config,
create_chat_litellm_from_config,
load_agent_config,
load_global_llm_config_by_id,
)
from app.db import ChatVisibility, SearchSourceConnectorType, async_session_maker
from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id
from app.services.connector_service import ConnectorService
from app.services.new_streaming_service import VercelStreamingService
from app.tasks.chat.streaming.agent_setup import build_main_agent_for_thread
from app.tasks.chat.streaming.orchestration.input import StreamingContext
logger = logging.getLogger(__name__)
async def build_resume_streaming_context(
    *,
    chat_id: int,
    search_space_id: int,
    decisions: list[dict],
    user_id: str | None = None,
    llm_config_id: int = -1,
    thread_visibility: ChatVisibility | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    disabled_tools: list[str] | None = None,
) -> StreamingContext | None:
    """Build context for ``stream_resume`` execution.

    Resolves the (possibly auto-pinned) LLM config, rebuilds the thread's
    main agent, and wraps the user's interrupt ``decisions`` in a LangGraph
    ``Command(resume=...)`` as the stream input. Returns ``None`` on any
    failure so the caller can fall back to the legacy resume pipeline.
    """
    session = async_session_maker()
    try:
        llm_config_id = (
            await resolve_or_get_pinned_llm_config_id(
                session,
                thread_id=chat_id,
                search_space_id=search_space_id,
                user_id=user_id,
                selected_llm_config_id=llm_config_id,
            )
        ).resolved_llm_config_id
        llm: Any
        agent_config: AgentConfig | None
        if llm_config_id >= 0:
            # Non-negative ids are per-search-space configs stored in the DB.
            agent_config = await load_agent_config(
                session=session,
                config_id=llm_config_id,
                search_space_id=search_space_id,
            )
            if not agent_config:
                logger.warning("resume context build failed: missing config %s", llm_config_id)
                return None
            llm = create_chat_litellm_from_agent_config(agent_config)
        else:
            # Negative ids address global (YAML-defined) LLM configs.
            loaded_llm_config = load_global_llm_config_by_id(llm_config_id)
            if not loaded_llm_config:
                logger.warning(
                    "resume context build failed: missing global config %s",
                    llm_config_id,
                )
                return None
            llm = create_chat_litellm_from_config(loaded_llm_config)
            agent_config = AgentConfig.from_yaml_config(loaded_llm_config)
        connector_service = ConnectorService(session, search_space_id=search_space_id)
        # A configured web-crawler connector can supply a Firecrawl API key.
        firecrawl_api_key = None
        webcrawler_connector = await connector_service.get_connector_by_type(
            SearchSourceConnectorType.WEBCRAWLER_CONNECTOR,
            search_space_id,
        )
        if webcrawler_connector and webcrawler_connector.config:
            firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")
        checkpointer = await get_checkpointer()
        visibility = thread_visibility or ChatVisibility.PRIVATE
        from app.config import config as app_config

        # Feature flag selects the multi-agent factory over the default one.
        agent_factory = (
            create_multi_agent_chat_deep_agent
            if bool(app_config.MULTI_AGENT_CHAT_ENABLED)
            else create_surfsense_deep_agent
        )
        agent = await build_main_agent_for_thread(
            agent_factory,
            llm=llm,
            search_space_id=search_space_id,
            db_session=session,
            connector_service=connector_service,
            checkpointer=checkpointer,
            user_id=user_id,
            thread_id=chat_id,
            agent_config=agent_config,
            firecrawl_api_key=firecrawl_api_key,
            thread_visibility=visibility,
            filesystem_selection=filesystem_selection,
            disabled_tools=disabled_tools,
        )
        turn_id = f"{chat_id}:{int(time.time() * 1000)}"
        # Decisions are mirrored into configurable so downstream consumers
        # can read the resume payload from the run config as well.
        config = {
            "configurable": {
                "thread_id": str(chat_id),
                "request_id": request_id or "unknown",
                "turn_id": turn_id,
                "surfsense_resume_value": {"decisions": decisions},
            },
            "recursion_limit": 10_000,
        }
        runtime_context = SurfSenseContextSchema(
            search_space_id=search_space_id,
            request_id=request_id,
            turn_id=turn_id,
        )
        await session.commit()
        return StreamingContext(
            agent=agent,
            config=config,
            # Resume input is a Command, not a messages dict.
            input_data=Command(resume={"decisions": decisions}),
            streaming_service=VercelStreamingService(),
            step_prefix="thinking-resume",
            initial_step_id=None,
            initial_step_title="",
            initial_step_items=None,
            content_builder=None,
            runtime_context=runtime_context,
        )
    except Exception:
        # Converted to None so the caller can fall back to the legacy path.
        logger.exception(
            "Failed to build resume streaming context (llm_config_id=%s)",
            llm_config_id,
        )
        return None
    finally:
        await session.close()

View file

@ -1,7 +1,23 @@
"""Relay state: thinking steps, tool bookkeeping, and stream helpers."""
"""Relay: thinking steps, tool bookkeeping, and ``EventRelay``.
Package imports are lazy so ``relay.thinking_step_sse`` (and siblings) can load
without pulling in ``event_relay`` (which imports handler modules that may
import those siblings).
"""
from __future__ import annotations
from app.tasks.chat.streaming.relay.event_relay import EventRelay, EventRelayConfig
__all__ = ["EventRelay", "EventRelayConfig"]
def __getattr__(name: str):
if name == "EventRelay":
from app.tasks.chat.streaming.relay.event_relay import EventRelay
return EventRelay
if name == "EventRelayConfig":
from app.tasks.chat.streaming.relay.event_relay import EventRelayConfig
return EventRelayConfig
msg = f"module {__name__!r} has no attribute {name!r}"
raise AttributeError(msg)

View file

@ -7,6 +7,7 @@ from dataclasses import dataclass, field
from typing import Any
from app.services.streaming.emitter import EmitterRegistry
from app.tasks.chat.streaming.graph_stream.result import StreamingResult
from app.tasks.chat.streaming.handlers.chain_end import iter_chain_end_frames
from app.tasks.chat.streaming.handlers.chat_model_stream import (
iter_chat_model_stream_frames,
@ -16,7 +17,6 @@ from app.tasks.chat.streaming.handlers.custom_event_dispatch import (
)
from app.tasks.chat.streaming.handlers.tool_end import iter_tool_end_frames
from app.tasks.chat.streaming.handlers.tool_start import iter_tool_start_frames
from app.tasks.chat.streaming.orchestration.output import StreamingResult
from app.tasks.chat.streaming.relay.state import AgentEventRelayState
from app.tasks.chat.streaming.relay.thinking_step_completion import (
complete_active_thinking_step,