refactor(chat): add streaming/flows/shared/ base helpers

Six small, single-purpose modules shared by the upcoming new_chat and resume_chat orchestrators: * llm_bundle: dispatches negative config_id to the YAML loader and non-negative config_id to the DB loader, returning (llm, AgentConfig). * pre_stream_setup: builds the connector service, resolves the Firecrawl API key, and returns the chat checkpointer. * first_frames: iter_initial_frames + iter_final_frames emit the canonical message-start / step-start / idle / finish / done SSE envelope. * finalize_emit: iter_token_usage_frame emits the per-turn usage frame from a TokenAccumulator summary. * finally_cleanup: close_session_and_clear_ai_responding and run_gc_pass centralize the finally-block bookkeeping. * span: open_chat_request_span / set_agent_mode / close_chat_request_span / record_outcome_attrs wrap the OpenTelemetry chat_request span. Add-only; these are not yet wired into stream_new_chat.py.
2026-07-16 23:01:06 +02:00 · 2026-05-25 21:49:09 +02:00 · 2026-05-25 21:49:09 +02:00 · e9a98ecafb
commit e9a98ecafb
parent 26c569467d
7 changed files with 343 additions and 0 deletions
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/init.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/init.py
@ -0,0 +1,3 @@
+"""Building blocks shared by ``new_chat`` and ``resume_chat`` orchestrators."""
+
+from __future__ import annotations
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/finalize_emit.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finalize_emit.py
@ -0,0 +1,54 @@
+"""Emit the per-turn token-usage SSE frame from the accumulator.
+
+``per_message_summary()`` returns ``None`` when the turn made no chargeable
+LLM calls (e.g. interrupt-on-input). In that case we skip the frame; the
+frontend has no usage to render.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from app.services.new_streaming_service import VercelStreamingService
+from app.utils.perf import get_perf_logger
+
+if TYPE_CHECKING:
+    from app.services.token_tracking_service import TokenAccumulator
+
+_perf_log = get_perf_logger()
+logger = logging.getLogger(__name__)
+
+
+def iter_token_usage_frame(
+    streaming_service: VercelStreamingService,
+    *,
+    accumulator: TokenAccumulator,
+    log_label: str,
+):
+    """Yield zero or one ``data: token-usage`` SSE frame.
+
+    Side effect: logs a one-line ``[token_usage] {log_label}: ...`` summary so
+    cost analysis can grep call/total/cost across all flows.
+    """
+    usage_summary = accumulator.per_message_summary()
+    _perf_log.info(
+        "[token_usage] %s: calls=%d total=%d cost_micros=%d summary=%s",
+        log_label,
+        len(accumulator.calls),
+        accumulator.grand_total,
+        accumulator.total_cost_micros,
+        usage_summary,
+    )
+    if usage_summary:
+        yield streaming_service.format_data(
+            "token-usage",
+            {
+                "usage": usage_summary,
+                "prompt_tokens": accumulator.total_prompt_tokens,
+                "completion_tokens": accumulator.total_completion_tokens,
+                "total_tokens": accumulator.grand_total,
+                "cost_micros": accumulator.total_cost_micros,
+                "call_details": accumulator.serialized_calls(),
+            },
+        )
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/finally_cleanup.py
@ -0,0 +1,69 @@
+"""Shared finally-block helpers: session close, GC pass, native-heap trim.
+
+These are called from inside an ``anyio.CancelScope(shield=True)`` block in
+each flow's ``finally`` (Starlette's BaseHTTPMiddleware cancels the scope on
+client disconnect; without the shield the very first ``await`` would raise
+``CancelledError`` and the rest of cleanup — including ``session.close()`` —
+would never run).
+"""
+
+from __future__ import annotations
+
+import contextlib
+import gc
+import logging
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import shielded_async_session
+from app.services.chat_session_state_service import clear_ai_responding
+from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
+
+_perf_log = get_perf_logger()
+logger = logging.getLogger(__name__)
+
+
+async def close_session_and_clear_ai_responding(
+    session: AsyncSession, chat_id: int
+) -> None:
+    """Rollback + clear AI-responding flag + expunge_all + close.
+
+    On rollback failure we fall back to a fresh shielded session for the flag
+    clear so a UI is never stuck on "AI is responding…" after a crash.
+    """
+    try:
+        await session.rollback()
+        await clear_ai_responding(session, chat_id)
+    except Exception:
+        try:
+            async with shielded_async_session() as fresh_session:
+                await clear_ai_responding(fresh_session, chat_id)
+        except Exception:
+            logger.warning(
+                "Failed to clear AI responding state for thread %s", chat_id
+            )
+
+    with contextlib.suppress(Exception):
+        session.expunge_all()
+
+    with contextlib.suppress(Exception):
+        await session.close()
+
+
+def run_gc_pass(*, log_prefix: str, chat_id: int) -> None:
+    """One full gen0/1/2 pass + native-heap trim + END system snapshot.
+
+    Breaking circular refs held by the agent graph, tools, and LLM wrappers
+    needs to happen in the caller (set the locals to ``None``) — this just
+    runs the collector and logs how many objects came back.
+    """
+    collected = gc.collect(0) + gc.collect(1) + gc.collect(2)
+    if collected:
+        _perf_log.info(
+            "[%s] gc.collect() reclaimed %d objects (chat_id=%s)",
+            log_prefix,
+            collected,
+            chat_id,
+        )
+    trim_native_heap()
+    log_system_snapshot(f"{log_prefix}_END")
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/first_frames.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/first_frames.py
@ -0,0 +1,40 @@
+"""Initial SSE frames every flow emits right after pre-stream setup.
+
+Order matters: ``message_start`` opens the assistant message, ``start_step``
+opens the first thinking step, ``turn-info`` lets the frontend stamp the
+correlation id onto the in-flight message, and ``turn-status: busy`` flips the
+UI into the streaming state.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+
+from app.services.new_streaming_service import VercelStreamingService
+
+
+def iter_initial_frames(
+    streaming_service: VercelStreamingService,
+    *,
+    turn_id: str,
+) -> Iterator[str]:
+    """Yield the four canonical opening frames in order.
+
+    ``turn-info`` carries ``chat_turn_id`` so even pure-text turns (which
+    never produce a tool / action-log event) still teach the frontend the
+    turn correlation id used for ``appendMessage`` durable storage.
+    """
+    yield streaming_service.format_message_start()
+    yield streaming_service.format_start_step()
+    yield streaming_service.format_data("turn-info", {"chat_turn_id": turn_id})
+    yield streaming_service.format_data("turn-status", {"status": "busy"})
+
+
+def iter_final_frames(
+    streaming_service: VercelStreamingService,
+) -> Iterator[str]:
+    """Yield ``turn-status: idle`` plus the finish/done trailer in order."""
+    yield streaming_service.format_data("turn-status", {"status": "idle"})
+    yield streaming_service.format_finish_step()
+    yield streaming_service.format_finish()
+    yield streaming_service.format_done()
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/llm_bundle.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/llm_bundle.py
@ -0,0 +1,57 @@
+"""Load an LLM + AgentConfig bundle for a given config id.
+
+Handles both code paths uniformly:
+- ``config_id >= 0`` → database-backed ``NewLLMConfig`` row (per-user/per-space).
+- ``config_id < 0``  → YAML-defined global LLM config (built-in defaults).
+
+Returns ``(llm, agent_config, error_message)``; on success ``error_message`` is
+``None``. The caller emits the friendly SSE error frame.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.new_chat.llm_config import (
+    AgentConfig,
+    create_chat_litellm_from_agent_config,
+    create_chat_litellm_from_config,
+    load_agent_config,
+    load_global_llm_config_by_id,
+)
+
+
+async def load_llm_bundle(
+    session: AsyncSession,
+    *,
+    config_id: int,
+    search_space_id: int,
+) -> tuple[Any, AgentConfig | None, str | None]:
+    if config_id >= 0:
+        loaded_agent_config = await load_agent_config(
+            session=session,
+            config_id=config_id,
+            search_space_id=search_space_id,
+        )
+        if not loaded_agent_config:
+            return (
+                None,
+                None,
+                f"Failed to load NewLLMConfig with id {config_id}",
+            )
+        return (
+            create_chat_litellm_from_agent_config(loaded_agent_config),
+            loaded_agent_config,
+            None,
+        )
+
+    loaded_llm_config = load_global_llm_config_by_id(config_id)
+    if not loaded_llm_config:
+        return None, None, f"Failed to load LLM config with id {config_id}"
+    return (
+        create_chat_litellm_from_config(loaded_llm_config),
+        AgentConfig.from_yaml_config(loaded_llm_config),
+        None,
+    )
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/pre_stream_setup.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/pre_stream_setup.py
@ -0,0 +1,40 @@
+"""Pre-stream setup: connector service, firecrawl key, checkpointer."""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.new_chat.checkpointer import get_checkpointer
+from app.db import SearchSourceConnectorType
+from app.services.connector_service import ConnectorService
+
+
+async def setup_connector_and_firecrawl(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+) -> tuple[ConnectorService, str | None]:
+    """Build the per-turn connector service and pull the firecrawl API key.
+
+    Returns ``(connector_service, firecrawl_api_key)``. ``firecrawl_api_key`` is
+    ``None`` when no web-crawler connector is configured (the agent simply
+    skips firecrawl-backed tools in that case).
+    """
+    connector_service = ConnectorService(session, search_space_id=search_space_id)
+    firecrawl_api_key: str | None = None
+    webcrawler_connector = await connector_service.get_connector_by_type(
+        SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, search_space_id
+    )
+    if webcrawler_connector and webcrawler_connector.config:
+        firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")
+    return connector_service, firecrawl_api_key
+
+
+async def get_chat_checkpointer():
+    """Resolve the PostgreSQL checkpointer for persistent conversation memory.
+
+    Thin wrapper around ``app.agents.new_chat.checkpointer.get_checkpointer`` so
+    flow orchestrators can rely on a streaming-local symbol and we have a hook
+    point if the checkpointer source ever needs to vary per flow.
+    """
+    return await get_checkpointer()
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/span.py
@ -0,0 +1,80 @@
+"""OpenTelemetry chat-request span wrapper for streaming flows."""
+
+from __future__ import annotations
+
+import contextlib
+import sys
+from typing import Any, Literal
+
+from app.observability import metrics as ot_metrics
+from app.observability import otel as ot
+
+
+def open_chat_request_span(
+    *,
+    chat_id: int,
+    search_space_id: int,
+    flow: Literal["new", "regenerate", "resume"],
+    request_id: str | None,
+    turn_id: str,
+    filesystem_mode: str,
+    client_platform: str,
+    agent_mode: str,
+) -> tuple[Any, Any]:
+    """Open the per-request span; returns ``(span_cm, span)`` for finally-close."""
+    span_cm = ot.chat_request_span(
+        chat_id=chat_id,
+        search_space_id=search_space_id,
+        flow=flow,
+        request_id=request_id,
+        turn_id=turn_id,
+        filesystem_mode=filesystem_mode,
+        client_platform=client_platform,
+        agent_mode=agent_mode,
+    )
+    span = span_cm.__enter__()
+    return span_cm, span
+
+
+def set_agent_mode(span: Any, agent_mode: str) -> None:
+    """Tag the span with the resolved agent mode (single / multi)."""
+    with contextlib.suppress(Exception):
+        span.set_attribute("agent.mode", agent_mode)
+
+
+def close_chat_request_span(
+    *,
+    span_cm: Any,
+    span: Any,
+    chat_outcome: str,
+    chat_agent_mode: str,
+    flow: Literal["new", "regenerate", "resume"],
+    chat_error_category: str | None,
+    duration_seconds: float,
+) -> None:
+    """Record metrics + close the span. Swallows errors (finally-block context)."""
+    with contextlib.suppress(Exception):
+        span.set_attribute("chat.outcome", chat_outcome)
+        ot_metrics.record_chat_request_duration(
+            duration_seconds * 1000,
+            flow=flow,
+            outcome=chat_outcome,
+            agent_mode=chat_agent_mode,
+        )
+        ot_metrics.record_chat_request_outcome(
+            flow=flow,
+            outcome=chat_outcome,
+            agent_mode=chat_agent_mode,
+            error_category=chat_error_category,
+        )
+    span_cm.__exit__(*sys.exc_info())
+
+
+def record_outcome_attrs(
+    span: Any, *, chat_outcome: str, chat_error_category: str | None
+) -> None:
+    """Stamp outcome + error.category on the span (used in the except branch)."""
+    with contextlib.suppress(Exception):
+        span.set_attribute("chat.outcome", chat_outcome)
+        if chat_error_category is not None:
+            span.set_attribute("error.category", chat_error_category)