refactor(chat): add streaming/flows/resume_chat/ per-concern leaf modules

Three focused modules used by the upcoming resume-chat orchestrator: * runtime_context: build_resume_chat_runtime_context assembles the SurfSenseContextSchema for a resume turn (handles empty mention lists, since resume requests do not carry fresh @-mentions). * assistant_shell: persist_resume_assistant_shell writes a fresh assistant row for the resumed turn so the post-stream finalize has a target. * resume_routing: build_resume_routing collects the pending interrupts across paused subagents and slices the flat list of ResumeDecision[] into the correct (thread, subagent) buckets so LangGraph routes each decision back to the right paused tool call. Add-only; no orchestrator yet (next commit).
2026-07-14 22:52:15 +02:00 · 2026-05-25 21:50:03 +02:00 · 2026-05-25 21:50:03 +02:00 · 885d4acda9
commit 885d4acda9
parent b2a0888588
3 changed files with 119 additions and 0 deletions
--- a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/assistant_shell.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/assistant_shell.py
@ -0,0 +1,31 @@
 """Pre-write a fresh assistant row for this resume turn.
 The original (interrupted) ``stream_new_chat`` invocation already persisted
 its own assistant row anchored to a different ``turn_id``; resume allocates a
 new ``turn_id`` (per-request, see ``orchestrator``) so we need a separate row
 keyed on the same ``(thread_id, turn_id, ASSISTANT)`` invariant.
 Idempotent against migration 141's partial unique index — recovers the
 existing id on retry.
 Resume does NOT emit ``data-user-message-id``: the user row is from the
 original interrupted turn (different ``turn_id``) and is never re-persisted
 here. See B5 in the ``sse-based_message_id_handshake`` plan.
 """
 from __future__ import annotations
 from app.tasks.chat.persistence import persist_assistant_shell
 async def persist_resume_assistant_shell(
    *,
    chat_id: int,
    user_id: str | None,
    turn_id: str,
 ) -> int | None:
    return await persist_assistant_shell(
        chat_id=chat_id,
        user_id=user_id,
        turn_id=turn_id,
    )
--- a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/resume_routing.py
@ -0,0 +1,65 @@
 """Route a flat ``decisions`` list back to the right paused subagent.
 Each pending interrupt is stamped with its originating ``tool_call_id`` (see
 ``checkpointed_subagent_middleware.propagation``) so the resume slicer can
 re-target each ``HumanReview`` decision at the right ``tool_call_id``.
 LangGraph rejects scalar ``Command(resume=...)`` when multiple interrupts are
 pending (parallel HITL); the mapped form works for the single-pause case too,
 so we always use it.
 """
 from __future__ import annotations
 import logging
 from dataclasses import dataclass
 from typing import Any
 from app.utils.perf import get_perf_logger
 _perf_log = get_perf_logger()
 logger = logging.getLogger(__name__)
@dataclass
 class ResumeRoutingPayload:
    """Resolved per-``tool_call_id`` resume slices + the lg-shaped resume map."""
    routed_resume_value: dict[str, Any]
    lg_resume_map: dict[str, Any]
 async def build_resume_routing(
    agent: Any,
    *,
    chat_id: int,
    decisions: list[dict],
 ) -> ResumeRoutingPayload:
    """Read parent_state, collect pending tool-calls, slice decisions, build map.
    The middleware reads its per-``tool_call_id`` resume slice from the
    ``surfsense_resume_value`` configurable; parallel siblings each pop their
    own entry so they never race.
    """
    from app.agents.multi_agent_chat.middleware.main_agent.checkpointed_subagent_middleware.resume_routing import (
        build_lg_resume_map,
        collect_pending_tool_calls,
        slice_decisions_by_tool_call,
    )
    parent_state = await agent.aget_state(
        {"configurable": {"thread_id": str(chat_id)}}
    )
    pending = collect_pending_tool_calls(parent_state)
    _perf_log.info(
        "[hitl_route] resume_entry chat_id=%s decisions=%d pending_subagents=%d",
        chat_id,
        len(decisions),
        len(pending),
    )
    routed_resume_value = slice_decisions_by_tool_call(decisions, pending)
    lg_resume_map = build_lg_resume_map(parent_state, routed_resume_value)
    return ResumeRoutingPayload(
        routed_resume_value=routed_resume_value,
        lg_resume_map=lg_resume_map,
    )
--- a/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/resume_chat/runtime_context.py
@ -0,0 +1,23 @@
 """Build the per-invocation ``SurfSenseContextSchema`` for a resume turn.
 Resume doesn't carry new ``mentioned_document_ids`` (those are seeded by the
 original turn). We still build the context so future middleware extensions
 can rely on ``runtime.context`` always being populated.
 """
 from __future__ import annotations
 from app.agents.new_chat.context import SurfSenseContextSchema
 def build_resume_chat_runtime_context(
    *,
    search_space_id: int,
    request_id: str | None,
    turn_id: str,
 ) -> SurfSenseContextSchema:
    return SurfSenseContextSchema(
        search_space_id=search_space_id,
        request_id=request_id,
        turn_id=turn_id,
    )