refactor(chat): drop alternate streaming entry path; use graph_stream

2026-05-12 17:22:38 +02:00 · 2026-05-07 19:25:20 +02:00 · 2026-05-07 19:25:20 +02:00 · 7e07092f67
commit 7e07092f67
parent 52895e37e9
23 changed files with 61 additions and 1278 deletions
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_agent_setup.py
@ -1,120 +0,0 @@
-"""Behavior tests for streaming agent setup helpers."""
-
-from __future__ import annotations
-
-import sys
-import types
-from typing import Any
-
-import pytest
-
-from app.tasks.chat.streaming import agent_setup
-
-pytestmark = pytest.mark.unit
-
-
-async def test_preflight_llm_calls_litellm_when_model_present(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
-    calls: dict[str, Any] = {}
-
-    async def _fake_acompletion(**kwargs: Any):
-        calls.update(kwargs)
-        return {"ok": True}
-
-    monkeypatch.setitem(
-        sys.modules,
-        "litellm",
-        types.SimpleNamespace(acompletion=_fake_acompletion),
-    )
-
-    llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b")
-    await agent_setup.preflight_llm(llm, is_provider_rate_limited=lambda _: False)
-
-    assert calls["model"] == "openai/test"
-    assert calls["max_tokens"] == 1
-    assert calls["timeout"] == 2.5
-    assert calls["stream"] is False
-
-
-async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None:
-    class _RateLimitedError(Exception):
-        pass
-
-    async def _fake_acompletion(**kwargs: Any):
-        del kwargs
-        raise _RateLimitedError("rl")
-
-    monkeypatch.setitem(
-        sys.modules,
-        "litellm",
-        types.SimpleNamespace(acompletion=_fake_acompletion),
-    )
-
-    with pytest.raises(_RateLimitedError):
-        await agent_setup.preflight_llm(
-            types.SimpleNamespace(model="openai/test"),
-            is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError),
-        )
-
-
-async def test_preflight_llm_skips_probe_for_auto_model(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
-    called = {"count": 0}
-
-    async def _fake_acompletion(**kwargs: Any):
-        del kwargs
-        called["count"] += 1
-        return {"ok": True}
-
-    monkeypatch.setitem(
-        sys.modules,
-        "litellm",
-        types.SimpleNamespace(acompletion=_fake_acompletion),
-    )
-
-    await agent_setup.preflight_llm(
-        types.SimpleNamespace(model="auto"),
-        is_provider_rate_limited=lambda _: False,
-    )
-    assert called["count"] == 0
-
-
-async def test_build_main_agent_for_thread_forwards_arguments() -> None:
-    seen: dict[str, Any] = {}
-
-    async def _factory(**kwargs: Any):
-        seen.update(kwargs)
-        return "agent"
-
-    out = await agent_setup.build_main_agent_for_thread(
-        _factory,
-        llm="llm",
-        search_space_id=1,
-        db_session="db",
-        connector_service="connector",
-        checkpointer="cp",
-        user_id="u",
-        thread_id=10,
-        agent_config="cfg",
-        firecrawl_api_key="key",
-        thread_visibility="vis",
-        filesystem_selection="fs",
-        disabled_tools=["a"],
-        mentioned_document_ids=[5],
-    )
-    assert out == "agent"
-    assert seen["thread_id"] == 10
-    assert seen["mentioned_document_ids"] == [5]
-
-
-async def test_settle_speculative_agent_build_swallows_exceptions() -> None:
-    async def _boom() -> None:
-        raise RuntimeError("ignore")
-
-    import asyncio
-
-    task = asyncio.create_task(_boom())
-    await agent_setup.settle_speculative_agent_build(task)
-    assert task.done()
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestrator_stream_chat.py
@ -1,240 +0,0 @@
-"""Behavior tests for orchestrator ``stream_chat`` public API."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import Any
-
-import pytest
-
-from app.tasks.chat.streaming.orchestration import StreamingContext
-from app.tasks.chat.streaming.orchestration import orchestrator
-from app.tasks.chat.streaming.orchestration.orchestrator import (
-    stream_chat,
-    stream_regenerate,
-    stream_resume,
-)
-
-pytestmark = pytest.mark.unit
-
-
-@dataclass
-class _Chunk:
-    content: Any = ""
-    additional_kwargs: dict[str, Any] = field(default_factory=dict)
-    tool_call_chunks: list[dict[str, Any]] = field(default_factory=list)
-
-
-class _StreamingService:
-    def __init__(self) -> None:
-        self._text_idx = 0
-
-    def generate_text_id(self) -> str:
-        self._text_idx += 1
-        return f"text-{self._text_idx}"
-
-    def format_text_start(self, text_id: str) -> str:
-        return f"text_start:{text_id}"
-
-    def format_text_delta(self, text_id: str, text: str) -> str:
-        return f"text_delta:{text_id}:{text}"
-
-    def format_text_end(self, text_id: str) -> str:
-        return f"text_end:{text_id}"
-
-
-class _Agent:
-    def __init__(self, events: list[dict[str, Any]]) -> None:
-        self.events = list(events)
-        self.calls: list[tuple[Any, dict[str, Any]]] = []
-
-    async def astream_events(self, input_data: Any, **kwargs: Any):
-        self.calls.append((input_data, kwargs))
-        for event in self.events:
-            yield event
-
-
-async def _collect(stream: Any) -> list[str]:
-    out: list[str] = []
-    async for x in stream:
-        out.append(x)
-    return out
-
-
-async def test_stream_chat_uses_streaming_context_path() -> None:
-    service = _StreamingService()
-    agent = _Agent(
-        [
-            {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="hello")}},
-            {"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="!")}},
-        ]
-    )
-    frames = await _collect(
-        stream_chat(
-            user_query="ignored-here",
-            search_space_id=1,
-            chat_id=77,
-            streaming_context=StreamingContext(
-                agent=agent,
-                config={"configurable": {"thread_id": "thread-1"}},
-                input_data={"messages": []},
-                streaming_service=service,
-            ),
-        )
-    )
-
-    assert frames == [
-        "text_start:text-1",
-        "text_delta:text-1:hello",
-        "text_delta:text-1:!",
-        "text_end:text-1",
-    ]
-
-
-async def test_stream_resume_uses_streaming_context_path() -> None:
-    service = _StreamingService()
-    agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("r")}}])
-
-    frames = await _collect(
-        stream_resume(
-            chat_id=9,
-            search_space_id=1,
-            decisions=[],
-            streaming_context=StreamingContext(
-                agent=agent,
-                config={"configurable": {"thread_id": "thread-r"}},
-                input_data={"messages": []},
-                streaming_service=service,
-            ),
-        )
-    )
-
-    assert frames == [
-        "text_start:text-1",
-        "text_delta:text-1:r",
-        "text_end:text-1",
-    ]
-
-
-async def test_stream_regenerate_uses_streaming_context_path() -> None:
-    service = _StreamingService()
-    agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("g")}}])
-
-    frames = await _collect(
-        stream_regenerate(
-            user_query="q",
-            search_space_id=1,
-            chat_id=2,
-            streaming_context=StreamingContext(
-                agent=agent,
-                config={"configurable": {"thread_id": "thread-g"}},
-                input_data={"messages": []},
-                streaming_service=service,
-            ),
-        )
-    )
-
-    assert frames == [
-        "text_start:text-1",
-        "text_delta:text-1:g",
-        "text_end:text-1",
-    ]
-
-
-async def test_stream_chat_builds_streaming_context_when_not_provided() -> None:
-    service = _StreamingService()
-    agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("b")}}])
-
-    async def _fake_builder(**kwargs: Any) -> StreamingContext:
-        del kwargs
-        return StreamingContext(
-            agent=agent,
-            config={"configurable": {"thread_id": "thread-b"}},
-            input_data={"messages": []},
-            streaming_service=service,
-        )
-
-    old = orchestrator.build_chat_streaming_context
-    orchestrator.build_chat_streaming_context = _fake_builder
-    try:
-        frames = await _collect(
-            stream_chat(
-                user_query="q",
-                search_space_id=1,
-                chat_id=3,
-            )
-        )
-    finally:
-        orchestrator.build_chat_streaming_context = old
-
-    assert frames == [
-        "text_start:text-1",
-        "text_delta:text-1:b",
-        "text_end:text-1",
-    ]
-
-
-async def test_stream_resume_builds_streaming_context_when_not_provided() -> None:
-    service = _StreamingService()
-    agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("u")}}])
-
-    async def _fake_builder(**kwargs: Any) -> StreamingContext:
-        del kwargs
-        return StreamingContext(
-            agent=agent,
-            config={"configurable": {"thread_id": "thread-u"}},
-            input_data={"messages": []},
-            streaming_service=service,
-        )
-
-    old = orchestrator.build_resume_streaming_context
-    orchestrator.build_resume_streaming_context = _fake_builder
-    try:
-        frames = await _collect(
-            stream_resume(
-                chat_id=9,
-                search_space_id=1,
-                decisions=[],
-            )
-        )
-    finally:
-        orchestrator.build_resume_streaming_context = old
-
-    assert frames == [
-        "text_start:text-1",
-        "text_delta:text-1:u",
-        "text_end:text-1",
-    ]
-
-
-async def test_stream_regenerate_builds_streaming_context_when_not_provided() -> None:
-    service = _StreamingService()
-    agent = _Agent([{"event": "on_chat_model_stream", "data": {"chunk": _Chunk("x")}}])
-
-    async def _fake_builder(**kwargs: Any) -> StreamingContext:
-        del kwargs
-        return StreamingContext(
-            agent=agent,
-            config={"configurable": {"thread_id": "thread-x"}},
-            input_data={"messages": []},
-            streaming_service=service,
-        )
-
-    old = orchestrator.build_regenerate_streaming_context
-    orchestrator.build_regenerate_streaming_context = _fake_builder
-    try:
-        frames = await _collect(
-            stream_regenerate(
-                user_query="q",
-                search_space_id=1,
-                chat_id=2,
-            )
-        )
-    finally:
-        orchestrator.build_regenerate_streaming_context = old
-
-    assert frames == [
-        "text_start:text-1",
-        "text_delta:text-1:x",
-        "text_end:text-1",
-    ]
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stage_1_parity.py
@ -1,7 +1,7 @@
 """Pin Stage 1 extractions as faithful copies of the old helpers.

-The new orchestrator under ``app.tasks.chat.streaming`` is built in
-parallel with the production module ``app.tasks.chat.stream_new_chat``.
+Extractions under ``app.tasks.chat.streaming`` are compared to
+``app.tasks.chat.stream_new_chat`` helpers.
 For each Stage 1 extraction we assert the new function returns the same
 output as the old one for a representative input set. The moment the
 two diverge - intentionally or otherwise - this file fails loudly so
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_orchestration_event_stream.py
@ -1,4 +1,4 @@
-"""Behavior tests for orchestration event-stream execution."""
+"""Tests for ``stream_output`` (LangGraph events → SSE)."""

 from __future__ import annotations

@ -7,8 +7,8 @@ from typing import Any

 import pytest

-from app.tasks.chat.streaming.orchestration import stream_output
-from app.tasks.chat.streaming.orchestration.output import StreamingResult
+from app.tasks.chat.streaming.graph_stream import stream_output
+from app.tasks.chat.streaming.graph_stream.result import StreamingResult

 pytestmark = pytest.mark.unit

@ -88,6 +88,7 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None:

 async def test_stream_output_passes_runtime_context_to_agent() -> None:
    service = _StreamingService()
+
    class _ContextAwareAgent:
        async def astream_events(self, input_data: Any, **kwargs: Any):
            del input_data