Add a route-level kill switch for streaming orchestrator cutover.

This commit is contained in:
CREDO23 2026-05-07 14:44:36 +02:00
parent 2ec2e82d9d
commit c0706364d1
3 changed files with 146 additions and 36 deletions

View file

@ -490,6 +490,12 @@ class Config:
ENABLE_DESKTOP_LOCAL_FILESYSTEM = ( ENABLE_DESKTOP_LOCAL_FILESYSTEM = (
os.getenv("ENABLE_DESKTOP_LOCAL_FILESYSTEM", "FALSE").upper() == "TRUE" os.getenv("ENABLE_DESKTOP_LOCAL_FILESYSTEM", "FALSE").upper() == "TRUE"
) )
# Streaming entrypoint switch. Keep this at the route layer so orchestrator
# code stays free of legacy fallback branching.
# NOTE(review): default is "TRUE", so the orchestrator path is ON unless the
# env var is explicitly set to something other than TRUE (case-insensitive).
ENABLE_CHAT_STREAM_ORCHESTRATOR = (
    os.getenv("SURFSENSE_ENABLE_CHAT_STREAM_ORCHESTRATOR", "TRUE").upper()
    == "TRUE"
)
@classmethod @classmethod
def is_self_hosted(cls) -> bool: def is_self_hosted(cls) -> bool:

View file

@ -71,7 +71,15 @@ from app.schemas.new_chat import (
TokenUsageSummary, TokenUsageSummary,
TurnStatusResponse, TurnStatusResponse,
) )
from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat from app.tasks.chat.stream_new_chat import (
stream_new_chat as legacy_stream_new_chat,
stream_resume_chat as legacy_stream_resume_chat,
)
from app.tasks.chat.streaming.orchestrator import (
stream_chat,
stream_regenerate,
stream_resume,
)
from app.users import current_active_user from app.users import current_active_user
from app.utils.perf import get_perf_logger from app.utils.perf import get_perf_logger
from app.utils.rbac import check_permission from app.utils.rbac import check_permission
@ -90,6 +98,10 @@ TURN_CANCELLING_MAX_DELAY_MS = 1500
router = APIRouter() router = APIRouter()
def _use_streaming_orchestrator() -> bool:
    """Route-level kill switch.

    Returns True when chat endpoints should stream through the new
    orchestrator entrypoints instead of the legacy ``stream_new_chat`` /
    ``stream_resume_chat`` implementations.
    """
    # The config flag is already a bool (env var compared against "TRUE");
    # bool() just makes the declared return type explicit.
    return bool(config.ENABLE_CHAT_STREAM_ORCHESTRATOR)
def _resolve_filesystem_selection( def _resolve_filesystem_selection(
*, *,
mode: str, mode: str,
@ -1770,7 +1782,11 @@ async def handle_new_chat(
) )
return StreamingResponse( return StreamingResponse(
stream_new_chat( (
stream_chat
if _use_streaming_orchestrator()
else legacy_stream_new_chat
)(
user_query=request.user_query, user_query=request.user_query,
search_space_id=request.search_space_id, search_space_id=request.search_space_id,
chat_id=request.chat_id, chat_id=request.chat_id,
@ -2255,7 +2271,12 @@ async def regenerate_response(
else None else None
) )
try: try:
async for chunk in stream_new_chat( regenerate_fn = (
stream_regenerate
if _use_streaming_orchestrator()
else legacy_stream_new_chat
)
async for chunk in regenerate_fn(
user_query=str(user_query_to_use), user_query=str(user_query_to_use),
search_space_id=request.search_space_id, search_space_id=request.search_space_id,
chat_id=thread_id, chat_id=thread_id,
@ -2387,7 +2408,11 @@ async def resume_chat(
await session.close() await session.close()
return StreamingResponse( return StreamingResponse(
stream_resume_chat( (
stream_resume
if _use_streaming_orchestrator()
else legacy_stream_resume_chat
)(
chat_id=thread_id, chat_id=thread_id,
search_space_id=request.search_space_id, search_space_id=request.search_space_id,
decisions=decisions, decisions=decisions,

View file

@ -1,48 +1,127 @@
"""Top-level chat streaming entrypoints (stubs until wired).""" """Top-level chat streaming entrypoints.
For now these orchestrator functions are thin compatibility wrappers around the
current ``stream_new_chat`` / ``stream_resume_chat`` implementations. Routing
calls through this module lets us cut over to the fully modular event relay in
one place later without touching API routes again.
"""
from __future__ import annotations from __future__ import annotations
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from typing import Any from typing import Any, Literal
from app.agents.new_chat.filesystem_selection import FilesystemSelection
from app.db import ChatVisibility
from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
async def stream_chat(
    *,
    user_query: str,
    search_space_id: int,
    chat_id: int,
    user_id: str | None = None,
    llm_config_id: int = -1,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    mentioned_documents: list[dict[str, Any]] | None = None,
    checkpoint_id: str | None = None,
    needs_history_bootstrap: bool = False,
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    user_image_data_urls: list[str] | None = None,
) -> AsyncGenerator[str, None]:
    """Stream a new chat turn through the current production pipeline.

    Thin compatibility wrapper: every keyword argument is forwarded unchanged
    to ``stream_new_chat`` and its chunks are relayed verbatim.
    """
    # Collect the pass-through arguments once so the delegation call below
    # stays a single readable expression.
    forwarded = dict(
        user_query=user_query,
        search_space_id=search_space_id,
        chat_id=chat_id,
        user_id=user_id,
        llm_config_id=llm_config_id,
        mentioned_document_ids=mentioned_document_ids,
        mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
        mentioned_documents=mentioned_documents,
        checkpoint_id=checkpoint_id,
        needs_history_bootstrap=needs_history_bootstrap,
        thread_visibility=thread_visibility,
        current_user_display_name=current_user_display_name,
        disabled_tools=disabled_tools,
        filesystem_selection=filesystem_selection,
        request_id=request_id,
        user_image_data_urls=user_image_data_urls,
    )
    async for piece in stream_new_chat(**forwarded):
        yield piece
async def stream_resume(
    *,
    chat_id: int,
    search_space_id: int,
    decisions: list[dict],
    user_id: str | None = None,
    llm_config_id: int = -1,
    thread_visibility: ChatVisibility | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    disabled_tools: list[str] | None = None,
) -> AsyncGenerator[str, None]:
    """Resume an interrupted chat turn through the current production pipeline.

    Thin compatibility wrapper: every keyword argument is forwarded unchanged
    to ``stream_resume_chat`` and its chunks are relayed verbatim.
    """
    # Single kwargs dict keeps the delegation call compact and makes it easy
    # to swap in the modular event relay later without reshaping arguments.
    forwarded = dict(
        chat_id=chat_id,
        search_space_id=search_space_id,
        decisions=decisions,
        user_id=user_id,
        llm_config_id=llm_config_id,
        thread_visibility=thread_visibility,
        filesystem_selection=filesystem_selection,
        request_id=request_id,
        disabled_tools=disabled_tools,
    )
    async for piece in stream_resume_chat(**forwarded):
        yield piece
async def stream_regenerate(
    *,
    user_query: str,
    search_space_id: int,
    chat_id: int,
    user_id: str | None = None,
    llm_config_id: int = -1,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    mentioned_documents: list[dict[str, Any]] | None = None,
    checkpoint_id: str | None = None,
    needs_history_bootstrap: bool = False,
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
    filesystem_selection: FilesystemSelection | None = None,
    request_id: str | None = None,
    user_image_data_urls: list[str] | None = None,
    flow: Literal["new", "regenerate"] = "regenerate",
) -> AsyncGenerator[str, None]:
    """Regenerate an assistant turn through the current production pipeline.

    Identical to :func:`stream_chat` except that it also forwards ``flow``
    (defaulting to ``"regenerate"``) so ``stream_new_chat`` runs its
    regeneration branch. All chunks are relayed verbatim.
    """
    forwarded = dict(
        user_query=user_query,
        search_space_id=search_space_id,
        chat_id=chat_id,
        user_id=user_id,
        llm_config_id=llm_config_id,
        mentioned_document_ids=mentioned_document_ids,
        mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
        mentioned_documents=mentioned_documents,
        checkpoint_id=checkpoint_id,
        needs_history_bootstrap=needs_history_bootstrap,
        thread_visibility=thread_visibility,
        current_user_display_name=current_user_display_name,
        disabled_tools=disabled_tools,
        filesystem_selection=filesystem_selection,
        request_id=request_id,
        user_image_data_urls=user_image_data_urls,
        flow=flow,
    )
    async for piece in stream_new_chat(**forwarded):
        yield piece