feat(chat): add multi-agent mode routing scaffold and telemetry.

This commit is contained in:
CREDO23 2026-04-28 15:35:14 +02:00
parent 78f71c7e3a
commit 7b9a218d62
13 changed files with 742 additions and 58 deletions

View file

@ -0,0 +1,47 @@
"""Thin architecture dispatch seam for chat streaming entrypoints."""
from __future__ import annotations
from collections.abc import AsyncGenerator
from typing import Any
from app.agents.multi_agent_v1.entrypoint import MultiAgentEntrypoint
from app.agents.new_chat.architecture_mode import (
ArchitectureMode,
parse_architecture_mode,
)
from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
def _resolve_mode(mode_value: str) -> ArchitectureMode:
    """Parse *mode_value* into an ArchitectureMode, defaulting to SINGLE_AGENT.

    Any value that ``parse_architecture_mode`` maps to a falsy result
    (unrecognized or empty input) falls back to the single-agent mode.
    """
    parsed = parse_architecture_mode(mode_value)
    return parsed if parsed else ArchitectureMode.SINGLE_AGENT
def dispatch_new_chat_stream(
    *,
    architecture_mode: str,
    stream_kwargs: dict[str, Any],
) -> AsyncGenerator[str, None]:
    """Route a new-chat stream request to the selected architecture.

    Args:
        architecture_mode: Raw mode string; unrecognized values resolve to
            the single-agent path via ``_resolve_mode``.
        stream_kwargs: Keyword arguments forwarded to the underlying streamer.

    Returns:
        The async generator produced by either the single-agent streamer or
        the multi-agent entrypoint (which keeps the single-agent streamer as
        its fallback).
    """
    # Multi-agent path: delegate to the entrypoint, handing it the
    # single-agent streamer so it can fall back if needed.
    if _resolve_mode(architecture_mode) != ArchitectureMode.SINGLE_AGENT:
        return MultiAgentEntrypoint().stream_new_chat(
            fallback_streamer=stream_new_chat,
            fallback_kwargs=stream_kwargs,
        )
    return stream_new_chat(**stream_kwargs)
def dispatch_resume_chat_stream(
    *,
    architecture_mode: str,
    stream_kwargs: dict[str, Any],
) -> AsyncGenerator[str, None]:
    """Route a resume-chat stream request to the selected architecture.

    Args:
        architecture_mode: Raw mode string; unrecognized values resolve to
            the single-agent path via ``_resolve_mode``.
        stream_kwargs: Keyword arguments forwarded to the underlying streamer.

    Returns:
        The async generator produced by either the single-agent resume
        streamer or the multi-agent entrypoint (which keeps the single-agent
        resume streamer as its fallback).
    """
    # Multi-agent path: delegate to the entrypoint, handing it the
    # single-agent resume streamer so it can fall back if needed.
    if _resolve_mode(architecture_mode) != ArchitectureMode.SINGLE_AGENT:
        return MultiAgentEntrypoint().stream_resume_chat(
            fallback_streamer=stream_resume_chat,
            fallback_kwargs=stream_kwargs,
        )
    return stream_resume_chat(**stream_kwargs)

View file

@ -42,6 +42,7 @@ from app.agents.new_chat.memory_extraction import (
extract_and_save_memory,
extract_and_save_team_memory,
)
from app.agents.new_chat.telemetry import log_architecture_telemetry
from app.db import (
ChatVisibility,
NewChatMessage,
@ -149,6 +150,7 @@ class StreamResult:
agent_called_update_memory: bool = False
request_id: str | None = None
turn_id: str = ""
architecture_mode: str = "single_agent"
filesystem_mode: str = "cloud"
client_platform: str = "web"
intent_detected: str = "chat_only"
@ -182,9 +184,7 @@ def _tool_output_has_error(tool_output: Any) -> bool:
if tool_output.get("error"):
return True
result = tool_output.get("result")
if isinstance(result, str) and result.strip().lower().startswith("error:"):
return True
return False
return isinstance(result, str) and result.strip().lower().startswith("error:")
if isinstance(tool_output, str):
return tool_output.strip().lower().startswith("error:")
return False
@ -231,6 +231,7 @@ def _log_file_contract(stage: str, result: StreamResult, **extra: Any) -> None:
"request_id": result.request_id or "unknown",
"turn_id": result.turn_id or "unknown",
"chat_id": result.turn_id.split(":", 1)[0] if ":" in result.turn_id else "unknown",
"architecture_mode": result.architecture_mode,
"filesystem_mode": result.filesystem_mode,
"client_platform": result.client_platform,
"intent_detected": result.intent_detected,
@ -1308,18 +1309,17 @@ async def _stream_agent_events(
result.commit_gate_passed, result.commit_gate_reason = (
_evaluate_file_contract_outcome(result)
)
if not result.commit_gate_passed:
if _contract_enforcement_active(result):
gate_notice = (
"I could not complete the requested file write because no successful "
"write_file/edit_file operation was confirmed."
)
gate_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(gate_text_id)
yield streaming_service.format_text_delta(gate_text_id, gate_notice)
yield streaming_service.format_text_end(gate_text_id)
yield streaming_service.format_terminal_info(gate_notice, "error")
accumulated_text = gate_notice
if not result.commit_gate_passed and _contract_enforcement_active(result):
gate_notice = (
"I could not complete the requested file write because no successful "
"write_file/edit_file operation was confirmed."
)
gate_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(gate_text_id)
yield streaming_service.format_text_delta(gate_text_id, gate_notice)
yield streaming_service.format_text_end(gate_text_id)
yield streaming_service.format_terminal_info(gate_notice, "error")
accumulated_text = gate_notice
else:
result.commit_gate_passed = True
result.commit_gate_reason = ""
@ -1351,6 +1351,7 @@ async def stream_new_chat(
filesystem_selection: FilesystemSelection | None = None,
request_id: str | None = None,
user_image_data_urls: list[str] | None = None,
architecture_mode: str = "single_agent",
) -> AsyncGenerator[str, None]:
"""
Stream chat responses from the new SurfSense deep agent.
@ -1384,8 +1385,22 @@ async def stream_new_chat(
)
stream_result.request_id = request_id
stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
stream_result.architecture_mode = architecture_mode
stream_result.filesystem_mode = fs_mode
stream_result.client_platform = fs_platform
log_architecture_telemetry(
phase="turn_start",
source="new_chat",
status="started",
architecture_mode=architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=0.0,
token_total=0,
request_id=request_id,
turn_id=stream_result.turn_id,
)
_log_file_contract("turn_start", stream_result)
_perf_log.info(
"[stream_new_chat] filesystem_mode=%s client_platform=%s",
@ -1638,6 +1653,7 @@ async def stream_new_chat(
"search_space_id": search_space_id,
"request_id": request_id or "unknown",
"turn_id": stream_result.turn_id,
"architecture_mode": architecture_mode,
}
_perf_log.info(
@ -1669,6 +1685,7 @@ async def stream_new_chat(
configurable = {"thread_id": str(chat_id)}
configurable["request_id"] = request_id or "unknown"
configurable["turn_id"] = stream_result.turn_id
configurable["architecture_mode"] = architecture_mode
if checkpoint_id:
configurable["checkpoint_id"] = checkpoint_id
@ -1884,6 +1901,19 @@ async def stream_new_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="new_chat",
status="interrupted",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@ -1956,6 +1986,19 @@ async def stream_new_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="new_chat",
status="completed",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
# Fire background memory extraction if the agent didn't handle it.
# Shared threads write to team memory; private threads write to user memory.
@ -2000,6 +2043,20 @@ async def stream_new_chat(
print(f"[stream_new_chat] {error_message}")
print(f"[stream_new_chat] Exception type: {type(e).__name__}")
print(f"[stream_new_chat] Traceback:\n{traceback.format_exc()}")
log_architecture_telemetry(
phase="turn_end",
source="new_chat",
status="error",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
extra={"error_type": type(e).__name__},
)
yield streaming_service.format_error(error_message)
yield streaming_service.format_finish_step()
@ -2093,6 +2150,7 @@ async def stream_resume_chat(
thread_visibility: ChatVisibility | None = None,
filesystem_selection: FilesystemSelection | None = None,
request_id: str | None = None,
architecture_mode: str = "single_agent",
) -> AsyncGenerator[str, None]:
streaming_service = VercelStreamingService()
stream_result = StreamResult()
@ -2103,8 +2161,22 @@ async def stream_resume_chat(
)
stream_result.request_id = request_id
stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
stream_result.architecture_mode = architecture_mode
stream_result.filesystem_mode = fs_mode
stream_result.client_platform = fs_platform
log_architecture_telemetry(
phase="turn_start",
source="resume_chat",
status="started",
architecture_mode=architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=0.0,
token_total=0,
request_id=request_id,
turn_id=stream_result.turn_id,
)
_log_file_contract("turn_start", stream_result)
_perf_log.info(
"[stream_resume] filesystem_mode=%s client_platform=%s",
@ -2250,6 +2322,7 @@ async def stream_resume_chat(
"thread_id": str(chat_id),
"request_id": request_id or "unknown",
"turn_id": stream_result.turn_id,
"architecture_mode": architecture_mode,
},
"recursion_limit": 80,
}
@ -2300,6 +2373,19 @@ async def stream_resume_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="resume_chat",
status="interrupted",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@ -2353,6 +2439,19 @@ async def stream_resume_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="resume_chat",
status="completed",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@ -2364,6 +2463,20 @@ async def stream_resume_chat(
error_message = f"Error during resume: {e!s}"
print(f"[stream_resume_chat] {error_message}")
print(f"[stream_resume_chat] Traceback:\n{traceback.format_exc()}")
log_architecture_telemetry(
phase="turn_end",
source="resume_chat",
status="error",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
extra={"error_type": type(e).__name__},
)
yield streaming_service.format_error(error_message)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()