feat(chat): add multi-agent mode routing scaffold and telemetry.

This commit is contained in:
CREDO23 2026-04-28 15:35:14 +02:00
parent 78f71c7e3a
commit 7b9a218d62
13 changed files with 742 additions and 58 deletions

View file

@ -0,0 +1,47 @@
"""Thin architecture dispatch seam for chat streaming entrypoints."""
from __future__ import annotations
from collections.abc import AsyncGenerator
from typing import Any
from app.agents.multi_agent_v1.entrypoint import MultiAgentEntrypoint
from app.agents.new_chat.architecture_mode import (
ArchitectureMode,
parse_architecture_mode,
)
from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
def _resolve_mode(mode_value: str) -> ArchitectureMode:
    """Parse *mode_value* into an ArchitectureMode, defaulting to SINGLE_AGENT.

    Any value that ``parse_architecture_mode`` maps to a falsy result
    (unrecognized or empty input) falls back to the single-agent mode.
    """
    parsed = parse_architecture_mode(mode_value)
    return parsed if parsed else ArchitectureMode.SINGLE_AGENT
def dispatch_new_chat_stream(
    *,
    architecture_mode: str,
    stream_kwargs: dict[str, Any],
) -> AsyncGenerator[str, None]:
    """Route a new-chat stream request to the selected architecture.

    Args:
        architecture_mode: Raw mode string; unrecognized values resolve to
            the single-agent path via ``_resolve_mode``.
        stream_kwargs: Keyword arguments forwarded to the underlying streamer.

    Returns:
        The async generator produced by either the single-agent streamer or
        the multi-agent entrypoint (which keeps the single-agent streamer as
        its fallback).
    """
    # Multi-agent path: delegate to the entrypoint, handing it the
    # single-agent streamer so it can fall back if needed.
    if _resolve_mode(architecture_mode) != ArchitectureMode.SINGLE_AGENT:
        return MultiAgentEntrypoint().stream_new_chat(
            fallback_streamer=stream_new_chat,
            fallback_kwargs=stream_kwargs,
        )
    return stream_new_chat(**stream_kwargs)
def dispatch_resume_chat_stream(
    *,
    architecture_mode: str,
    stream_kwargs: dict[str, Any],
) -> AsyncGenerator[str, None]:
    """Route a resume-chat stream request to the selected architecture.

    Args:
        architecture_mode: Raw mode string; unrecognized values resolve to
            the single-agent path via ``_resolve_mode``.
        stream_kwargs: Keyword arguments forwarded to the underlying streamer.

    Returns:
        The async generator produced by either the single-agent resume
        streamer or the multi-agent entrypoint (which keeps the single-agent
        resume streamer as its fallback).
    """
    # Multi-agent path: delegate to the entrypoint, handing it the
    # single-agent resume streamer so it can fall back if needed.
    if _resolve_mode(architecture_mode) != ArchitectureMode.SINGLE_AGENT:
        return MultiAgentEntrypoint().stream_resume_chat(
            fallback_streamer=stream_resume_chat,
            fallback_kwargs=stream_kwargs,
        )
    return stream_resume_chat(**stream_kwargs)

View file

@ -42,6 +42,7 @@ from app.agents.new_chat.memory_extraction import (
extract_and_save_memory,
extract_and_save_team_memory,
)
from app.agents.new_chat.telemetry import log_architecture_telemetry
from app.db import (
ChatVisibility,
NewChatMessage,
@ -149,6 +150,7 @@ class StreamResult:
agent_called_update_memory: bool = False
request_id: str | None = None
turn_id: str = ""
architecture_mode: str = "single_agent"
filesystem_mode: str = "cloud"
client_platform: str = "web"
intent_detected: str = "chat_only"
@ -182,9 +184,7 @@ def _tool_output_has_error(tool_output: Any) -> bool:
if tool_output.get("error"):
return True
result = tool_output.get("result")
if isinstance(result, str) and result.strip().lower().startswith("error:"):
return True
return False
return isinstance(result, str) and result.strip().lower().startswith("error:")
if isinstance(tool_output, str):
return tool_output.strip().lower().startswith("error:")
return False
@ -231,6 +231,7 @@ def _log_file_contract(stage: str, result: StreamResult, **extra: Any) -> None:
"request_id": result.request_id or "unknown",
"turn_id": result.turn_id or "unknown",
"chat_id": result.turn_id.split(":", 1)[0] if ":" in result.turn_id else "unknown",
"architecture_mode": result.architecture_mode,
"filesystem_mode": result.filesystem_mode,
"client_platform": result.client_platform,
"intent_detected": result.intent_detected,
@ -1308,18 +1309,17 @@ async def _stream_agent_events(
result.commit_gate_passed, result.commit_gate_reason = (
_evaluate_file_contract_outcome(result)
)
if not result.commit_gate_passed:
if _contract_enforcement_active(result):
gate_notice = (
"I could not complete the requested file write because no successful "
"write_file/edit_file operation was confirmed."
)
gate_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(gate_text_id)
yield streaming_service.format_text_delta(gate_text_id, gate_notice)
yield streaming_service.format_text_end(gate_text_id)
yield streaming_service.format_terminal_info(gate_notice, "error")
accumulated_text = gate_notice
if not result.commit_gate_passed and _contract_enforcement_active(result):
gate_notice = (
"I could not complete the requested file write because no successful "
"write_file/edit_file operation was confirmed."
)
gate_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(gate_text_id)
yield streaming_service.format_text_delta(gate_text_id, gate_notice)
yield streaming_service.format_text_end(gate_text_id)
yield streaming_service.format_terminal_info(gate_notice, "error")
accumulated_text = gate_notice
else:
result.commit_gate_passed = True
result.commit_gate_reason = ""
@ -1351,6 +1351,7 @@ async def stream_new_chat(
filesystem_selection: FilesystemSelection | None = None,
request_id: str | None = None,
user_image_data_urls: list[str] | None = None,
architecture_mode: str = "single_agent",
) -> AsyncGenerator[str, None]:
"""
Stream chat responses from the new SurfSense deep agent.
@ -1384,8 +1385,22 @@ async def stream_new_chat(
)
stream_result.request_id = request_id
stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
stream_result.architecture_mode = architecture_mode
stream_result.filesystem_mode = fs_mode
stream_result.client_platform = fs_platform
log_architecture_telemetry(
phase="turn_start",
source="new_chat",
status="started",
architecture_mode=architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=0.0,
token_total=0,
request_id=request_id,
turn_id=stream_result.turn_id,
)
_log_file_contract("turn_start", stream_result)
_perf_log.info(
"[stream_new_chat] filesystem_mode=%s client_platform=%s",
@ -1638,6 +1653,7 @@ async def stream_new_chat(
"search_space_id": search_space_id,
"request_id": request_id or "unknown",
"turn_id": stream_result.turn_id,
"architecture_mode": architecture_mode,
}
_perf_log.info(
@ -1669,6 +1685,7 @@ async def stream_new_chat(
configurable = {"thread_id": str(chat_id)}
configurable["request_id"] = request_id or "unknown"
configurable["turn_id"] = stream_result.turn_id
configurable["architecture_mode"] = architecture_mode
if checkpoint_id:
configurable["checkpoint_id"] = checkpoint_id
@ -1884,6 +1901,19 @@ async def stream_new_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="new_chat",
status="interrupted",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@ -1956,6 +1986,19 @@ async def stream_new_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="new_chat",
status="completed",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
# Fire background memory extraction if the agent didn't handle it.
# Shared threads write to team memory; private threads write to user memory.
@ -2000,6 +2043,20 @@ async def stream_new_chat(
print(f"[stream_new_chat] {error_message}")
print(f"[stream_new_chat] Exception type: {type(e).__name__}")
print(f"[stream_new_chat] Traceback:\n{traceback.format_exc()}")
log_architecture_telemetry(
phase="turn_end",
source="new_chat",
status="error",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
extra={"error_type": type(e).__name__},
)
yield streaming_service.format_error(error_message)
yield streaming_service.format_finish_step()
@ -2093,6 +2150,7 @@ async def stream_resume_chat(
thread_visibility: ChatVisibility | None = None,
filesystem_selection: FilesystemSelection | None = None,
request_id: str | None = None,
architecture_mode: str = "single_agent",
) -> AsyncGenerator[str, None]:
streaming_service = VercelStreamingService()
stream_result = StreamResult()
@ -2103,8 +2161,22 @@ async def stream_resume_chat(
)
stream_result.request_id = request_id
stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
stream_result.architecture_mode = architecture_mode
stream_result.filesystem_mode = fs_mode
stream_result.client_platform = fs_platform
log_architecture_telemetry(
phase="turn_start",
source="resume_chat",
status="started",
architecture_mode=architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=0.0,
token_total=0,
request_id=request_id,
turn_id=stream_result.turn_id,
)
_log_file_contract("turn_start", stream_result)
_perf_log.info(
"[stream_resume] filesystem_mode=%s client_platform=%s",
@ -2250,6 +2322,7 @@ async def stream_resume_chat(
"thread_id": str(chat_id),
"request_id": request_id or "unknown",
"turn_id": stream_result.turn_id,
"architecture_mode": architecture_mode,
},
"recursion_limit": 80,
}
@ -2300,6 +2373,19 @@ async def stream_resume_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="resume_chat",
status="interrupted",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@ -2353,6 +2439,19 @@ async def stream_resume_chat(
"call_details": accumulator.serialized_calls(),
},
)
log_architecture_telemetry(
phase="turn_end",
source="resume_chat",
status="completed",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@ -2364,6 +2463,20 @@ async def stream_resume_chat(
error_message = f"Error during resume: {e!s}"
print(f"[stream_resume_chat] {error_message}")
print(f"[stream_resume_chat] Traceback:\n{traceback.format_exc()}")
log_architecture_telemetry(
phase="turn_end",
source="resume_chat",
status="error",
architecture_mode=stream_result.architecture_mode,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - _t_total) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=stream_result.turn_id,
extra={"error_type": type(e).__name__},
)
yield streaming_service.format_error(error_message)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()