feat(chat): add multi-agent mode routing scaffold and telemetry.

CREDO23 2026-04-28 15:35:14 +02:00
parent 78f71c7e3a
commit 7b9a218d62
13 changed files with 742 additions and 58 deletions


@@ -4,6 +4,7 @@ from __future__ import annotations
import logging
import secrets
import time
import uuid
from pathlib import PurePosixPath
from typing import Any
@@ -12,6 +13,11 @@ from fastapi import APIRouter, HTTPException, Request, Response, UploadFile, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from app.agents.new_chat.architecture_mode import (
ArchitectureMode,
resolve_architecture_mode,
)
from app.agents.new_chat.telemetry import log_architecture_telemetry
from app.config import config
from app.etl_pipeline.file_classifier import (
DIRECT_CONVERT_EXTENSIONS,
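
The two names imported from app.agents.new_chat.architecture_mode come from a new module this diff does not show. As a rough sketch of what it plausibly contains, assuming a string-valued enum with a single-agent default (the member names and fallback behavior are assumptions, not code from this commit):

# Hypothetical sketch of app/agents/new_chat/architecture_mode.py; only
# ArchitectureMode and resolve_architecture_mode are confirmed by the diff.
from enum import Enum


class ArchitectureMode(str, Enum):
    SINGLE_AGENT = "single_agent"
    MULTI_AGENT = "multi_agent"


def resolve_architecture_mode(
    requested: ArchitectureMode | str | None,
) -> ArchitectureMode:
    """Coerce an optional client-supplied mode to a concrete member."""
    if requested is None:
        return ArchitectureMode.SINGLE_AGENT  # assumed default
    if isinstance(requested, ArchitectureMode):
        return requested
    try:
        return ArchitectureMode(requested)
    except ValueError:
        return ArchitectureMode.SINGLE_AGENT  # assumed lenient fallback
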
@@ -84,6 +90,7 @@ class AnonChatRequest(BaseModel):
messages: list[dict[str, Any]] = Field(..., min_length=1)
disabled_tools: list[str] | None = None
turnstile_token: str | None = None
architecture_mode: ArchitectureMode | None = None
class AnonQuotaResponse(BaseModel):
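
With the new optional field, an anonymous client can opt into a mode per request; leaving it out (or null) defers to whatever resolve_architecture_mode picks as the server-side default. A hypothetical request body, with the mode string borrowed from the enum sketch above:

# Hypothetical anonymous-chat payload; "multi_agent" assumes the enum
# values sketched above, and the key may be omitted entirely.
payload = {
    "messages": [{"role": "user", "content": "Summarize this page"}],
    "disabled_tools": None,
    "turnstile_token": None,
    "architecture_mode": "multi_agent",
}
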
@@ -361,6 +368,22 @@ async def stream_anonymous_chat(
accumulator = start_turn()
streaming_service = VercelStreamingService()
architecture_mode = resolve_architecture_mode(body.architecture_mode)
started_at = time.perf_counter()
turn_id = f"anon:{session_id}:{request_id}"
log_architecture_telemetry(
phase="turn_start",
source="anon_chat",
status="started",
architecture_mode=architecture_mode.value,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=0.0,
token_total=0,
request_id=request_id,
turn_id=turn_id,
)
try:
async with shielded_async_session() as session:
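
log_architecture_telemetry itself is not in this diff, but its keyword arguments are fully visible at the call sites. A plausible sketch, assuming a thin wrapper that emits one structured log record per lifecycle event (the field names are taken from the calls above; the logger name and record layout are assumptions):

# Plausible shape of app.agents.new_chat.telemetry.log_architecture_telemetry,
# inferred from its call sites; the record layout is an assumption.
import logging
from typing import Any

logger = logging.getLogger("app.agents.new_chat.telemetry")


def log_architecture_telemetry(
    *,
    phase: str,
    source: str,
    status: str,
    architecture_mode: str,
    orchestrator_used: bool,
    worker_count: int,
    retry_count: int,
    latency_ms: float,
    token_total: int,
    request_id: str,
    turn_id: str,
    extra: dict[str, Any] | None = None,
) -> None:
    """Emit one structured record per turn lifecycle event."""
    record = {
        "phase": phase,
        "source": source,
        "status": status,
        "architecture_mode": architecture_mode,
        "orchestrator_used": orchestrator_used,
        "worker_count": worker_count,
        "retry_count": retry_count,
        "latency_ms": round(latency_ms, 2),
        "token_total": token_total,
        "request_id": request_id,
        "turn_id": turn_id,
        **(extra or {}),
    }
    logger.info("architecture_telemetry", extra={"telemetry": record})
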
@@ -400,7 +423,10 @@ async def stream_anonymous_chat(
}
langgraph_config = {
"configurable": {"thread_id": anon_thread_id},
"configurable": {
"thread_id": anon_thread_id,
"architecture_mode": architecture_mode.value,
},
"recursion_limit": 40,
}
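
Putting the mode into configurable (rather than graph state) lets any node or conditional edge branch on it without a state-schema migration. A hypothetical node-side read; the fallback literal and branch names are assumptions:

# Hypothetical conditional edge keyed off the configurable added above;
# the "single_agent" fallback and the target node names are assumptions.
from langchain_core.runnables import RunnableConfig


def route_architecture(state: dict, config: RunnableConfig) -> str:
    mode = config.get("configurable", {}).get("architecture_mode", "single_agent")
    return "orchestrator" if mode == "multi_agent" else "direct_responder"
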
@@ -468,6 +494,19 @@
"total_tokens": accumulator.grand_total,
},
)
log_architecture_telemetry(
phase="turn_end",
source="anon_chat",
status="completed",
architecture_mode=architecture_mode.value,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - started_at) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@@ -475,6 +514,20 @@
except Exception as e:
logger.exception("Anonymous chat stream error")
log_architecture_telemetry(
phase="turn_end",
source="anon_chat",
status="error",
architecture_mode=architecture_mode.value,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - started_at) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=turn_id,
extra={"error_type": type(e).__name__},
)
await TokenQuotaService.anon_release(session_key, ip_key, request_id)
yield streaming_service.format_error(f"Error during chat: {e!s}")
yield streaming_service.format_done()
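
The completed and error branches both compute latency from the same started_at captured at turn_start, so the two turn_end records stay comparable. One possible refactor (not part of this commit) would fold the three call sites into a context manager, at the cost of losing direct access to the accumulator's running token total, set to 0 here for brevity; it reuses the log_architecture_telemetry signature shown earlier:

# Possible refactor sketch, not in this commit: pair turn_start/turn_end
# emission in one place. token_total is dropped (0) in this simplification.
import time
from contextlib import contextmanager


@contextmanager
def telemetry_turn(*, source: str, architecture_mode: str, request_id: str, turn_id: str):
    common = dict(
        source=source,
        architecture_mode=architecture_mode,
        orchestrator_used=False,
        worker_count=0,
        retry_count=0,
        request_id=request_id,
        turn_id=turn_id,
    )
    log_architecture_telemetry(
        phase="turn_start", status="started", latency_ms=0.0, token_total=0, **common
    )
    started_at = time.perf_counter()
    try:
        yield
    except Exception as e:
        log_architecture_telemetry(
            phase="turn_end",
            status="error",
            latency_ms=(time.perf_counter() - started_at) * 1000.0,
            token_total=0,
            extra={"error_type": type(e).__name__},
            **common,
        )
        raise
    else:
        log_architecture_telemetry(
            phase="turn_end",
            status="completed",
            latency_ms=(time.perf_counter() - started_at) * 1000.0,
            token_total=0,
            **common,
        )
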


@@ -22,6 +22,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
from app.agents.new_chat.architecture_mode import resolve_architecture_mode
from app.agents.new_chat.filesystem_selection import (
ClientPlatform,
FilesystemMode,
@@ -61,7 +62,10 @@ from app.schemas.new_chat import (
TokenUsageSummary,
)
from app.services.token_tracking_service import record_token_usage
from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
from app.tasks.chat.stream_dispatch import (
dispatch_new_chat_stream,
dispatch_resume_chat_stream,
)
from app.users import current_active_user
from app.utils.rbac import check_permission
from app.utils.user_message_multimodal import (
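
The dispatch pair replaces the direct stream_new_chat/stream_resume_chat imports used before this commit. Given only the names and call shapes in this diff, a plausible reading of app/tasks/chat/stream_dispatch.py is a thin router that, for now, funnels every mode to the existing single-agent generators (only the two function names and their keyword signatures are confirmed; everything else below is an assumption):

# Plausible sketch of app/tasks/chat/stream_dispatch.py, inferred from the
# call sites in this diff; everything past the signatures is an assumption.
from collections.abc import AsyncIterator
from typing import Any

from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat


def dispatch_new_chat_stream(
    *, architecture_mode: str, stream_kwargs: dict[str, Any]
) -> AsyncIterator[str]:
    # Scaffold: a "multi_agent" branch to an orchestrator-backed stream would
    # hook in here; for now every mode falls through to the single-agent
    # generator, which is assumed to accept the new architecture_mode kwarg.
    return stream_new_chat(**stream_kwargs)


def dispatch_resume_chat_stream(
    *, architecture_mode: str, stream_kwargs: dict[str, Any]
) -> AsyncIterator[str]:
    return stream_resume_chat(**stream_kwargs)
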
@@ -1244,23 +1248,28 @@ async def handle_new_chat(
image_urls = (
[p.as_data_url() for p in request.user_images] if request.user_images else None
)
architecture_mode = resolve_architecture_mode(request.architecture_mode)
return StreamingResponse(
stream_new_chat(
user_query=request.user_query,
search_space_id=request.search_space_id,
chat_id=request.chat_id,
user_id=str(user.id),
llm_config_id=llm_config_id,
mentioned_document_ids=request.mentioned_document_ids,
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
needs_history_bootstrap=thread.needs_history_bootstrap,
thread_visibility=thread.visibility,
current_user_display_name=user.display_name or "A team member",
disabled_tools=request.disabled_tools,
filesystem_selection=filesystem_selection,
request_id=getattr(http_request.state, "request_id", "unknown"),
user_image_data_urls=image_urls,
dispatch_new_chat_stream(
architecture_mode=architecture_mode.value,
stream_kwargs={
"user_query": request.user_query,
"search_space_id": request.search_space_id,
"chat_id": request.chat_id,
"user_id": str(user.id),
"llm_config_id": llm_config_id,
"mentioned_document_ids": request.mentioned_document_ids,
"mentioned_surfsense_doc_ids": request.mentioned_surfsense_doc_ids,
"needs_history_bootstrap": thread.needs_history_bootstrap,
"thread_visibility": thread.visibility,
"current_user_display_name": user.display_name or "A team member",
"disabled_tools": request.disabled_tools,
"filesystem_selection": filesystem_selection,
"request_id": getattr(http_request.state, "request_id", "unknown"),
"user_image_data_urls": image_urls,
"architecture_mode": architecture_mode.value,
},
),
media_type="text/event-stream",
headers={
@@ -1458,6 +1467,7 @@ async def regenerate_response(
if request.user_images is not None:
regenerate_image_urls = [p.as_data_url() for p in request.user_images]
architecture_mode = resolve_architecture_mode(request.architecture_mode)
if user_query_to_use is None:
raise HTTPException(
@@ -1506,23 +1516,28 @@
async def stream_with_cleanup():
streaming_completed = False
try:
async for chunk in stream_new_chat(
user_query=str(user_query_to_use),
search_space_id=request.search_space_id,
chat_id=thread_id,
user_id=str(user.id),
llm_config_id=llm_config_id,
mentioned_document_ids=request.mentioned_document_ids,
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
checkpoint_id=target_checkpoint_id,
needs_history_bootstrap=thread.needs_history_bootstrap,
thread_visibility=thread.visibility,
current_user_display_name=user.display_name or "A team member",
disabled_tools=request.disabled_tools,
filesystem_selection=filesystem_selection,
request_id=getattr(http_request.state, "request_id", "unknown"),
user_image_data_urls=regenerate_image_urls or None,
):
stream = dispatch_new_chat_stream(
architecture_mode=architecture_mode.value,
stream_kwargs={
"user_query": str(user_query_to_use),
"search_space_id": request.search_space_id,
"chat_id": thread_id,
"user_id": str(user.id),
"llm_config_id": llm_config_id,
"mentioned_document_ids": request.mentioned_document_ids,
"mentioned_surfsense_doc_ids": request.mentioned_surfsense_doc_ids,
"checkpoint_id": target_checkpoint_id,
"needs_history_bootstrap": thread.needs_history_bootstrap,
"thread_visibility": thread.visibility,
"current_user_display_name": user.display_name or "A team member",
"disabled_tools": request.disabled_tools,
"filesystem_selection": filesystem_selection,
"request_id": getattr(http_request.state, "request_id", "unknown"),
"user_image_data_urls": regenerate_image_urls or None,
"architecture_mode": architecture_mode.value,
},
)
async for chunk in stream:
yield chunk
streaming_completed = True
finally:
@@ -1628,6 +1643,7 @@ async def resume_chat(
)
decisions = [d.model_dump() for d in request.decisions]
architecture_mode = resolve_architecture_mode(request.architecture_mode)
# Release the read-transaction so we don't hold ACCESS SHARE locks
# on searchspaces/documents for the entire duration of the stream.
@@ -1635,15 +1651,19 @@
await session.close()
return StreamingResponse(
stream_resume_chat(
chat_id=thread_id,
search_space_id=request.search_space_id,
decisions=decisions,
user_id=str(user.id),
llm_config_id=llm_config_id,
thread_visibility=thread.visibility,
filesystem_selection=filesystem_selection,
request_id=getattr(http_request.state, "request_id", "unknown"),
dispatch_resume_chat_stream(
architecture_mode=architecture_mode.value,
stream_kwargs={
"chat_id": thread_id,
"search_space_id": request.search_space_id,
"decisions": decisions,
"user_id": str(user.id),
"llm_config_id": llm_config_id,
"thread_visibility": thread.visibility,
"filesystem_selection": filesystem_selection,
"request_id": getattr(http_request.state, "request_id", "unknown"),
"architecture_mode": architecture_mode.value,
},
),
media_type="text/event-stream",
headers={