feat(chat): add multi-agent mode routing scaffold and telemetry.

This commit is contained in:
CREDO23 2026-04-28 15:35:14 +02:00
parent 78f71c7e3a
commit 7b9a218d62
13 changed files with 742 additions and 58 deletions

View file

@@ -4,6 +4,7 @@ from __future__ import annotations
import logging
import secrets
import time
import uuid
from pathlib import PurePosixPath
from typing import Any
@@ -12,6 +13,11 @@ from fastapi import APIRouter, HTTPException, Request, Response, UploadFile, sta
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from app.agents.new_chat.architecture_mode import (
ArchitectureMode,
resolve_architecture_mode,
)
from app.agents.new_chat.telemetry import log_architecture_telemetry
from app.config import config
from app.etl_pipeline.file_classifier import (
DIRECT_CONVERT_EXTENSIONS,
@@ -84,6 +90,7 @@ class AnonChatRequest(BaseModel):
messages: list[dict[str, Any]] = Field(..., min_length=1)
disabled_tools: list[str] | None = None
turnstile_token: str | None = None
architecture_mode: ArchitectureMode | None = None
class AnonQuotaResponse(BaseModel):
@@ -361,6 +368,22 @@ async def stream_anonymous_chat(
accumulator = start_turn()
streaming_service = VercelStreamingService()
architecture_mode = resolve_architecture_mode(body.architecture_mode)
started_at = time.perf_counter()
turn_id = f"anon:{session_id}:{request_id}"
log_architecture_telemetry(
phase="turn_start",
source="anon_chat",
status="started",
architecture_mode=architecture_mode.value,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=0.0,
token_total=0,
request_id=request_id,
turn_id=turn_id,
)
try:
async with shielded_async_session() as session:
@@ -400,7 +423,10 @@ async def stream_anonymous_chat(
}
langgraph_config = {
"configurable": {"thread_id": anon_thread_id},
"configurable": {
"thread_id": anon_thread_id,
"architecture_mode": architecture_mode.value,
},
"recursion_limit": 40,
}
@@ -468,6 +494,19 @@ async def stream_anonymous_chat(
"total_tokens": accumulator.grand_total,
},
)
log_architecture_telemetry(
phase="turn_end",
source="anon_chat",
status="completed",
architecture_mode=architecture_mode.value,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - started_at) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=turn_id,
)
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
@@ -475,6 +514,20 @@ async def stream_anonymous_chat(
except Exception as e:
logger.exception("Anonymous chat stream error")
log_architecture_telemetry(
phase="turn_end",
source="anon_chat",
status="error",
architecture_mode=architecture_mode.value,
orchestrator_used=False,
worker_count=0,
retry_count=0,
latency_ms=(time.perf_counter() - started_at) * 1000.0,
token_total=accumulator.grand_total,
request_id=request_id,
turn_id=turn_id,
extra={"error_type": type(e).__name__},
)
await TokenQuotaService.anon_release(session_key, ip_key, request_id)
yield streaming_service.format_error(f"Error during chat: {e!s}")
yield streaming_service.format_done()