feat: add performance logging middleware and enhance performance tracking across services

- Introduced RequestPerfMiddleware to log request performance metrics, including slow request thresholds.
- Updated various services and retrievers to utilize the new performance logging utility for better tracking of execution times.
- Enhanced existing methods with detailed performance logs for operations such as embedding, searching, and indexing.
- Removed deprecated logging setup in stream_new_chat and replaced it with the new performance logger.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-27 16:32:30 -08:00
parent 68bb196d45
commit 664c43ca13
11 changed files with 430 additions and 36 deletions

View file

@ -10,6 +10,7 @@ This module provides:
import asyncio
import json
import time
from datetime import datetime
from typing import Any
@ -19,6 +20,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.db import async_session_maker
from app.services.connector_service import ConnectorService
from app.utils.perf import get_perf_logger
# =============================================================================
# Connector Constants and Normalization
@ -412,6 +414,9 @@ async def search_knowledge_base_async(
Returns:
Formatted string with search results
"""
perf = get_perf_logger()
t0 = time.perf_counter()
all_documents: list[dict[str, Any]] = []
# Resolve date range (default last 2 years)
@ -423,6 +428,10 @@ async def search_knowledge_base_async(
)
connectors = _normalize_connectors(connectors_to_search, available_connectors)
perf.info(
"[kb_search] searching %d connectors: %s (space=%d, top_k=%d)",
len(connectors), connectors[:5], search_space_id, top_k,
)
connector_specs: dict[str, tuple[str, bool, bool, dict[str, Any]]] = {
"YOUTUBE_VIDEO": ("search_youtube", True, True, {}),
@ -492,20 +501,32 @@ async def search_knowledge_base_async(
try:
# Use isolated session per connector. Shared AsyncSession cannot safely
# run concurrent DB operations.
t_conn = time.perf_counter()
async with semaphore, async_session_maker() as isolated_session:
isolated_connector_service = ConnectorService(
isolated_session, search_space_id
)
connector_method = getattr(isolated_connector_service, method_name)
_, chunks = await connector_method(**kwargs)
perf.info(
"[kb_search] connector=%s results=%d in %.3fs",
connector, len(chunks), time.perf_counter() - t_conn,
)
return chunks
except Exception as e:
print(f"Error searching connector {connector}: {e}")
perf.warning(
"[kb_search] connector=%s FAILED in %.3fs: %s",
connector, time.perf_counter() - t_conn, e,
)
return []
t_gather = time.perf_counter()
connector_results = await asyncio.gather(
*[_search_one_connector(connector) for connector in connectors]
)
perf.info(
"[kb_search] all connectors gathered in %.3fs", time.perf_counter() - t_gather,
)
for chunks in connector_results:
all_documents.extend(chunks)
@ -552,7 +573,12 @@ async def search_knowledge_base_async(
deduplicated.append(doc)
output_budget = _compute_tool_output_budget(max_input_tokens)
return format_documents_for_context(deduplicated, max_chars=output_budget)
result = format_documents_for_context(deduplicated, max_chars=output_budget)
perf.info(
"[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d space=%d",
time.perf_counter() - t0, len(all_documents), len(deduplicated), len(result), search_space_id,
)
return result
def _build_connector_docstring(available_connectors: list[str] | None) -> str: