mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-23 19:05:16 +02:00
feat: add performance logging middleware and enhance performance tracking across services
- Introduced RequestPerfMiddleware to log request performance metrics, including slow request thresholds. - Updated various services and retrievers to utilize the new performance logging utility for better tracking of execution times. - Enhanced existing methods with detailed performance logs for operations such as embedding, searching, and indexing. - Removed deprecated logging setup in stream_new_chat and replaced it with the new performance logger.
This commit is contained in:
parent
68bb196d45
commit
664c43ca13
11 changed files with 430 additions and 36 deletions
|
|
@ -13,6 +13,7 @@ synchronous ChatLiteLLM-like interface and async methods.
|
|||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForLLMRun
|
||||
|
|
@ -26,6 +27,8 @@ from litellm.exceptions import (
|
|||
ContextWindowExceededError,
|
||||
)
|
||||
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CONTEXT_OVERFLOW_PATTERNS = re.compile(
|
||||
|
|
@ -410,6 +413,10 @@ class ChatLiteLLMRouter(BaseChatModel):
|
|||
if not self._router:
|
||||
raise ValueError("Router not initialized")
|
||||
|
||||
perf = get_perf_logger()
|
||||
t0 = time.perf_counter()
|
||||
msg_count = len(messages)
|
||||
|
||||
# Convert LangChain messages to OpenAI format
|
||||
formatted_messages = self._convert_messages(messages)
|
||||
|
||||
|
|
@ -428,12 +435,28 @@ class ChatLiteLLMRouter(BaseChatModel):
|
|||
**call_kwargs,
|
||||
)
|
||||
except ContextWindowExceededError as e:
|
||||
perf.warning(
|
||||
"[llm_router] _generate CONTEXT_OVERFLOW msgs=%d in %.3fs",
|
||||
msg_count, time.perf_counter() - t0,
|
||||
)
|
||||
raise ContextOverflowError(str(e)) from e
|
||||
except LiteLLMBadRequestError as e:
|
||||
if _is_context_overflow_error(e):
|
||||
perf.warning(
|
||||
"[llm_router] _generate CONTEXT_OVERFLOW msgs=%d in %.3fs",
|
||||
msg_count, time.perf_counter() - t0,
|
||||
)
|
||||
raise ContextOverflowError(str(e)) from e
|
||||
raise
|
||||
|
||||
elapsed = time.perf_counter() - t0
|
||||
perf.info(
|
||||
"[llm_router] _generate completed msgs=%d tools=%d in %.3fs",
|
||||
msg_count,
|
||||
len(self._bound_tools) if self._bound_tools else 0,
|
||||
elapsed,
|
||||
)
|
||||
|
||||
# Convert response to ChatResult with potential tool calls
|
||||
message = self._convert_response_to_message(response.choices[0].message)
|
||||
generation = ChatGeneration(message=message)
|
||||
|
|
@ -453,6 +476,10 @@ class ChatLiteLLMRouter(BaseChatModel):
|
|||
if not self._router:
|
||||
raise ValueError("Router not initialized")
|
||||
|
||||
perf = get_perf_logger()
|
||||
t0 = time.perf_counter()
|
||||
msg_count = len(messages)
|
||||
|
||||
# Convert LangChain messages to OpenAI format
|
||||
formatted_messages = self._convert_messages(messages)
|
||||
|
||||
|
|
@ -471,12 +498,28 @@ class ChatLiteLLMRouter(BaseChatModel):
|
|||
**call_kwargs,
|
||||
)
|
||||
except ContextWindowExceededError as e:
|
||||
perf.warning(
|
||||
"[llm_router] _agenerate CONTEXT_OVERFLOW msgs=%d in %.3fs",
|
||||
msg_count, time.perf_counter() - t0,
|
||||
)
|
||||
raise ContextOverflowError(str(e)) from e
|
||||
except LiteLLMBadRequestError as e:
|
||||
if _is_context_overflow_error(e):
|
||||
perf.warning(
|
||||
"[llm_router] _agenerate CONTEXT_OVERFLOW msgs=%d in %.3fs",
|
||||
msg_count, time.perf_counter() - t0,
|
||||
)
|
||||
raise ContextOverflowError(str(e)) from e
|
||||
raise
|
||||
|
||||
elapsed = time.perf_counter() - t0
|
||||
perf.info(
|
||||
"[llm_router] _agenerate completed msgs=%d tools=%d in %.3fs",
|
||||
msg_count,
|
||||
len(self._bound_tools) if self._bound_tools else 0,
|
||||
elapsed,
|
||||
)
|
||||
|
||||
# Convert response to ChatResult with potential tool calls
|
||||
message = self._convert_response_to_message(response.choices[0].message)
|
||||
generation = ChatGeneration(message=message)
|
||||
|
|
@ -541,6 +584,10 @@ class ChatLiteLLMRouter(BaseChatModel):
|
|||
if not self._router:
|
||||
raise ValueError("Router not initialized")
|
||||
|
||||
perf = get_perf_logger()
|
||||
t0 = time.perf_counter()
|
||||
msg_count = len(messages)
|
||||
|
||||
formatted_messages = self._convert_messages(messages)
|
||||
|
||||
# Add tools if bound
|
||||
|
|
@ -559,20 +606,48 @@ class ChatLiteLLMRouter(BaseChatModel):
|
|||
**call_kwargs,
|
||||
)
|
||||
except ContextWindowExceededError as e:
|
||||
perf.warning(
|
||||
"[llm_router] _astream CONTEXT_OVERFLOW msgs=%d in %.3fs",
|
||||
msg_count, time.perf_counter() - t0,
|
||||
)
|
||||
raise ContextOverflowError(str(e)) from e
|
||||
except LiteLLMBadRequestError as e:
|
||||
if _is_context_overflow_error(e):
|
||||
perf.warning(
|
||||
"[llm_router] _astream CONTEXT_OVERFLOW msgs=%d in %.3fs",
|
||||
msg_count, time.perf_counter() - t0,
|
||||
)
|
||||
raise ContextOverflowError(str(e)) from e
|
||||
raise
|
||||
|
||||
# Yield chunks asynchronously
|
||||
t_first_chunk = time.perf_counter()
|
||||
perf.info(
|
||||
"[llm_router] _astream connection established msgs=%d in %.3fs",
|
||||
msg_count, t_first_chunk - t0,
|
||||
)
|
||||
|
||||
chunk_count = 0
|
||||
first_chunk_logged = False
|
||||
async for chunk in response:
|
||||
if hasattr(chunk, "choices") and chunk.choices:
|
||||
delta = chunk.choices[0].delta
|
||||
chunk_msg = self._convert_delta_to_chunk(delta)
|
||||
if chunk_msg:
|
||||
chunk_count += 1
|
||||
if not first_chunk_logged:
|
||||
perf.info(
|
||||
"[llm_router] _astream first chunk in %.3fs (total %.3fs from start)",
|
||||
time.perf_counter() - t_first_chunk,
|
||||
time.perf_counter() - t0,
|
||||
)
|
||||
first_chunk_logged = True
|
||||
yield ChatGenerationChunk(message=chunk_msg)
|
||||
|
||||
perf.info(
|
||||
"[llm_router] _astream completed chunks=%d total=%.3fs",
|
||||
chunk_count, time.perf_counter() - t0,
|
||||
)
|
||||
|
||||
def _convert_messages(self, messages: list[BaseMessage]) -> list[dict]:
|
||||
"""Convert LangChain messages to OpenAI format."""
|
||||
from langchain_core.messages import (
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue