mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
feat(error-handling): implement LLM error adaptation and classification for chat streaming
- Introduced LLMErrorCategory and adapt_llm_exception to normalize LLM exceptions. - Updated llm_retryable_message and llm_permanent_message to utilize the new adaptation logic. - Enhanced classify_stream_exception to classify provider errors and return user-friendly messages. - Added tests for error classification and adaptation to ensure robustness. - Updated frontend error handling to display appropriate messages based on new classifications.
This commit is contained in:
parent
203ef78346
commit
8e8cf96faa
9 changed files with 533 additions and 38 deletions
|
|
@ -14,6 +14,8 @@ from litellm.exceptions import (
|
||||||
)
|
)
|
||||||
from sqlalchemy.exc import IntegrityError as IntegrityError
|
from sqlalchemy.exc import IntegrityError as IntegrityError
|
||||||
|
|
||||||
|
from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
|
||||||
|
|
||||||
# Tuples for use directly in except clauses.
|
# Tuples for use directly in except clauses.
|
||||||
RETRYABLE_LLM_ERRORS = (
|
RETRYABLE_LLM_ERRORS = (
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
|
|
@ -97,38 +99,20 @@ def safe_exception_message(exc: Exception) -> str:
|
||||||
|
|
||||||
def llm_retryable_message(exc: Exception) -> str:
|
def llm_retryable_message(exc: Exception) -> str:
|
||||||
try:
|
try:
|
||||||
if isinstance(exc, RateLimitError):
|
adapted = adapt_llm_exception(exc)
|
||||||
return PipelineMessages.RATE_LIMIT
|
if adapted.category is LLMErrorCategory.UNKNOWN:
|
||||||
if isinstance(exc, Timeout):
|
return safe_exception_message(exc)
|
||||||
return PipelineMessages.LLM_TIMEOUT
|
return adapted.user_message
|
||||||
if isinstance(exc, ServiceUnavailableError):
|
|
||||||
return PipelineMessages.LLM_UNAVAILABLE
|
|
||||||
if isinstance(exc, BadGatewayError):
|
|
||||||
return PipelineMessages.LLM_BAD_GATEWAY
|
|
||||||
if isinstance(exc, InternalServerError):
|
|
||||||
return PipelineMessages.LLM_SERVER_ERROR
|
|
||||||
if isinstance(exc, APIConnectionError):
|
|
||||||
return PipelineMessages.LLM_CONNECTION
|
|
||||||
return safe_exception_message(exc)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return "Something went wrong when calling the LLM."
|
return "Something went wrong when calling the LLM."
|
||||||
|
|
||||||
|
|
||||||
def llm_permanent_message(exc: Exception) -> str:
|
def llm_permanent_message(exc: Exception) -> str:
|
||||||
try:
|
try:
|
||||||
if isinstance(exc, AuthenticationError):
|
adapted = adapt_llm_exception(exc)
|
||||||
return PipelineMessages.LLM_AUTH
|
if adapted.category is LLMErrorCategory.UNKNOWN:
|
||||||
if isinstance(exc, PermissionDeniedError):
|
return safe_exception_message(exc)
|
||||||
return PipelineMessages.LLM_PERMISSION
|
return adapted.user_message
|
||||||
if isinstance(exc, NotFoundError):
|
|
||||||
return PipelineMessages.LLM_NOT_FOUND
|
|
||||||
if isinstance(exc, BadRequestError):
|
|
||||||
return PipelineMessages.LLM_BAD_REQUEST
|
|
||||||
if isinstance(exc, UnprocessableEntityError):
|
|
||||||
return PipelineMessages.LLM_UNPROCESSABLE
|
|
||||||
if isinstance(exc, APIResponseValidationError):
|
|
||||||
return PipelineMessages.LLM_RESPONSE
|
|
||||||
return safe_exception_message(exc)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return "Something went wrong when calling the LLM."
|
return "Something went wrong when calling the LLM."
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ from app.etl_pipeline.file_classifier import (
|
||||||
PLAINTEXT_EXTENSIONS,
|
PLAINTEXT_EXTENSIONS,
|
||||||
)
|
)
|
||||||
from app.rate_limiter import limiter
|
from app.rate_limiter import limiter
|
||||||
|
from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -474,7 +475,15 @@ async def stream_anonymous_chat(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Anonymous chat stream error")
|
logger.exception("Anonymous chat stream error")
|
||||||
await TokenQuotaService.anon_release(session_key, ip_key, request_id)
|
await TokenQuotaService.anon_release(session_key, ip_key, request_id)
|
||||||
yield streaming_service.format_error(f"Error during chat: {e!s}")
|
_, error_code, _, _, user_message, extra = classify_stream_exception(
|
||||||
|
e,
|
||||||
|
flow_label="chat",
|
||||||
|
)
|
||||||
|
yield streaming_service.format_error(
|
||||||
|
user_message,
|
||||||
|
error_code=error_code,
|
||||||
|
extra=extra,
|
||||||
|
)
|
||||||
yield streaming_service.format_done()
|
yield streaming_service.format_done()
|
||||||
finally:
|
finally:
|
||||||
await TokenQuotaService.anon_release_stream_slot(client_ip)
|
await TokenQuotaService.anon_release_stream_slot(client_ip)
|
||||||
|
|
|
||||||
251
surfsense_backend/app/services/llm_error_adapter.py
Normal file
251
surfsense_backend/app/services/llm_error_adapter.py
Normal file
|
|
@ -0,0 +1,251 @@
|
||||||
|
"""Normalize provider/LLM exceptions into low-cardinality product categories."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import StrEnum
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class LLMErrorCategory(StrEnum):
|
||||||
|
RATE_LIMITED = "rate_limited"
|
||||||
|
TIMEOUT = "timeout"
|
||||||
|
PROVIDER_UNAVAILABLE = "provider_unavailable"
|
||||||
|
BAD_GATEWAY = "bad_gateway"
|
||||||
|
CONNECTION_FAILED = "connection_failed"
|
||||||
|
AUTH_FAILED = "auth_failed"
|
||||||
|
PERMISSION_DENIED = "permission_denied"
|
||||||
|
MODEL_NOT_FOUND = "model_not_found"
|
||||||
|
BAD_REQUEST = "bad_request"
|
||||||
|
CONTEXT_LIMIT = "context_limit"
|
||||||
|
RESPONSE_INVALID = "response_invalid"
|
||||||
|
SERVER_ERROR = "server_error"
|
||||||
|
UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class LLMErrorAdaptation:
    """Immutable, normalized description of a single LLM/provider exception."""

    # Normalized product category for the failure.
    category: LLMErrorCategory
    # Whether the category is considered worth retrying.
    retryable: bool
    # Human-readable message associated with the category.
    user_message: str
    # Status code found in the provider's error payload, when one was found.
    provider_status_code: int | None = None
    # Provider-supplied error "type" string, when one was found.
    provider_error_type: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# User-facing message for every category. `adapt_llm_exception` indexes this
# mapping directly, so every LLMErrorCategory member needs an entry here.
_CATEGORY_MESSAGES: dict[LLMErrorCategory, str] = {
    LLMErrorCategory.RATE_LIMITED: "LLM rate limit exceeded. Will retry on next sync.",
    LLMErrorCategory.TIMEOUT: "LLM request timed out. Will retry on next sync.",
    LLMErrorCategory.PROVIDER_UNAVAILABLE: "LLM service temporarily unavailable. Will retry on next sync.",
    LLMErrorCategory.BAD_GATEWAY: "LLM gateway error. Will retry on next sync.",
    LLMErrorCategory.CONNECTION_FAILED: "Could not reach the LLM service. Check network connectivity.",
    LLMErrorCategory.AUTH_FAILED: "LLM authentication failed. Check your API key.",
    LLMErrorCategory.PERMISSION_DENIED: "LLM request denied. Check your account permissions.",
    LLMErrorCategory.MODEL_NOT_FOUND: "Model not found. Check your model configuration.",
    LLMErrorCategory.BAD_REQUEST: "LLM rejected the request. Document content may be invalid.",
    LLMErrorCategory.CONTEXT_LIMIT: "Document exceeds the LLM context window even after optimization.",
    LLMErrorCategory.RESPONSE_INVALID: "LLM returned an invalid response.",
    LLMErrorCategory.SERVER_ERROR: "LLM internal server error. Will retry on next sync.",
    LLMErrorCategory.UNKNOWN: "Something went wrong when calling the LLM.",
}

# Categories reported with `retryable=True` by `adapt_llm_exception`.
_RETRYABLE_CATEGORIES = {
    LLMErrorCategory.RATE_LIMITED,
    LLMErrorCategory.TIMEOUT,
    LLMErrorCategory.PROVIDER_UNAVAILABLE,
    LLMErrorCategory.BAD_GATEWAY,
    LLMErrorCategory.CONNECTION_FAILED,
    LLMErrorCategory.SERVER_ERROR,
}

# (category, class-name hints) pairs. `_category_from_class_name` tests each
# hint as a substring of the class names in an exception's MRO, so matching
# works on names alone and does not require importing provider SDK classes.
_CLASS_NAME_MAP: tuple[tuple[LLMErrorCategory, tuple[str, ...]], ...] = (
    (
        LLMErrorCategory.RATE_LIMITED,
        ("RateLimitError", "TooManyRequests", "TooManyRequestsError"),
    ),
    (LLMErrorCategory.TIMEOUT, ("Timeout", "APITimeoutError", "TimeoutException")),
    (
        LLMErrorCategory.PROVIDER_UNAVAILABLE,
        ("ServiceUnavailableError", "ServiceUnavailable"),
    ),
    (
        LLMErrorCategory.BAD_GATEWAY,
        ("BadGatewayError", "GatewayTimeoutError"),
    ),
    (
        LLMErrorCategory.CONNECTION_FAILED,
        ("APIConnectionError", "ConnectError", "ConnectTimeout", "ReadTimeout"),
    ),
    (
        LLMErrorCategory.AUTH_FAILED,
        ("AuthenticationError", "InvalidApiKey", "InvalidAPIKey", "InvalidApiKeyError"),
    ),
    (LLMErrorCategory.PERMISSION_DENIED, ("PermissionDeniedError", "ForbiddenError")),
    (LLMErrorCategory.MODEL_NOT_FOUND, ("NotFoundError", "ModelNotFoundError")),
    (
        LLMErrorCategory.CONTEXT_LIMIT,
        ("ContextWindowExceeded", "ContextOverflow", "ContextLimit"),
    ),
    (
        LLMErrorCategory.RESPONSE_INVALID,
        ("APIResponseValidationError", "ResponseValidationError"),
    ),
    (
        LLMErrorCategory.BAD_REQUEST,
        ("BadRequestError", "InvalidRequestError", "UnprocessableEntityError"),
    ),
    (LLMErrorCategory.SERVER_ERROR, ("InternalServerError",)),
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_error_payload(message: str) -> dict[str, Any] | None:
|
||||||
|
candidates = [message]
|
||||||
|
first_brace_idx = message.find("{")
|
||||||
|
if first_brace_idx >= 0:
|
||||||
|
candidates.append(message[first_brace_idx:])
|
||||||
|
|
||||||
|
for candidate in candidates:
|
||||||
|
try:
|
||||||
|
parsed = json.loads(candidate)
|
||||||
|
if isinstance(parsed, dict):
|
||||||
|
return parsed
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _class_names(exc: BaseException) -> tuple[str, ...]:
|
||||||
|
return tuple(cls.__name__ for cls in type(exc).__mro__)
|
||||||
|
|
||||||
|
|
||||||
|
def _category_from_class_name(exc: BaseException) -> LLMErrorCategory | None:
    """Infer a category from the class names in the exception's MRO.

    Class names are examined from the most-derived class outwards. For each
    name, every hint in ``_CLASS_NAME_MAP`` is tested as a substring and the
    longest matching hint decides the category; ties keep the earlier map
    entry. This lets a specific hint such as ``"ConnectTimeout"`` or
    ``"GatewayTimeoutError"`` win over the generic ``"Timeout"`` hint that is
    also a substring of those names.

    Args:
        exc: The exception to classify.

    Returns:
        The matched category, or ``None`` when no class name matches any hint.
    """
    for name in _class_names(exc):
        best_category: LLMErrorCategory | None = None
        best_len = 0
        for category, hints in _CLASS_NAME_MAP:
            for hint in hints:
                # Strict ">" keeps the first map entry when hint lengths tie.
                if len(hint) > best_len and hint in name:
                    best_category = category
                    best_len = len(hint)
        if best_category is not None:
            return best_category
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_provider_status_code(parsed: dict[str, Any] | None) -> int | None:
|
||||||
|
if not isinstance(parsed, dict):
|
||||||
|
return None
|
||||||
|
candidates: list[Any] = [parsed.get("code"), parsed.get("status")]
|
||||||
|
nested = parsed.get("error")
|
||||||
|
if isinstance(nested, dict):
|
||||||
|
candidates.extend([nested.get("code"), nested.get("status")])
|
||||||
|
for value in candidates:
|
||||||
|
try:
|
||||||
|
if value is None:
|
||||||
|
continue
|
||||||
|
return int(value)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_provider_error_type(parsed: dict[str, Any] | None) -> str | None:
|
||||||
|
if not isinstance(parsed, dict):
|
||||||
|
return None
|
||||||
|
candidates: list[Any] = [parsed.get("type")]
|
||||||
|
nested = parsed.get("error")
|
||||||
|
if isinstance(nested, dict):
|
||||||
|
candidates.append(nested.get("type"))
|
||||||
|
for value in candidates:
|
||||||
|
if isinstance(value, str) and value:
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _category_from_provider_payload(
    status_code: int | None,
    provider_error_type: str | None,
) -> LLMErrorCategory | None:
    """Map a provider status code and/or error type to a category.

    The status code is consulted first. Only codes in the HTTP 5xx range
    (500-599) fall back to ``SERVER_ERROR``; larger numbers are treated as
    provider-specific error codes rather than server failures. When the status
    code is inconclusive, the lowercased error type is matched against known
    type strings.

    Args:
        status_code: Status code extracted from the payload, if any.
        provider_error_type: Error type string extracted from the payload, if any.

    Returns:
        The matched category, or ``None`` when neither input is conclusive.
    """
    status_categories = {
        429: LLMErrorCategory.RATE_LIMITED,
        401: LLMErrorCategory.AUTH_FAILED,
        403: LLMErrorCategory.PERMISSION_DENIED,
        404: LLMErrorCategory.MODEL_NOT_FOUND,
        400: LLMErrorCategory.BAD_REQUEST,
        422: LLMErrorCategory.BAD_REQUEST,
        502: LLMErrorCategory.BAD_GATEWAY,
        504: LLMErrorCategory.BAD_GATEWAY,
        503: LLMErrorCategory.PROVIDER_UNAVAILABLE,
    }
    if status_code is not None:
        by_status = status_categories.get(status_code)
        if by_status is not None:
            return by_status
        # Bounded on purpose: an unbounded ">= 500" would classify any large
        # provider-specific code as a server error.
        if 500 <= status_code <= 599:
            return LLMErrorCategory.SERVER_ERROR

    normalized_type = (provider_error_type or "").lower()
    if normalized_type == "rate_limit_error":
        return LLMErrorCategory.RATE_LIMITED
    if normalized_type in {"authentication_error", "invalid_api_key", "invalid_api_key_error"}:
        return LLMErrorCategory.AUTH_FAILED
    if normalized_type in {"permission_denied", "forbidden"}:
        return LLMErrorCategory.PERMISSION_DENIED
    if normalized_type in {"not_found_error", "model_not_found"}:
        return LLMErrorCategory.MODEL_NOT_FOUND
    if normalized_type in {"context_length_exceeded", "context_window_exceeded"}:
        return LLMErrorCategory.CONTEXT_LIMIT
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _category_from_message(raw: str) -> LLMErrorCategory | None:
    """Guess a category from free-text hints in the exception message.

    The message is lowercased and checked against groups of substrings in a
    fixed order; the first group with a hit decides the category. Returns
    ``None`` when no hint is present.
    """
    text = raw.lower()
    # Order matters: earlier rules take precedence over later ones.
    rules = (
        (
            LLMErrorCategory.RATE_LIMITED,
            ("rate limit", "rate-limited", "temporarily rate-limited"),
        ),
        (
            LLMErrorCategory.AUTH_FAILED,
            (
                "invalid api key",
                "invalid_api_key",
                "authentication",
                "unauthorized",
                "user not found",
                "api key is expired",
                "expired api key",
            ),
        ),
        (LLMErrorCategory.PERMISSION_DENIED, ("forbidden", "permission denied")),
        (LLMErrorCategory.MODEL_NOT_FOUND, ("model not found",)),
        (
            LLMErrorCategory.CONTEXT_LIMIT,
            (
                "context length",
                "context window",
                "maximum context",
                "too many tokens",
            ),
        ),
    )
    for category, needles in rules:
        for needle in needles:
            if needle in text:
                return category
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def adapt_llm_exception(exc: BaseException) -> LLMErrorAdaptation:
    """Normalize an arbitrary exception into an ``LLMErrorAdaptation``.

    Classification sources are tried in this order: the structured provider
    payload embedded in the message, free-text hints in the message, the
    exception's class names, and finally ``LLMErrorCategory.UNKNOWN``.
    """
    text = str(exc)
    payload = _parse_error_payload(text)
    status = _extract_provider_status_code(payload)
    error_type = _extract_provider_error_type(payload)

    resolved = _category_from_provider_payload(status, error_type)
    if not resolved:
        resolved = _category_from_message(text)
    if not resolved:
        resolved = _category_from_class_name(exc)
    if not resolved:
        resolved = LLMErrorCategory.UNKNOWN

    is_retryable = resolved in _RETRYABLE_CATEGORIES
    message = _CATEGORY_MESSAGES[resolved]
    return LLMErrorAdaptation(
        category=resolved,
        retryable=is_retryable,
        user_message=message,
        provider_status_code=status,
        provider_error_type=error_type,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def llm_error_message(exc: BaseException) -> str:
    """Return the user-facing message for ``exc`` after adapting it."""
    adaptation = adapt_llm_exception(exc)
    return adaptation.user_message
|
||||||
|
|
||||||
|
|
@ -12,6 +12,7 @@ from app.agents.chat.multi_agent_chat.main_agent.middleware.busy_mutex import (
|
||||||
is_cancel_requested,
|
is_cancel_requested,
|
||||||
)
|
)
|
||||||
from app.agents.chat.runtime.errors import BusyError
|
from app.agents.chat.runtime.errors import BusyError
|
||||||
|
from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
|
||||||
|
|
||||||
TURN_CANCELLING_INITIAL_DELAY_MS = 200
|
TURN_CANCELLING_INITIAL_DELAY_MS = 200
|
||||||
TURN_CANCELLING_BACKOFF_FACTOR = 2
|
TURN_CANCELLING_BACKOFF_FACTOR = 2
|
||||||
|
|
@ -102,6 +103,9 @@ def _extract_provider_error_code(parsed: dict[str, Any] | None) -> int | None:
|
||||||
|
|
||||||
def is_provider_rate_limited(exc: BaseException) -> bool:
|
def is_provider_rate_limited(exc: BaseException) -> bool:
|
||||||
"""Return True if the exception looks like an upstream HTTP 429 / rate limit."""
|
"""Return True if the exception looks like an upstream HTTP 429 / rate limit."""
|
||||||
|
if adapt_llm_exception(exc).category is LLMErrorCategory.RATE_LIMITED:
|
||||||
|
return True
|
||||||
|
|
||||||
raw = str(exc)
|
raw = str(exc)
|
||||||
lowered = raw.lower()
|
lowered = raw.lower()
|
||||||
if "ratelimit" in type(exc).__name__.lower():
|
if "ratelimit" in type(exc).__name__.lower():
|
||||||
|
|
@ -131,6 +135,84 @@ def is_provider_rate_limited(exc: BaseException) -> bool:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _provider_error_extra(adapted: Any) -> dict[str, Any] | None:
|
||||||
|
extra: dict[str, Any] = {"provider_error_category": adapted.category.value}
|
||||||
|
if adapted.provider_status_code is not None:
|
||||||
|
extra["provider_status_code"] = adapted.provider_status_code
|
||||||
|
if adapted.provider_error_type:
|
||||||
|
extra["provider_error_type"] = adapted.provider_error_type
|
||||||
|
return extra
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_provider_exception(
    exc: Exception,
) -> tuple[
    str, str, Literal["info", "warn", "error"], bool, str, dict[str, Any] | None
] | None:
    """Classify a provider/LLM failure for the chat stream.

    Returns a ``(kind, error_code, severity, expected, user_message, extra)``
    tuple for the categories handled here, all reported with severity
    ``"warn"`` and ``expected=True``. Returns ``None`` for every other
    category so the caller can apply its own fallback.
    """
    adapted = adapt_llm_exception(exc)
    category = adapted.category

    if category is LLMErrorCategory.RATE_LIMITED:
        kind, code = "rate_limited", "RATE_LIMITED"
        message = "This model is temporarily rate-limited. Please try again in a few seconds or switch models."
    elif category in (
        LLMErrorCategory.AUTH_FAILED,
        LLMErrorCategory.PERMISSION_DENIED,
    ):
        kind, code = "model_auth_failed", "MODEL_AUTH_FAILED"
        message = "This model's API key is invalid or expired. Switch models, or update the API key."
    elif category is LLMErrorCategory.MODEL_NOT_FOUND:
        kind, code = "model_not_found", "MODEL_NOT_FOUND"
        message = "The selected model is unavailable or no longer exists. Switch to another model and try again."
    elif category is LLMErrorCategory.CONTEXT_LIMIT:
        kind, code = "model_context_limit", "MODEL_CONTEXT_LIMIT"
        message = "This request is too large for the selected model. Try a model with a larger context window or reduce the input."
    elif category in (
        LLMErrorCategory.TIMEOUT,
        LLMErrorCategory.PROVIDER_UNAVAILABLE,
        LLMErrorCategory.BAD_GATEWAY,
        LLMErrorCategory.CONNECTION_FAILED,
        LLMErrorCategory.SERVER_ERROR,
    ):
        kind, code = "model_provider_unavailable", "MODEL_PROVIDER_UNAVAILABLE"
        message = "The selected model provider is temporarily unavailable. Please try again or switch models."
    else:
        # Remaining categories are left to the caller's fallback.
        return None

    return (kind, code, "warn", True, message, _provider_error_extra(adapted))
|
||||||
|
|
||||||
|
|
||||||
def classify_stream_exception(
|
def classify_stream_exception(
|
||||||
exc: Exception,
|
exc: Exception,
|
||||||
*,
|
*,
|
||||||
|
|
@ -167,15 +249,9 @@ def classify_stream_exception(
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_provider_rate_limited(exc):
|
provider_classification = _classify_provider_exception(exc)
|
||||||
return (
|
if provider_classification is not None:
|
||||||
"rate_limited",
|
return provider_classification
|
||||||
"RATE_LIMITED",
|
|
||||||
"warn",
|
|
||||||
True,
|
|
||||||
"This model is temporarily rate-limited. Please try again in a few seconds or switch models.",
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
"server_error",
|
"server_error",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
|
||||||
|
from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
def _exception_named(name: str, message: str) -> Exception:
|
||||||
|
return type(name, (Exception,), {})(message)
|
||||||
|
|
||||||
|
|
||||||
|
def test_adapter_classifies_authentication_error_by_class_name() -> None:
    """An exception whose class is named AuthenticationError maps to AUTH_FAILED."""
    result = adapt_llm_exception(
        _exception_named("AuthenticationError", "provider rejected credentials")
    )

    assert result.category is LLMErrorCategory.AUTH_FAILED
    assert result.retryable is False
    assert result.user_message == "LLM authentication failed. Check your API key."
|
||||||
|
|
||||||
|
|
||||||
|
def test_adapter_classifies_embedded_provider_401_payload() -> None:
    """A 401 code inside an embedded JSON payload is classified as AUTH_FAILED."""
    raw_message = 'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'

    result = adapt_llm_exception(RuntimeError(raw_message))

    assert result.category is LLMErrorCategory.AUTH_FAILED
    assert result.provider_status_code == 401
|
||||||
|
|
||||||
|
|
||||||
|
def test_adapter_preserves_rate_limit_classification() -> None:
    """A bare JSON payload carrying code 429 is a retryable RATE_LIMITED error."""
    result = adapt_llm_exception(
        RuntimeError('{"error":{"message":"Slow down","code":429}}')
    )

    assert result.category is LLMErrorCategory.RATE_LIMITED
    assert result.retryable is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_classifier_maps_model_auth_to_stable_code() -> None:
    """Provider auth failures surface through the stream classifier as MODEL_AUTH_FAILED."""
    raw_message = 'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'

    classification = classify_stream_exception(
        RuntimeError(raw_message),
        flow_label="chat",
    )
    kind, code, severity, expected, message, extra = classification

    assert kind == "model_auth_failed"
    assert code == "MODEL_AUTH_FAILED"
    assert severity == "warn"
    assert expected is True
    assert "API key" in message
    assert extra == {
        "provider_error_category": "auth_failed",
        "provider_status_code": 401,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_classifier_keeps_unknown_errors_generic() -> None:
    """Errors with no recognizable provider signal keep the generic server_error shape."""
    classification = classify_stream_exception(
        RuntimeError("database exploded"),
        flow_label="chat",
    )
    kind, code, severity, expected, message, extra = classification

    assert kind == "server_error"
    assert code == "SERVER_ERROR"
    assert severity == "error"
    assert expected is False
    assert message == "Error during chat: database exploded"
    assert extra is None
|
||||||
|
|
||||||
|
|
@ -613,6 +613,18 @@ export default function NewChatPage() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (normalized.channel === "inline") {
|
||||||
|
if (normalized.assistantMessage) {
|
||||||
|
await persistAssistantErrorMessage({
|
||||||
|
threadId,
|
||||||
|
assistantMsgId,
|
||||||
|
text: normalized.assistantMessage,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
toast.error(normalized.userMessage);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
toast.error(normalized.userMessage);
|
toast.error(normalized.userMessage);
|
||||||
},
|
},
|
||||||
[currentUser?.id, persistAssistantErrorMessage, searchSpaceId, setPremiumAlertForThread]
|
[currentUser?.id, persistAssistantErrorMessage, searchSpaceId, setPremiumAlertForThread]
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,21 @@ function normalizeFreeChatErrorMessage(error: unknown): string {
|
||||||
if (code === "THREAD_BUSY") {
|
if (code === "THREAD_BUSY") {
|
||||||
return "A previous response is still stopping. Please try again in a moment.";
|
return "A previous response is still stopping. Please try again in a moment.";
|
||||||
}
|
}
|
||||||
|
if (code === "MODEL_AUTH_FAILED") {
|
||||||
|
return "This model’s API key is invalid or expired. Switch models, or update the API key.";
|
||||||
|
}
|
||||||
|
if (code === "MODEL_NOT_FOUND") {
|
||||||
|
return "This model is unavailable or no longer exists. Please switch models.";
|
||||||
|
}
|
||||||
|
if (code === "MODEL_CONTEXT_LIMIT") {
|
||||||
|
return "This request is too large for the selected model. Reduce the input or switch models.";
|
||||||
|
}
|
||||||
|
if (code === "MODEL_PROVIDER_UNAVAILABLE") {
|
||||||
|
return "The selected model provider is temporarily unavailable. Please try again or switch models.";
|
||||||
|
}
|
||||||
|
if (code === "RATE_LIMITED") {
|
||||||
|
return "This model is temporarily rate-limited. Please try again in a few seconds or switch models.";
|
||||||
|
}
|
||||||
return error.message || "An unexpected error occurred";
|
return error.message || "An unexpected error occurred";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -154,7 +169,7 @@ export function FreeChatPage() {
|
||||||
assistantMsgId: string,
|
assistantMsgId: string,
|
||||||
signal: AbortSignal,
|
signal: AbortSignal,
|
||||||
turnstileToken: string | null
|
turnstileToken: string | null
|
||||||
): Promise<"captcha" | void> => {
|
): Promise<"captcha" | undefined> => {
|
||||||
const reqBody: Record<string, unknown> = {
|
const reqBody: Record<string, unknown> = {
|
||||||
model_slug: modelSlug,
|
model_slug: modelSlug,
|
||||||
messages: messageHistory,
|
messages: messageHistory,
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,10 @@ export type ChatErrorKind =
|
||||||
| "thread_busy"
|
| "thread_busy"
|
||||||
| "send_failed_pre_accept"
|
| "send_failed_pre_accept"
|
||||||
| "auth_expired"
|
| "auth_expired"
|
||||||
|
| "model_auth_failed"
|
||||||
|
| "model_not_found"
|
||||||
|
| "model_context_limit"
|
||||||
|
| "model_provider_unavailable"
|
||||||
| "rate_limited"
|
| "rate_limited"
|
||||||
| "network_offline"
|
| "network_offline"
|
||||||
| "stream_interrupted"
|
| "stream_interrupted"
|
||||||
|
|
@ -14,7 +18,7 @@ export type ChatErrorKind =
|
||||||
| "server_error"
|
| "server_error"
|
||||||
| "unknown";
|
| "unknown";
|
||||||
|
|
||||||
export type ChatErrorChannel = "pinned_inline" | "toast" | "silent";
|
export type ChatErrorChannel = "pinned_inline" | "inline" | "toast" | "silent";
|
||||||
export type ChatTelemetryEvent = "chat_blocked" | "chat_error";
|
export type ChatTelemetryEvent = "chat_blocked" | "chat_error";
|
||||||
export type ChatErrorSeverity = "info" | "warn" | "error";
|
export type ChatErrorSeverity = "info" | "warn" | "error";
|
||||||
|
|
||||||
|
|
@ -206,6 +210,66 @@ export function classifyChatError(input: RawChatErrorInput): NormalizedChatError
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (errorCode === "MODEL_AUTH_FAILED") {
|
||||||
|
return {
|
||||||
|
kind: "model_auth_failed",
|
||||||
|
channel: "toast",
|
||||||
|
severity: "warn",
|
||||||
|
telemetryEvent: "chat_blocked",
|
||||||
|
isExpected: true,
|
||||||
|
userMessage:
|
||||||
|
"This model’s API key is invalid or expired. Switch models, or update the API key.",
|
||||||
|
rawMessage,
|
||||||
|
errorCode: errorCode ?? "MODEL_AUTH_FAILED",
|
||||||
|
details: { flow: input.flow, providerErrorType },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorCode === "MODEL_NOT_FOUND") {
|
||||||
|
return {
|
||||||
|
kind: "model_not_found",
|
||||||
|
channel: "toast",
|
||||||
|
severity: "warn",
|
||||||
|
telemetryEvent: "chat_blocked",
|
||||||
|
isExpected: true,
|
||||||
|
userMessage:
|
||||||
|
"This model is unavailable or no longer exists. Switch to another model and try again.",
|
||||||
|
rawMessage,
|
||||||
|
errorCode: errorCode ?? "MODEL_NOT_FOUND",
|
||||||
|
details: { flow: input.flow, providerErrorType },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorCode === "MODEL_CONTEXT_LIMIT") {
|
||||||
|
return {
|
||||||
|
kind: "model_context_limit",
|
||||||
|
channel: "toast",
|
||||||
|
severity: "warn",
|
||||||
|
telemetryEvent: "chat_blocked",
|
||||||
|
isExpected: true,
|
||||||
|
userMessage:
|
||||||
|
"This request is too large for the selected model. Reduce the input or switch models.",
|
||||||
|
rawMessage,
|
||||||
|
errorCode: errorCode ?? "MODEL_CONTEXT_LIMIT",
|
||||||
|
details: { flow: input.flow, providerErrorType },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorCode === "MODEL_PROVIDER_UNAVAILABLE") {
|
||||||
|
return {
|
||||||
|
kind: "model_provider_unavailable",
|
||||||
|
channel: "toast",
|
||||||
|
severity: "warn",
|
||||||
|
telemetryEvent: "chat_blocked",
|
||||||
|
isExpected: true,
|
||||||
|
userMessage:
|
||||||
|
"The selected model provider is temporarily unavailable. Please try again or switch models.",
|
||||||
|
rawMessage,
|
||||||
|
errorCode: errorCode ?? "MODEL_PROVIDER_UNAVAILABLE",
|
||||||
|
details: { flow: input.flow, providerErrorType },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if (errorCode === "RATE_LIMITED" || providerTypeNormalized === "rate_limit_error") {
|
if (errorCode === "RATE_LIMITED" || providerTypeNormalized === "rate_limit_error") {
|
||||||
return {
|
return {
|
||||||
kind: "rate_limited",
|
kind: "rate_limited",
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,10 @@ export function tagPreAcceptSendFailure(error: unknown): unknown {
|
||||||
"TURN_CANCELLING",
|
"TURN_CANCELLING",
|
||||||
"AUTH_EXPIRED",
|
"AUTH_EXPIRED",
|
||||||
"UNAUTHORIZED",
|
"UNAUTHORIZED",
|
||||||
|
"MODEL_AUTH_FAILED",
|
||||||
|
"MODEL_NOT_FOUND",
|
||||||
|
"MODEL_CONTEXT_LIMIT",
|
||||||
|
"MODEL_PROVIDER_UNAVAILABLE",
|
||||||
"RATE_LIMITED",
|
"RATE_LIMITED",
|
||||||
"NETWORK_ERROR",
|
"NETWORK_ERROR",
|
||||||
"STREAM_PARSE_ERROR",
|
"STREAM_PARSE_ERROR",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue