From 8e8cf96faa629e7f86ef60ed2f12ed61012b2a4f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 12 Jun 2026 05:03:14 +0530 Subject: [PATCH] feat(error-handling): implement LLM error adaptation and classification for chat streaming - Introduced LLMErrorCategory and adapt_llm_exception to normalize LLM exceptions. - Updated llm_retryable_message and llm_permanent_message to utilize the new adaptation logic. - Enhanced classify_stream_exception to classify provider errors and return user-friendly messages. - Added tests for error classification and adaptation to ensure robustness. - Updated frontend error handling to display appropriate messages based on new classifications. --- .../app/indexing_pipeline/exceptions.py | 36 +-- .../app/routes/anonymous_chat_routes.py | 11 +- .../app/services/llm_error_adapter.py | 251 ++++++++++++++++++ .../tasks/chat/streaming/errors/classifier.py | 94 ++++++- .../chat/streaming/test_error_classifier.py | 80 ++++++ .../new-chat/[[...chat_id]]/page.tsx | 12 + .../components/free-chat/free-chat-page.tsx | 17 +- .../lib/chat/chat-error-classifier.ts | 66 ++++- surfsense_web/lib/chat/chat-request-errors.ts | 4 + 9 files changed, 533 insertions(+), 38 deletions(-) create mode 100644 surfsense_backend/app/services/llm_error_adapter.py create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py diff --git a/surfsense_backend/app/indexing_pipeline/exceptions.py b/surfsense_backend/app/indexing_pipeline/exceptions.py index 666fa4b9f..bf9d9e9fa 100644 --- a/surfsense_backend/app/indexing_pipeline/exceptions.py +++ b/surfsense_backend/app/indexing_pipeline/exceptions.py @@ -14,6 +14,8 @@ from litellm.exceptions import ( ) from sqlalchemy.exc import IntegrityError as IntegrityError +from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception + # Tuples for use directly in except clauses. RETRYABLE_LLM_ERRORS = ( RateLimitError, @@ -97,38 +99,20 @@ def safe_exception_message(exc: Exception) -> str: def llm_retryable_message(exc: Exception) -> str: try: - if isinstance(exc, RateLimitError): - return PipelineMessages.RATE_LIMIT - if isinstance(exc, Timeout): - return PipelineMessages.LLM_TIMEOUT - if isinstance(exc, ServiceUnavailableError): - return PipelineMessages.LLM_UNAVAILABLE - if isinstance(exc, BadGatewayError): - return PipelineMessages.LLM_BAD_GATEWAY - if isinstance(exc, InternalServerError): - return PipelineMessages.LLM_SERVER_ERROR - if isinstance(exc, APIConnectionError): - return PipelineMessages.LLM_CONNECTION - return safe_exception_message(exc) + adapted = adapt_llm_exception(exc) + if adapted.category is LLMErrorCategory.UNKNOWN: + return safe_exception_message(exc) + return adapted.user_message except Exception: return "Something went wrong when calling the LLM." def llm_permanent_message(exc: Exception) -> str: try: - if isinstance(exc, AuthenticationError): - return PipelineMessages.LLM_AUTH - if isinstance(exc, PermissionDeniedError): - return PipelineMessages.LLM_PERMISSION - if isinstance(exc, NotFoundError): - return PipelineMessages.LLM_NOT_FOUND - if isinstance(exc, BadRequestError): - return PipelineMessages.LLM_BAD_REQUEST - if isinstance(exc, UnprocessableEntityError): - return PipelineMessages.LLM_UNPROCESSABLE - if isinstance(exc, APIResponseValidationError): - return PipelineMessages.LLM_RESPONSE - return safe_exception_message(exc) + adapted = adapt_llm_exception(exc) + if adapted.category is LLMErrorCategory.UNKNOWN: + return safe_exception_message(exc) + return adapted.user_message except Exception: return "Something went wrong when calling the LLM." diff --git a/surfsense_backend/app/routes/anonymous_chat_routes.py b/surfsense_backend/app/routes/anonymous_chat_routes.py index 84420e738..aa0e70464 100644 --- a/surfsense_backend/app/routes/anonymous_chat_routes.py +++ b/surfsense_backend/app/routes/anonymous_chat_routes.py @@ -18,6 +18,7 @@ from app.etl_pipeline.file_classifier import ( PLAINTEXT_EXTENSIONS, ) from app.rate_limiter import limiter +from app.tasks.chat.streaming.errors.classifier import classify_stream_exception logger = logging.getLogger(__name__) @@ -474,7 +475,15 @@ async def stream_anonymous_chat( except Exception as e: logger.exception("Anonymous chat stream error") await TokenQuotaService.anon_release(session_key, ip_key, request_id) - yield streaming_service.format_error(f"Error during chat: {e!s}") + _, error_code, _, _, user_message, extra = classify_stream_exception( + e, + flow_label="chat", + ) + yield streaming_service.format_error( + user_message, + error_code=error_code, + extra=extra, + ) yield streaming_service.format_done() finally: await TokenQuotaService.anon_release_stream_slot(client_ip) diff --git a/surfsense_backend/app/services/llm_error_adapter.py b/surfsense_backend/app/services/llm_error_adapter.py new file mode 100644 index 000000000..b0de15fb0 --- /dev/null +++ b/surfsense_backend/app/services/llm_error_adapter.py @@ -0,0 +1,251 @@ +"""Normalize provider/LLM exceptions into low-cardinality product categories.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from enum import StrEnum +from typing import Any + + +class LLMErrorCategory(StrEnum): + RATE_LIMITED = "rate_limited" + TIMEOUT = "timeout" + PROVIDER_UNAVAILABLE = "provider_unavailable" + BAD_GATEWAY = "bad_gateway" + CONNECTION_FAILED = "connection_failed" + AUTH_FAILED = "auth_failed" + PERMISSION_DENIED = "permission_denied" + MODEL_NOT_FOUND = "model_not_found" + BAD_REQUEST = "bad_request" + CONTEXT_LIMIT = "context_limit" + RESPONSE_INVALID = "response_invalid" + SERVER_ERROR = "server_error" + UNKNOWN = "unknown" + + +@dataclass(frozen=True) +class LLMErrorAdaptation: + category: LLMErrorCategory + retryable: bool + user_message: str + provider_status_code: int | None = None + provider_error_type: str | None = None + + +_CATEGORY_MESSAGES: dict[LLMErrorCategory, str] = { + LLMErrorCategory.RATE_LIMITED: "LLM rate limit exceeded. Will retry on next sync.", + LLMErrorCategory.TIMEOUT: "LLM request timed out. Will retry on next sync.", + LLMErrorCategory.PROVIDER_UNAVAILABLE: "LLM service temporarily unavailable. Will retry on next sync.", + LLMErrorCategory.BAD_GATEWAY: "LLM gateway error. Will retry on next sync.", + LLMErrorCategory.CONNECTION_FAILED: "Could not reach the LLM service. Check network connectivity.", + LLMErrorCategory.AUTH_FAILED: "LLM authentication failed. Check your API key.", + LLMErrorCategory.PERMISSION_DENIED: "LLM request denied. Check your account permissions.", + LLMErrorCategory.MODEL_NOT_FOUND: "Model not found. Check your model configuration.", + LLMErrorCategory.BAD_REQUEST: "LLM rejected the request. Document content may be invalid.", + LLMErrorCategory.CONTEXT_LIMIT: "Document exceeds the LLM context window even after optimization.", + LLMErrorCategory.RESPONSE_INVALID: "LLM returned an invalid response.", + LLMErrorCategory.SERVER_ERROR: "LLM internal server error. Will retry on next sync.", + LLMErrorCategory.UNKNOWN: "Something went wrong when calling the LLM.", +} + +_RETRYABLE_CATEGORIES = { + LLMErrorCategory.RATE_LIMITED, + LLMErrorCategory.TIMEOUT, + LLMErrorCategory.PROVIDER_UNAVAILABLE, + LLMErrorCategory.BAD_GATEWAY, + LLMErrorCategory.CONNECTION_FAILED, + LLMErrorCategory.SERVER_ERROR, +} + +_CLASS_NAME_MAP: tuple[tuple[LLMErrorCategory, tuple[str, ...]], ...] = ( + ( + LLMErrorCategory.RATE_LIMITED, + ("RateLimitError", "TooManyRequests", "TooManyRequestsError"), + ), + (LLMErrorCategory.TIMEOUT, ("Timeout", "APITimeoutError", "TimeoutException")), + ( + LLMErrorCategory.PROVIDER_UNAVAILABLE, + ("ServiceUnavailableError", "ServiceUnavailable"), + ), + ( + LLMErrorCategory.BAD_GATEWAY, + ("BadGatewayError", "GatewayTimeoutError"), + ), + ( + LLMErrorCategory.CONNECTION_FAILED, + ("APIConnectionError", "ConnectError", "ConnectTimeout", "ReadTimeout"), + ), + ( + LLMErrorCategory.AUTH_FAILED, + ("AuthenticationError", "InvalidApiKey", "InvalidAPIKey", "InvalidApiKeyError"), + ), + (LLMErrorCategory.PERMISSION_DENIED, ("PermissionDeniedError", "ForbiddenError")), + (LLMErrorCategory.MODEL_NOT_FOUND, ("NotFoundError", "ModelNotFoundError")), + ( + LLMErrorCategory.CONTEXT_LIMIT, + ("ContextWindowExceeded", "ContextOverflow", "ContextLimit"), + ), + ( + LLMErrorCategory.RESPONSE_INVALID, + ("APIResponseValidationError", "ResponseValidationError"), + ), + ( + LLMErrorCategory.BAD_REQUEST, + ("BadRequestError", "InvalidRequestError", "UnprocessableEntityError"), + ), + (LLMErrorCategory.SERVER_ERROR, ("InternalServerError",)), +) + + +def _parse_error_payload(message: str) -> dict[str, Any] | None: + candidates = [message] + first_brace_idx = message.find("{") + if first_brace_idx >= 0: + candidates.append(message[first_brace_idx:]) + + for candidate in candidates: + try: + parsed = json.loads(candidate) + if isinstance(parsed, dict): + return parsed + except Exception: + continue + return None + + +def _class_names(exc: BaseException) -> tuple[str, ...]: + return tuple(cls.__name__ for cls in type(exc).__mro__) + + +def _category_from_class_name(exc: BaseException) -> LLMErrorCategory | None: + names = _class_names(exc) + for category, hints in _CLASS_NAME_MAP: + if any(any(hint in name for hint in hints) for name in names): + return category + return None + + +def _extract_provider_status_code(parsed: dict[str, Any] | None) -> int | None: + if not isinstance(parsed, dict): + return None + candidates: list[Any] = [parsed.get("code"), parsed.get("status")] + nested = parsed.get("error") + if isinstance(nested, dict): + candidates.extend([nested.get("code"), nested.get("status")]) + for value in candidates: + try: + if value is None: + continue + return int(value) + except Exception: + continue + return None + + +def _extract_provider_error_type(parsed: dict[str, Any] | None) -> str | None: + if not isinstance(parsed, dict): + return None + candidates: list[Any] = [parsed.get("type")] + nested = parsed.get("error") + if isinstance(nested, dict): + candidates.append(nested.get("type")) + for value in candidates: + if isinstance(value, str) and value: + return value + return None + + +def _category_from_provider_payload( + status_code: int | None, + provider_error_type: str | None, +) -> LLMErrorCategory | None: + if status_code == 429: + return LLMErrorCategory.RATE_LIMITED + if status_code == 401: + return LLMErrorCategory.AUTH_FAILED + if status_code == 403: + return LLMErrorCategory.PERMISSION_DENIED + if status_code == 404: + return LLMErrorCategory.MODEL_NOT_FOUND + if status_code in (400, 422): + return LLMErrorCategory.BAD_REQUEST + if status_code in (502, 504): + return LLMErrorCategory.BAD_GATEWAY + if status_code == 503: + return LLMErrorCategory.PROVIDER_UNAVAILABLE + if status_code is not None and status_code >= 500: + return LLMErrorCategory.SERVER_ERROR + + normalized_type = (provider_error_type or "").lower() + if normalized_type == "rate_limit_error": + return LLMErrorCategory.RATE_LIMITED + if normalized_type in {"authentication_error", "invalid_api_key", "invalid_api_key_error"}: + return LLMErrorCategory.AUTH_FAILED + if normalized_type in {"permission_denied", "forbidden"}: + return LLMErrorCategory.PERMISSION_DENIED + if normalized_type in {"not_found_error", "model_not_found"}: + return LLMErrorCategory.MODEL_NOT_FOUND + if normalized_type in {"context_length_exceeded", "context_window_exceeded"}: + return LLMErrorCategory.CONTEXT_LIMIT + return None + + +def _category_from_message(raw: str) -> LLMErrorCategory | None: + lowered = raw.lower() + if any(hint in lowered for hint in ("rate limit", "rate-limited", "temporarily rate-limited")): + return LLMErrorCategory.RATE_LIMITED + if any( + hint in lowered + for hint in ( + "invalid api key", + "invalid_api_key", + "authentication", + "unauthorized", + "user not found", + "api key is expired", + "expired api key", + ) + ): + return LLMErrorCategory.AUTH_FAILED + if "forbidden" in lowered or "permission denied" in lowered: + return LLMErrorCategory.PERMISSION_DENIED + if "model not found" in lowered: + return LLMErrorCategory.MODEL_NOT_FOUND + if any( + hint in lowered + for hint in ( + "context length", + "context window", + "maximum context", + "too many tokens", + ) + ): + return LLMErrorCategory.CONTEXT_LIMIT + return None + + +def adapt_llm_exception(exc: BaseException) -> LLMErrorAdaptation: + raw = str(exc) + parsed = _parse_error_payload(raw) + status_code = _extract_provider_status_code(parsed) + provider_error_type = _extract_provider_error_type(parsed) + + category = ( + _category_from_provider_payload(status_code, provider_error_type) + or _category_from_message(raw) + or _category_from_class_name(exc) + or LLMErrorCategory.UNKNOWN + ) + return LLMErrorAdaptation( + category=category, + retryable=category in _RETRYABLE_CATEGORIES, + user_message=_CATEGORY_MESSAGES[category], + provider_status_code=status_code, + provider_error_type=provider_error_type, + ) + + +def llm_error_message(exc: BaseException) -> str: + return adapt_llm_exception(exc).user_message + diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py index 6b37df343..269143af2 100644 --- a/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py +++ b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py @@ -12,6 +12,7 @@ from app.agents.chat.multi_agent_chat.main_agent.middleware.busy_mutex import ( is_cancel_requested, ) from app.agents.chat.runtime.errors import BusyError +from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception TURN_CANCELLING_INITIAL_DELAY_MS = 200 TURN_CANCELLING_BACKOFF_FACTOR = 2 @@ -102,6 +103,9 @@ def _extract_provider_error_code(parsed: dict[str, Any] | None) -> int | None: def is_provider_rate_limited(exc: BaseException) -> bool: """Return True if the exception looks like an upstream HTTP 429 / rate limit.""" + if adapt_llm_exception(exc).category is LLMErrorCategory.RATE_LIMITED: + return True + raw = str(exc) lowered = raw.lower() if "ratelimit" in type(exc).__name__.lower(): @@ -131,6 +135,84 @@ def is_provider_rate_limited(exc: BaseException) -> bool: ) +def _provider_error_extra(adapted: Any) -> dict[str, Any] | None: + extra: dict[str, Any] = {"provider_error_category": adapted.category.value} + if adapted.provider_status_code is not None: + extra["provider_status_code"] = adapted.provider_status_code + if adapted.provider_error_type: + extra["provider_error_type"] = adapted.provider_error_type + return extra + + +def _classify_provider_exception( + exc: Exception, +) -> tuple[ + str, str, Literal["info", "warn", "error"], bool, str, dict[str, Any] | None +] | None: + adapted = adapt_llm_exception(exc) + + if adapted.category is LLMErrorCategory.RATE_LIMITED: + return ( + "rate_limited", + "RATE_LIMITED", + "warn", + True, + "This model is temporarily rate-limited. Please try again in a few seconds or switch models.", + _provider_error_extra(adapted), + ) + + if adapted.category in { + LLMErrorCategory.AUTH_FAILED, + LLMErrorCategory.PERMISSION_DENIED, + }: + return ( + "model_auth_failed", + "MODEL_AUTH_FAILED", + "warn", + True, + "This model's API key is invalid or expired. Switch models, or update the API key.", + _provider_error_extra(adapted), + ) + + if adapted.category is LLMErrorCategory.MODEL_NOT_FOUND: + return ( + "model_not_found", + "MODEL_NOT_FOUND", + "warn", + True, + "The selected model is unavailable or no longer exists. Switch to another model and try again.", + _provider_error_extra(adapted), + ) + + if adapted.category is LLMErrorCategory.CONTEXT_LIMIT: + return ( + "model_context_limit", + "MODEL_CONTEXT_LIMIT", + "warn", + True, + "This request is too large for the selected model. Try a model with a larger context window or reduce the input.", + _provider_error_extra(adapted), + ) + + if adapted.category in { + LLMErrorCategory.TIMEOUT, + LLMErrorCategory.PROVIDER_UNAVAILABLE, + LLMErrorCategory.BAD_GATEWAY, + LLMErrorCategory.CONNECTION_FAILED, + LLMErrorCategory.SERVER_ERROR, + }: + return ( + "model_provider_unavailable", + "MODEL_PROVIDER_UNAVAILABLE", + "warn", + True, + "The selected model provider is temporarily unavailable. Please try again or switch models.", + _provider_error_extra(adapted), + ) + + return None + + def classify_stream_exception( exc: Exception, *, @@ -167,15 +249,9 @@ def classify_stream_exception( None, ) - if is_provider_rate_limited(exc): - return ( - "rate_limited", - "RATE_LIMITED", - "warn", - True, - "This model is temporarily rate-limited. Please try again in a few seconds or switch models.", - None, - ) + provider_classification = _classify_provider_exception(exc) + if provider_classification is not None: + return provider_classification return ( "server_error", diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py new file mode 100644 index 000000000..48b07596c --- /dev/null +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import pytest + +from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception +from app.tasks.chat.streaming.errors.classifier import classify_stream_exception + +pytestmark = pytest.mark.unit + + +def _exception_named(name: str, message: str) -> Exception: + return type(name, (Exception,), {})(message) + + +def test_adapter_classifies_authentication_error_by_class_name() -> None: + exc = _exception_named("AuthenticationError", "provider rejected credentials") + + adapted = adapt_llm_exception(exc) + + assert adapted.category is LLMErrorCategory.AUTH_FAILED + assert adapted.retryable is False + assert adapted.user_message == "LLM authentication failed. Check your API key." + + +def test_adapter_classifies_embedded_provider_401_payload() -> None: + exc = RuntimeError( + 'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}' + ) + + adapted = adapt_llm_exception(exc) + + assert adapted.category is LLMErrorCategory.AUTH_FAILED + assert adapted.provider_status_code == 401 + + +def test_adapter_preserves_rate_limit_classification() -> None: + exc = RuntimeError('{"error":{"message":"Slow down","code":429}}') + + adapted = adapt_llm_exception(exc) + + assert adapted.category is LLMErrorCategory.RATE_LIMITED + assert adapted.retryable is True + + +def test_stream_classifier_maps_model_auth_to_stable_code() -> None: + exc = RuntimeError( + 'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}' + ) + + kind, code, severity, expected, message, extra = classify_stream_exception( + exc, + flow_label="chat", + ) + + assert kind == "model_auth_failed" + assert code == "MODEL_AUTH_FAILED" + assert severity == "warn" + assert expected is True + assert "API key" in message + assert extra == { + "provider_error_category": "auth_failed", + "provider_status_code": 401, + } + + +def test_stream_classifier_keeps_unknown_errors_generic() -> None: + exc = RuntimeError("database exploded") + + kind, code, severity, expected, message, extra = classify_stream_exception( + exc, + flow_label="chat", + ) + + assert kind == "server_error" + assert code == "SERVER_ERROR" + assert severity == "error" + assert expected is False + assert message == "Error during chat: database exploded" + assert extra is None + diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index f048376cc..0c4fa63ec 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -613,6 +613,18 @@ export default function NewChatPage() { return; } + if (normalized.channel === "inline") { + if (normalized.assistantMessage) { + await persistAssistantErrorMessage({ + threadId, + assistantMsgId, + text: normalized.assistantMessage, + }); + } + toast.error(normalized.userMessage); + return; + } + toast.error(normalized.userMessage); }, [currentUser?.id, persistAssistantErrorMessage, searchSpaceId, setPremiumAlertForThread] diff --git a/surfsense_web/components/free-chat/free-chat-page.tsx b/surfsense_web/components/free-chat/free-chat-page.tsx index b28b1e0a1..8d5215fca 100644 --- a/surfsense_web/components/free-chat/free-chat-page.tsx +++ b/surfsense_web/components/free-chat/free-chat-page.tsx @@ -63,6 +63,21 @@ function normalizeFreeChatErrorMessage(error: unknown): string { if (code === "THREAD_BUSY") { return "A previous response is still stopping. Please try again in a moment."; } + if (code === "MODEL_AUTH_FAILED") { + return "This model’s API key is invalid or expired. Switch models, or update the API key."; + } + if (code === "MODEL_NOT_FOUND") { + return "This model is unavailable or no longer exists. Please switch models."; + } + if (code === "MODEL_CONTEXT_LIMIT") { + return "This request is too large for the selected model. Reduce the input or switch models."; + } + if (code === "MODEL_PROVIDER_UNAVAILABLE") { + return "The selected model provider is temporarily unavailable. Please try again or switch models."; + } + if (code === "RATE_LIMITED") { + return "This model is temporarily rate-limited. Please try again in a few seconds or switch models."; + } return error.message || "An unexpected error occurred"; } @@ -154,7 +169,7 @@ export function FreeChatPage() { assistantMsgId: string, signal: AbortSignal, turnstileToken: string | null - ): Promise<"captcha" | void> => { + ): Promise<"captcha" | undefined> => { const reqBody: Record = { model_slug: modelSlug, messages: messageHistory, diff --git a/surfsense_web/lib/chat/chat-error-classifier.ts b/surfsense_web/lib/chat/chat-error-classifier.ts index 1c67d59a1..92924f0f7 100644 --- a/surfsense_web/lib/chat/chat-error-classifier.ts +++ b/surfsense_web/lib/chat/chat-error-classifier.ts @@ -5,6 +5,10 @@ export type ChatErrorKind = | "thread_busy" | "send_failed_pre_accept" | "auth_expired" + | "model_auth_failed" + | "model_not_found" + | "model_context_limit" + | "model_provider_unavailable" | "rate_limited" | "network_offline" | "stream_interrupted" @@ -14,7 +18,7 @@ export type ChatErrorKind = | "server_error" | "unknown"; -export type ChatErrorChannel = "pinned_inline" | "toast" | "silent"; +export type ChatErrorChannel = "pinned_inline" | "inline" | "toast" | "silent"; export type ChatTelemetryEvent = "chat_blocked" | "chat_error"; export type ChatErrorSeverity = "info" | "warn" | "error"; @@ -206,6 +210,66 @@ export function classifyChatError(input: RawChatErrorInput): NormalizedChatError }; } + if (errorCode === "MODEL_AUTH_FAILED") { + return { + kind: "model_auth_failed", + channel: "toast", + severity: "warn", + telemetryEvent: "chat_blocked", + isExpected: true, + userMessage: + "This model’s API key is invalid or expired. Switch models, or update the API key.", + rawMessage, + errorCode: errorCode ?? "MODEL_AUTH_FAILED", + details: { flow: input.flow, providerErrorType }, + }; + } + + if (errorCode === "MODEL_NOT_FOUND") { + return { + kind: "model_not_found", + channel: "toast", + severity: "warn", + telemetryEvent: "chat_blocked", + isExpected: true, + userMessage: + "This model is unavailable or no longer exists. Switch to another model and try again.", + rawMessage, + errorCode: errorCode ?? "MODEL_NOT_FOUND", + details: { flow: input.flow, providerErrorType }, + }; + } + + if (errorCode === "MODEL_CONTEXT_LIMIT") { + return { + kind: "model_context_limit", + channel: "toast", + severity: "warn", + telemetryEvent: "chat_blocked", + isExpected: true, + userMessage: + "This request is too large for the selected model. Reduce the input or switch models.", + rawMessage, + errorCode: errorCode ?? "MODEL_CONTEXT_LIMIT", + details: { flow: input.flow, providerErrorType }, + }; + } + + if (errorCode === "MODEL_PROVIDER_UNAVAILABLE") { + return { + kind: "model_provider_unavailable", + channel: "toast", + severity: "warn", + telemetryEvent: "chat_blocked", + isExpected: true, + userMessage: + "The selected model provider is temporarily unavailable. Please try again or switch models.", + rawMessage, + errorCode: errorCode ?? "MODEL_PROVIDER_UNAVAILABLE", + details: { flow: input.flow, providerErrorType }, + }; + } + if (errorCode === "RATE_LIMITED" || providerTypeNormalized === "rate_limit_error") { return { kind: "rate_limited", diff --git a/surfsense_web/lib/chat/chat-request-errors.ts b/surfsense_web/lib/chat/chat-request-errors.ts index e0dfb3cc4..c86c72d66 100644 --- a/surfsense_web/lib/chat/chat-request-errors.ts +++ b/surfsense_web/lib/chat/chat-request-errors.ts @@ -91,6 +91,10 @@ export function tagPreAcceptSendFailure(error: unknown): unknown { "TURN_CANCELLING", "AUTH_EXPIRED", "UNAUTHORIZED", + "MODEL_AUTH_FAILED", + "MODEL_NOT_FOUND", + "MODEL_CONTEXT_LIMIT", + "MODEL_PROVIDER_UNAVAILABLE", "RATE_LIMITED", "NETWORK_ERROR", "STREAM_PARSE_ERROR",