From 8e8cf96faa629e7f86ef60ed2f12ed61012b2a4f Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Fri, 12 Jun 2026 05:03:14 +0530
Subject: [PATCH] feat(error-handling): implement LLM error adaptation and
 classification for chat streaming

- Introduced LLMErrorCategory and adapt_llm_exception to normalize LLM exceptions.
- Updated llm_retryable_message and llm_permanent_message to utilize the new adaptation logic.
- Enhanced classify_stream_exception to classify provider errors and return user-friendly messages.
- Added tests for error classification and adaptation to ensure robustness.
- Updated frontend error handling to display appropriate messages based on new classifications.
---
 .../app/indexing_pipeline/exceptions.py       |  36 +--
 .../app/routes/anonymous_chat_routes.py       |  11 +-
 .../app/services/llm_error_adapter.py         | 251 ++++++++++++++++++
 .../tasks/chat/streaming/errors/classifier.py |  94 ++++++-
 .../chat/streaming/test_error_classifier.py   |  80 ++++++
 .../new-chat/[[...chat_id]]/page.tsx          |  12 +
 .../components/free-chat/free-chat-page.tsx   |  17 +-
 .../lib/chat/chat-error-classifier.ts         |  66 ++++-
 surfsense_web/lib/chat/chat-request-errors.ts |   4 +
 9 files changed, 533 insertions(+), 38 deletions(-)
 create mode 100644 surfsense_backend/app/services/llm_error_adapter.py
 create mode 100644 surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py

diff --git a/surfsense_backend/app/indexing_pipeline/exceptions.py b/surfsense_backend/app/indexing_pipeline/exceptions.py
index 666fa4b9f..bf9d9e9fa 100644
--- a/surfsense_backend/app/indexing_pipeline/exceptions.py
+++ b/surfsense_backend/app/indexing_pipeline/exceptions.py
@@ -14,6 +14,8 @@ from litellm.exceptions import (
 )
 from sqlalchemy.exc import IntegrityError as IntegrityError
 
+from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
+
 # Tuples for use directly in except clauses.
 RETRYABLE_LLM_ERRORS = (
     RateLimitError,
@@ -97,38 +99,20 @@ def safe_exception_message(exc: Exception) -> str:
 
 def llm_retryable_message(exc: Exception) -> str:
     try:
-        if isinstance(exc, RateLimitError):
-            return PipelineMessages.RATE_LIMIT
-        if isinstance(exc, Timeout):
-            return PipelineMessages.LLM_TIMEOUT
-        if isinstance(exc, ServiceUnavailableError):
-            return PipelineMessages.LLM_UNAVAILABLE
-        if isinstance(exc, BadGatewayError):
-            return PipelineMessages.LLM_BAD_GATEWAY
-        if isinstance(exc, InternalServerError):
-            return PipelineMessages.LLM_SERVER_ERROR
-        if isinstance(exc, APIConnectionError):
-            return PipelineMessages.LLM_CONNECTION
-        return safe_exception_message(exc)
+        adapted = adapt_llm_exception(exc)  # shared adapter; the category is not limited to retryable ones
+        if adapted.category is LLMErrorCategory.UNKNOWN:  # unclassified: keep the safe_exception_message fallback
+            return safe_exception_message(exc)
+        return adapted.user_message
     except Exception:
         return "Something went wrong when calling the LLM."
 
 
 def llm_permanent_message(exc: Exception) -> str:
     try:
-        if isinstance(exc, AuthenticationError):
-            return PipelineMessages.LLM_AUTH
-        if isinstance(exc, PermissionDeniedError):
-            return PipelineMessages.LLM_PERMISSION
-        if isinstance(exc, NotFoundError):
-            return PipelineMessages.LLM_NOT_FOUND
-        if isinstance(exc, BadRequestError):
-            return PipelineMessages.LLM_BAD_REQUEST
-        if isinstance(exc, UnprocessableEntityError):
-            return PipelineMessages.LLM_UNPROCESSABLE
-        if isinstance(exc, APIResponseValidationError):
-            return PipelineMessages.LLM_RESPONSE
-        return safe_exception_message(exc)
+        adapted = adapt_llm_exception(exc)  # shared adapter; the category is not limited to permanent ones
+        if adapted.category is LLMErrorCategory.UNKNOWN:  # unclassified: keep the safe_exception_message fallback
+            return safe_exception_message(exc)
+        return adapted.user_message
     except Exception:
         return "Something went wrong when calling the LLM."
 
diff --git a/surfsense_backend/app/routes/anonymous_chat_routes.py b/surfsense_backend/app/routes/anonymous_chat_routes.py
index 84420e738..aa0e70464 100644
--- a/surfsense_backend/app/routes/anonymous_chat_routes.py
+++ b/surfsense_backend/app/routes/anonymous_chat_routes.py
@@ -18,6 +18,7 @@ from app.etl_pipeline.file_classifier import (
     PLAINTEXT_EXTENSIONS,
 )
 from app.rate_limiter import limiter
+from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
 
 logger = logging.getLogger(__name__)
 
@@ -474,7 +475,15 @@ async def stream_anonymous_chat(
         except Exception as e:
             logger.exception("Anonymous chat stream error")
             await TokenQuotaService.anon_release(session_key, ip_key, request_id)
-            yield streaming_service.format_error(f"Error during chat: {e!s}")
+            _, error_code, _, _, user_message, extra = classify_stream_exception(
+                e,
+                flow_label="chat",
+            )
+            yield streaming_service.format_error(
+                user_message,
+                error_code=error_code,
+                extra=extra,
+            )
             yield streaming_service.format_done()
         finally:
             await TokenQuotaService.anon_release_stream_slot(client_ip)
diff --git a/surfsense_backend/app/services/llm_error_adapter.py b/surfsense_backend/app/services/llm_error_adapter.py
new file mode 100644
index 000000000..b0de15fb0
--- /dev/null
+++ b/surfsense_backend/app/services/llm_error_adapter.py
@@ -0,0 +1,251 @@
+"""Normalize provider/LLM exceptions into low-cardinality product categories."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from enum import StrEnum
+from typing import Any
+
+
+class LLMErrorCategory(StrEnum):  # normalized categories for LLM/provider failures
+    RATE_LIMITED = "rate_limited"
+    TIMEOUT = "timeout"
+    PROVIDER_UNAVAILABLE = "provider_unavailable"
+    BAD_GATEWAY = "bad_gateway"
+    CONNECTION_FAILED = "connection_failed"
+    AUTH_FAILED = "auth_failed"
+    PERMISSION_DENIED = "permission_denied"
+    MODEL_NOT_FOUND = "model_not_found"
+    BAD_REQUEST = "bad_request"
+    CONTEXT_LIMIT = "context_limit"
+    RESPONSE_INVALID = "response_invalid"
+    SERVER_ERROR = "server_error"
+    UNKNOWN = "unknown"  # fallback used by adapt_llm_exception when no classifier matches
+
+
+@dataclass(frozen=True)
+class LLMErrorAdaptation:  # immutable result produced by adapt_llm_exception
+    category: LLMErrorCategory
+    retryable: bool  # adapt_llm_exception derives this from _RETRYABLE_CATEGORIES membership
+    user_message: str  # adapt_llm_exception fills this from _CATEGORY_MESSAGES
+    provider_status_code: int | None = None  # "code"/"status" parsed from a JSON payload in the message
+    provider_error_type: str | None = None  # "type" string parsed from a JSON payload in the message
+
+
+_CATEGORY_MESSAGES: dict[LLMErrorCategory, str] = {  # adapt_llm_exception indexes this directly, so every category needs an entry
+    LLMErrorCategory.RATE_LIMITED: "LLM rate limit exceeded. Will retry on next sync.",
+    LLMErrorCategory.TIMEOUT: "LLM request timed out. Will retry on next sync.",
+    LLMErrorCategory.PROVIDER_UNAVAILABLE: "LLM service temporarily unavailable. Will retry on next sync.",
+    LLMErrorCategory.BAD_GATEWAY: "LLM gateway error. Will retry on next sync.",
+    LLMErrorCategory.CONNECTION_FAILED: "Could not reach the LLM service. Check network connectivity.",
+    LLMErrorCategory.AUTH_FAILED: "LLM authentication failed. Check your API key.",
+    LLMErrorCategory.PERMISSION_DENIED: "LLM request denied. Check your account permissions.",
+    LLMErrorCategory.MODEL_NOT_FOUND: "Model not found. Check your model configuration.",
+    LLMErrorCategory.BAD_REQUEST: "LLM rejected the request. Document content may be invalid.",
+    LLMErrorCategory.CONTEXT_LIMIT: "Document exceeds the LLM context window even after optimization.",
+    LLMErrorCategory.RESPONSE_INVALID: "LLM returned an invalid response.",
+    LLMErrorCategory.SERVER_ERROR: "LLM internal server error. Will retry on next sync.",
+    LLMErrorCategory.UNKNOWN: "Something went wrong when calling the LLM.",
+}
+
+_RETRYABLE_CATEGORIES = {  # categories that adapt_llm_exception reports with retryable=True
+    LLMErrorCategory.RATE_LIMITED,
+    LLMErrorCategory.TIMEOUT,
+    LLMErrorCategory.PROVIDER_UNAVAILABLE,
+    LLMErrorCategory.BAD_GATEWAY,
+    LLMErrorCategory.CONNECTION_FAILED,
+    LLMErrorCategory.SERVER_ERROR,
+}
+
+_CLASS_NAME_MAP: tuple[tuple[LLMErrorCategory, tuple[str, ...]], ...] = (  # hints are matched as substrings of MRO class names
+    (
+        LLMErrorCategory.RATE_LIMITED,
+        ("RateLimitError", "TooManyRequests", "TooManyRequestsError"),
+    ),
+    (LLMErrorCategory.TIMEOUT, ("Timeout", "APITimeoutError", "TimeoutException")),
+    (
+        LLMErrorCategory.PROVIDER_UNAVAILABLE,
+        ("ServiceUnavailableError", "ServiceUnavailable"),
+    ),
+    (
+        LLMErrorCategory.BAD_GATEWAY,
+        ("BadGatewayError", "GatewayTimeoutError"),
+    ),
+    (
+        LLMErrorCategory.CONNECTION_FAILED,
+        ("APIConnectionError", "ConnectError", "ConnectTimeout", "ReadTimeout"),
+    ),
+    (
+        LLMErrorCategory.AUTH_FAILED,
+        ("AuthenticationError", "InvalidApiKey", "InvalidAPIKey", "InvalidApiKeyError"),
+    ),
+    (LLMErrorCategory.PERMISSION_DENIED, ("PermissionDeniedError", "ForbiddenError")),
+    (LLMErrorCategory.MODEL_NOT_FOUND, ("NotFoundError", "ModelNotFoundError")),
+    (
+        LLMErrorCategory.CONTEXT_LIMIT,
+        ("ContextWindowExceeded", "ContextOverflow", "ContextLimit"),
+    ),
+    (
+        LLMErrorCategory.RESPONSE_INVALID,
+        ("APIResponseValidationError", "ResponseValidationError"),
+    ),
+    (
+        LLMErrorCategory.BAD_REQUEST,
+        ("BadRequestError", "InvalidRequestError", "UnprocessableEntityError"),
+    ),
+    (LLMErrorCategory.SERVER_ERROR, ("InternalServerError",)),
+)
+
+
+def _parse_error_payload(message: str) -> dict[str, Any] | None:
+    """Return the first JSON object embedded in *message*, or ``None``.
+
+    ``raw_decode`` stops at the end of the object, so trailing text is tolerated.
+    """
+    decoder = json.JSONDecoder()
+    start = message.find("{")
+    while start >= 0:
+        try:
+            parsed, _end = decoder.raw_decode(message[start:])
+            return parsed  # a value decoded from "{" is always a JSON object
+        except (ValueError, RecursionError):
+            start = message.find("{", start + 1)  # not JSON here; try the next brace
+    return None
+
+
+def _class_names(exc: BaseException) -> tuple[str, ...]:  # class names along the MRO, most-derived first
+    return tuple(cls.__name__ for cls in type(exc).__mro__)  # NOTE(review): builtin bases are included, e.g. "FileNotFoundError" contains the "NotFoundError" hint - confirm intended
+
+
+def _category_from_class_name(exc: BaseException) -> LLMErrorCategory | None:
+    # Most-derived class first; per class the longest matching hint wins, so "Timeout" cannot shadow "GatewayTimeoutError".
+    for name in _class_names(exc):
+        if hits := [(len(h), c) for c, hints in _CLASS_NAME_MAP for h in hints if h in name]:
+            return max(hits, key=lambda hit: hit[0])[1]  # ties keep the earliest table entry
+    return None
+
+
+def _extract_provider_status_code(parsed: dict[str, Any] | None) -> int | None:
+    """Return the first int-coercible ``code``/``status`` value, top level before ``error``."""
+    if not isinstance(parsed, dict):
+        return None
+    inner = parsed.get("error")
+    scopes = (parsed, inner) if isinstance(inner, dict) else (parsed,)
+    for scope in scopes:
+        for key in ("code", "status"):
+            raw_value = scope.get(key)
+            if raw_value is not None:
+                try:
+                    return int(raw_value)
+                except Exception:
+                    continue
+    return None
+
+
+def _extract_provider_error_type(parsed: dict[str, Any] | None) -> str | None:
+    """Return the first non-empty string ``type``, checking the top level before ``error``."""
+    if not isinstance(parsed, dict):
+        return None
+    top_level = parsed.get("type")
+    if isinstance(top_level, str) and top_level:
+        return top_level
+    # Fall back to the nested ``error`` object when the top level has no usable type.
+    inner = parsed.get("error")
+    inner_type = inner.get("type") if isinstance(inner, dict) else None
+    return inner_type if isinstance(inner_type, str) and inner_type else None
+
+
+def _category_from_provider_payload(
+    status_code: int | None,
+    provider_error_type: str | None,
+) -> LLMErrorCategory | None:
+    """Map a provider status code, else its lower-cased ``type`` string, to a category."""
+    by_status = {
+        429: LLMErrorCategory.RATE_LIMITED,
+        401: LLMErrorCategory.AUTH_FAILED,
+        403: LLMErrorCategory.PERMISSION_DENIED,
+        404: LLMErrorCategory.MODEL_NOT_FOUND,
+        400: LLMErrorCategory.BAD_REQUEST,
+        422: LLMErrorCategory.BAD_REQUEST,
+        502: LLMErrorCategory.BAD_GATEWAY,
+        504: LLMErrorCategory.BAD_GATEWAY,
+        503: LLMErrorCategory.PROVIDER_UNAVAILABLE,
+    }
+    if status_code in by_status:
+        return by_status[status_code]
+    if status_code is not None and status_code >= 500:
+        return LLMErrorCategory.SERVER_ERROR
+    by_type = {
+        "rate_limit_error": LLMErrorCategory.RATE_LIMITED,
+        "authentication_error": LLMErrorCategory.AUTH_FAILED,
+        "invalid_api_key": LLMErrorCategory.AUTH_FAILED,
+        "invalid_api_key_error": LLMErrorCategory.AUTH_FAILED,
+        "permission_denied": LLMErrorCategory.PERMISSION_DENIED,
+        "forbidden": LLMErrorCategory.PERMISSION_DENIED,
+        "not_found_error": LLMErrorCategory.MODEL_NOT_FOUND,
+        "model_not_found": LLMErrorCategory.MODEL_NOT_FOUND,
+        "context_length_exceeded": LLMErrorCategory.CONTEXT_LIMIT,
+        "context_window_exceeded": LLMErrorCategory.CONTEXT_LIMIT,
+    }
+    return by_type.get((provider_error_type or "").lower())
+
+
+def _category_from_message(raw: str) -> LLMErrorCategory | None:
+    """Guess a category from case-insensitive phrases found in the exception text."""
+    rules: tuple[tuple[LLMErrorCategory, tuple[str, ...]], ...] = (
+        (
+            LLMErrorCategory.RATE_LIMITED,
+            ("rate limit", "rate-limited", "temporarily rate-limited"),
+        ),
+        (
+            LLMErrorCategory.AUTH_FAILED,
+            (
+                "invalid api key",
+                "invalid_api_key",
+                "authentication",
+                "unauthorized",
+                "user not found",
+                "api key is expired",
+                "expired api key",
+            ),
+        ),
+        (LLMErrorCategory.PERMISSION_DENIED, ("forbidden", "permission denied")),
+        (LLMErrorCategory.MODEL_NOT_FOUND, ("model not found",)),
+        (
+            LLMErrorCategory.CONTEXT_LIMIT,
+            ("context length", "context window", "maximum context", "too many tokens"),
+        ),
+    )
+    text = raw.lower()
+    # Rules are ordered: the first one with a matching phrase wins.
+    for category, phrases in rules:
+        if any(phrase in text for phrase in phrases):
+            return category
+    return None
+
+
+def adapt_llm_exception(exc: BaseException) -> LLMErrorAdaptation:
+    """Classify *exc* into a category, retryability flag and user-facing message."""
+    text = str(exc)
+    payload = _parse_error_payload(text)
+    status = _extract_provider_status_code(payload)
+    error_type = _extract_provider_error_type(payload)
+    # Precedence: structured provider payload, then message phrases, then class names.
+    resolved = _category_from_provider_payload(status, error_type)
+    if resolved is None:
+        resolved = _category_from_message(text)
+    if resolved is None:
+        resolved = _category_from_class_name(exc) or LLMErrorCategory.UNKNOWN
+    return LLMErrorAdaptation(
+        category=resolved,
+        retryable=resolved in _RETRYABLE_CATEGORIES,
+        user_message=_CATEGORY_MESSAGES[resolved],
+        provider_status_code=status,
+        provider_error_type=error_type,
+    )
+
+
+def llm_error_message(exc: BaseException) -> str:  # convenience wrapper around adapt_llm_exception
+    return adapt_llm_exception(exc).user_message  # only the user-facing text is returned
+
diff --git a/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py
index 6b37df343..269143af2 100644
--- a/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py
+++ b/surfsense_backend/app/tasks/chat/streaming/errors/classifier.py
@@ -12,6 +12,7 @@ from app.agents.chat.multi_agent_chat.main_agent.middleware.busy_mutex import (
     is_cancel_requested,
 )
 from app.agents.chat.runtime.errors import BusyError
+from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
 
 TURN_CANCELLING_INITIAL_DELAY_MS = 200
 TURN_CANCELLING_BACKOFF_FACTOR = 2
@@ -102,6 +103,9 @@ def _extract_provider_error_code(parsed: dict[str, Any] | None) -> int | None:
 
 def is_provider_rate_limited(exc: BaseException) -> bool:
     """Return True if the exception looks like an upstream HTTP 429 / rate limit."""
+    if adapt_llm_exception(exc).category is LLMErrorCategory.RATE_LIMITED:
+        return True
+
     raw = str(exc)
     lowered = raw.lower()
     if "ratelimit" in type(exc).__name__.lower():
@@ -131,6 +135,84 @@ def is_provider_rate_limited(exc: BaseException) -> bool:
     )
 
 
+def _provider_error_extra(adapted: Any) -> dict[str, Any] | None:
+    """Build the ``extra`` payload: category always, status code/error type only when set."""
+    status_code, error_type = adapted.provider_status_code, adapted.provider_error_type
+    fields: dict[str, Any] = {"provider_error_category": adapted.category.value}
+    fields |= {} if status_code is None else {"provider_status_code": status_code}
+    fields |= {"provider_error_type": error_type} if error_type else {}
+    return fields
+
+
+def _classify_provider_exception(
+    exc: Exception,
+) -> tuple[
+    str, str, Literal["info", "warn", "error"], bool, str, dict[str, Any] | None  # (kind, error_code, severity, expected, user_message, extra)
+] | None:
+    adapted = adapt_llm_exception(exc)  # normalize once; every branch below reuses the result
+
+    if adapted.category is LLMErrorCategory.RATE_LIMITED:
+        return (
+            "rate_limited",
+            "RATE_LIMITED",
+            "warn",
+            True,
+            "This model is temporarily rate-limited. Please try again in a few seconds or switch models.",
+            _provider_error_extra(adapted),
+        )
+
+    if adapted.category in {  # permission failures share the auth outcome
+        LLMErrorCategory.AUTH_FAILED,
+        LLMErrorCategory.PERMISSION_DENIED,
+    }:
+        return (
+            "model_auth_failed",
+            "MODEL_AUTH_FAILED",
+            "warn",
+            True,
+            "This model's API key is invalid or expired. Switch models, or update the API key.",
+            _provider_error_extra(adapted),
+        )
+
+    if adapted.category is LLMErrorCategory.MODEL_NOT_FOUND:
+        return (
+            "model_not_found",
+            "MODEL_NOT_FOUND",
+            "warn",
+            True,
+            "The selected model is unavailable or no longer exists. Switch to another model and try again.",
+            _provider_error_extra(adapted),
+        )
+
+    if adapted.category is LLMErrorCategory.CONTEXT_LIMIT:
+        return (
+            "model_context_limit",
+            "MODEL_CONTEXT_LIMIT",
+            "warn",
+            True,
+            "This request is too large for the selected model. Try a model with a larger context window or reduce the input.",
+            _provider_error_extra(adapted),
+        )
+
+    if adapted.category in {  # these five categories collapse into a single outcome
+        LLMErrorCategory.TIMEOUT,
+        LLMErrorCategory.PROVIDER_UNAVAILABLE,
+        LLMErrorCategory.BAD_GATEWAY,
+        LLMErrorCategory.CONNECTION_FAILED,
+        LLMErrorCategory.SERVER_ERROR,
+    }:
+        return (
+            "model_provider_unavailable",
+            "MODEL_PROVIDER_UNAVAILABLE",
+            "warn",
+            True,
+            "The selected model provider is temporarily unavailable. Please try again or switch models.",
+            _provider_error_extra(adapted),
+        )
+
+    return None  # remaining categories (BAD_REQUEST, RESPONSE_INVALID, UNKNOWN) have no mapping here
+
+
 def classify_stream_exception(
     exc: Exception,
     *,
@@ -167,15 +249,9 @@ def classify_stream_exception(
             None,
         )
 
-    if is_provider_rate_limited(exc):
-        return (
-            "rate_limited",
-            "RATE_LIMITED",
-            "warn",
-            True,
-            "This model is temporarily rate-limited. Please try again in a few seconds or switch models.",
-            None,
-        )
+    provider_classification = _classify_provider_exception(exc)
+    if provider_classification is not None:
+        return provider_classification
 
     return (
         "server_error",
diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py
new file mode 100644
index 000000000..48b07596c
--- /dev/null
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_error_classifier.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import pytest
+
+from app.services.llm_error_adapter import LLMErrorCategory, adapt_llm_exception
+from app.tasks.chat.streaming.errors.classifier import classify_stream_exception
+
+pytestmark = pytest.mark.unit
+
+
+def _exception_named(name: str, message: str) -> Exception:  # test helper
+    return type(name, (Exception,), {})(message)  # create a throwaway Exception subclass called *name* and instantiate it
+
+
+def test_adapter_classifies_authentication_error_by_class_name() -> None:
+    """With no payload or phrase hints in the message, the class name alone decides."""
+    result = adapt_llm_exception(
+        _exception_named("AuthenticationError", "provider rejected credentials")
+    )
+    assert result.category is LLMErrorCategory.AUTH_FAILED
+    assert result.retryable is False
+    assert result.user_message == "LLM authentication failed. Check your API key."
+
+
+def test_adapter_classifies_embedded_provider_401_payload() -> None:
+    """A JSON payload embedded after a text prefix still yields its 401 status code."""
+    failure = RuntimeError(
+        'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'
+    )
+    result = adapt_llm_exception(failure)
+
+    assert result.category is LLMErrorCategory.AUTH_FAILED
+    assert result.provider_status_code == 401
+
+
+def test_adapter_preserves_rate_limit_classification() -> None:
+    """A bare JSON payload carrying code 429 is classified as retryable rate limiting."""
+    result = adapt_llm_exception(
+        RuntimeError('{"error":{"message":"Slow down","code":429}}')
+    )
+    assert result.category is LLMErrorCategory.RATE_LIMITED
+    assert result.retryable is True
+
+
+def test_stream_classifier_maps_model_auth_to_stable_code() -> None:
+    """An embedded provider 401 surfaces as MODEL_AUTH_FAILED with provider details in extra."""
+    failure = RuntimeError(
+        'litellm.AuthenticationError: OpenrouterException - {"error":{"message":"User not found.","code":401}}'
+    )
+    expected_extra = {
+        "provider_error_category": "auth_failed",
+        "provider_status_code": 401,
+    }
+
+    outcome = classify_stream_exception(failure, flow_label="chat")
+    kind, code, severity, expected, message, extra = outcome
+
+    assert kind == "model_auth_failed"
+    assert code == "MODEL_AUTH_FAILED"
+    assert severity == "warn"
+    assert expected is True
+    assert "API key" in message
+    assert extra == expected_extra
+
+
+def test_stream_classifier_keeps_unknown_errors_generic() -> None:
+    """An exception with no provider signals keeps the generic server-error shape."""
+    failure = RuntimeError("database exploded")
+
+    outcome = classify_stream_exception(failure, flow_label="chat")
+    kind, code, severity, expected, message, extra = outcome
+
+    assert kind == "server_error"
+    assert code == "SERVER_ERROR"
+    assert severity == "error"
+    assert expected is False
+    # The raw exception text is echoed back behind the flow label.
+    assert message == "Error during chat: database exploded"
+    assert extra is None
+
diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
index f048376cc..0c4fa63ec 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@@ -613,6 +613,18 @@ export default function NewChatPage() {
 				return;
 			}
 
+			if (normalized.channel === "inline") {
+				if (normalized.assistantMessage) {
+					await persistAssistantErrorMessage({
+						threadId,
+						assistantMsgId,
+						text: normalized.assistantMessage,
+					});
+				}
+				toast.error(normalized.userMessage);
+				return;
+			}
+
 			toast.error(normalized.userMessage);
 		},
 		[currentUser?.id, persistAssistantErrorMessage, searchSpaceId, setPremiumAlertForThread]
diff --git a/surfsense_web/components/free-chat/free-chat-page.tsx b/surfsense_web/components/free-chat/free-chat-page.tsx
index b28b1e0a1..8d5215fca 100644
--- a/surfsense_web/components/free-chat/free-chat-page.tsx
+++ b/surfsense_web/components/free-chat/free-chat-page.tsx
@@ -63,6 +63,21 @@ function normalizeFreeChatErrorMessage(error: unknown): string {
 	if (code === "THREAD_BUSY") {
 		return "A previous response is still stopping. Please try again in a moment.";
 	}
+	if (code === "MODEL_AUTH_FAILED") {
+		return "This model’s API key is invalid or expired. Switch models, or update the API key.";
+	}
+	if (code === "MODEL_NOT_FOUND") {
+		return "This model is unavailable or no longer exists. Please switch models.";
+	}
+	if (code === "MODEL_CONTEXT_LIMIT") {
+		return "This request is too large for the selected model. Reduce the input or switch models.";
+	}
+	if (code === "MODEL_PROVIDER_UNAVAILABLE") {
+		return "The selected model provider is temporarily unavailable. Please try again or switch models.";
+	}
+	if (code === "RATE_LIMITED") {
+		return "This model is temporarily rate-limited. Please try again in a few seconds or switch models.";
+	}
 	return error.message || "An unexpected error occurred";
 }
 
@@ -154,7 +169,7 @@ export function FreeChatPage() {
 			assistantMsgId: string,
 			signal: AbortSignal,
 			turnstileToken: string | null
-		): Promise<"captcha" | void> => {
+		): Promise<"captcha" | undefined> => {
 			const reqBody: Record<string, unknown> = {
 				model_slug: modelSlug,
 				messages: messageHistory,
diff --git a/surfsense_web/lib/chat/chat-error-classifier.ts b/surfsense_web/lib/chat/chat-error-classifier.ts
index 1c67d59a1..92924f0f7 100644
--- a/surfsense_web/lib/chat/chat-error-classifier.ts
+++ b/surfsense_web/lib/chat/chat-error-classifier.ts
@@ -5,6 +5,10 @@ export type ChatErrorKind =
 	| "thread_busy"
 	| "send_failed_pre_accept"
 	| "auth_expired"
+	| "model_auth_failed"
+	| "model_not_found"
+	| "model_context_limit"
+	| "model_provider_unavailable"
 	| "rate_limited"
 	| "network_offline"
 	| "stream_interrupted"
@@ -14,7 +18,7 @@ export type ChatErrorKind =
 	| "server_error"
 	| "unknown";
 
-export type ChatErrorChannel = "pinned_inline" | "toast" | "silent";
+export type ChatErrorChannel = "pinned_inline" | "inline" | "toast" | "silent";
 export type ChatTelemetryEvent = "chat_blocked" | "chat_error";
 export type ChatErrorSeverity = "info" | "warn" | "error";
 
@@ -206,6 +210,66 @@ export function classifyChatError(input: RawChatErrorInput): NormalizedChatError
 		};
 	}
 
+	if (errorCode === "MODEL_AUTH_FAILED") {
+		return {
+			kind: "model_auth_failed",
+			channel: "toast",
+			severity: "warn",
+			telemetryEvent: "chat_blocked",
+			isExpected: true,
+			userMessage:
+				"This model’s API key is invalid or expired. Switch models, or update the API key.",
+			rawMessage,
+			errorCode: errorCode ?? "MODEL_AUTH_FAILED",
+			details: { flow: input.flow, providerErrorType },
+		};
+	}
+
+	if (errorCode === "MODEL_NOT_FOUND") {
+		return {
+			kind: "model_not_found",
+			channel: "toast",
+			severity: "warn",
+			telemetryEvent: "chat_blocked",
+			isExpected: true,
+			userMessage:
+				"This model is unavailable or no longer exists. Switch to another model and try again.",
+			rawMessage,
+			errorCode: errorCode ?? "MODEL_NOT_FOUND",
+			details: { flow: input.flow, providerErrorType },
+		};
+	}
+
+	if (errorCode === "MODEL_CONTEXT_LIMIT") {
+		return {
+			kind: "model_context_limit",
+			channel: "toast",
+			severity: "warn",
+			telemetryEvent: "chat_blocked",
+			isExpected: true,
+			userMessage:
+				"This request is too large for the selected model. Reduce the input or switch models.",
+			rawMessage,
+			errorCode: errorCode ?? "MODEL_CONTEXT_LIMIT",
+			details: { flow: input.flow, providerErrorType },
+		};
+	}
+
+	if (errorCode === "MODEL_PROVIDER_UNAVAILABLE") {
+		return {
+			kind: "model_provider_unavailable",
+			channel: "toast",
+			severity: "warn",
+			telemetryEvent: "chat_blocked",
+			isExpected: true,
+			userMessage:
+				"The selected model provider is temporarily unavailable. Please try again or switch models.",
+			rawMessage,
+			errorCode: errorCode ?? "MODEL_PROVIDER_UNAVAILABLE",
+			details: { flow: input.flow, providerErrorType },
+		};
+	}
+
 	if (errorCode === "RATE_LIMITED" || providerTypeNormalized === "rate_limit_error") {
 		return {
 			kind: "rate_limited",
diff --git a/surfsense_web/lib/chat/chat-request-errors.ts b/surfsense_web/lib/chat/chat-request-errors.ts
index e0dfb3cc4..c86c72d66 100644
--- a/surfsense_web/lib/chat/chat-request-errors.ts
+++ b/surfsense_web/lib/chat/chat-request-errors.ts
@@ -91,6 +91,10 @@ export function tagPreAcceptSendFailure(error: unknown): unknown {
 			"TURN_CANCELLING",
 			"AUTH_EXPIRED",
 			"UNAUTHORIZED",
+			"MODEL_AUTH_FAILED",
+			"MODEL_NOT_FOUND",
+			"MODEL_CONTEXT_LIMIT",
+			"MODEL_PROVIDER_UNAVAILABLE",
 			"RATE_LIMITED",
 			"NETWORK_ERROR",
 			"STREAM_PARSE_ERROR",