diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py
index 35dfdd44e..c9eeff01b 100644
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@@ -133,6 +133,44 @@ PROVIDER_MAP = {
 }
 
 
+# Default ``api_base`` per LiteLLM provider prefix. Used as a safety net when
+# a global LLM config does *not* specify ``api_base``: without this, LiteLLM
+# happily picks up provider-agnostic env vars (e.g. ``AZURE_API_BASE``,
+# ``OPENAI_API_BASE``) and routes, say, an ``openrouter/anthropic/claude-3-haiku``
+# request to an Azure endpoint, which then 404s with ``Resource not found``.
+# Only providers with a well-known, stable public base URL are listed here —
+# self-hosted / BYO-endpoint providers (ollama, custom, bedrock, vertex_ai,
+# huggingface, databricks, cloudflare, replicate) are intentionally omitted
+# so their existing config-driven behaviour is preserved.
+PROVIDER_DEFAULT_API_BASE = {
+    "openrouter": "https://openrouter.ai/api/v1",
+    "groq": "https://api.groq.com/openai/v1",
+    "mistral": "https://api.mistral.ai/v1",
+    "perplexity": "https://api.perplexity.ai",
+    "xai": "https://api.x.ai/v1",
+    "cerebras": "https://api.cerebras.ai/v1",
+    "deepinfra": "https://api.deepinfra.com/v1/openai",
+    "fireworks_ai": "https://api.fireworks.ai/inference/v1",
+    "together_ai": "https://api.together.xyz/v1",
+    "anyscale": "https://api.endpoints.anyscale.com/v1",
+    "cometapi": "https://api.cometapi.com/v1",
+    "sambanova": "https://api.sambanova.ai/v1",
+}
+
+
+# Canonical provider → base URL when a config uses a generic ``openai``-style
+# prefix but the ``provider`` field tells us which API it really is
+# (e.g. DeepSeek/Alibaba/Moonshot/Zhipu/MiniMax all use ``openai`` compat but
+# each has its own base URL).
+PROVIDER_KEY_DEFAULT_API_BASE = {
+    "DEEPSEEK": "https://api.deepseek.com/v1",
+    "ALIBABA_QWEN": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+    "MOONSHOT": "https://api.moonshot.ai/v1",
+    "ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
+    "MINIMAX": "https://api.minimax.io/v1",
+}
+
+
 class LLMRouterService:
     """
     Singleton service for managing LiteLLM Router.
@@ -224,6 +262,16 @@ class LLMRouterService:
         # hits ContextWindowExceededError.
         full_model_list, ctx_fallbacks = cls._build_context_fallback_groups(model_list)
 
+        # Build a general-purpose fallback list so NotFound/timeout/rate-limit
+        # style failures on one deployment don't bubble up as hard errors —
+        # the router retries with a sibling deployment in ``auto-large``.
+        # ``auto-large`` is the large-context subset of ``auto``; if it is
+        # empty we fall back to ``auto`` itself so the router at least picks a
+        # different deployment in the same group.
+        fallbacks: list[dict[str, list[str]]] | None = None
+        if ctx_fallbacks:
+            fallbacks = [{"auto": ["auto-large"]}]
+
         try:
             router_kwargs: dict[str, Any] = {
                 "model_list": full_model_list,
@@ -237,15 +285,18 @@ class LLMRouterService:
             }
             if ctx_fallbacks:
                 router_kwargs["context_window_fallbacks"] = ctx_fallbacks
+            if fallbacks:
+                router_kwargs["fallbacks"] = fallbacks
 
             instance._router = Router(**router_kwargs)
             instance._initialized = True
             logger.info(
                 "LLM Router initialized with %d deployments, "
-                "strategy: %s, context_window_fallbacks: %s",
+                "strategy: %s, context_window_fallbacks: %s, fallbacks: %s",
                 len(model_list),
                 final_settings.get("routing_strategy"),
                 ctx_fallbacks or "none",
+                fallbacks or "none",
             )
         except Exception as e:
             logger.error(f"Failed to initialize LLM Router: {e}")
@@ -348,10 +399,11 @@ class LLMRouterService:
             return None
 
         # Build model string
+        provider = config.get("provider", "").upper()
         if config.get("custom_provider"):
-            model_string = f"{config['custom_provider']}/{config['model_name']}"
+            provider_prefix = config["custom_provider"]
+            model_string = f"{provider_prefix}/{config['model_name']}"
         else:
-            provider = config.get("provider", "").upper()
             provider_prefix = PROVIDER_MAP.get(provider, provider.lower())
             model_string = f"{provider_prefix}/{config['model_name']}"
 
@@ -361,9 +413,19 @@ class LLMRouterService:
             "api_key": config.get("api_key"),
         }
 
-        # Add optional api_base
-        if config.get("api_base"):
-            litellm_params["api_base"] = config["api_base"]
+        # Resolve ``api_base``. Config value wins; otherwise apply a
+        # provider-aware default so the deployment does not silently
+        # inherit unrelated env vars (e.g. ``AZURE_API_BASE``) and route
+        # requests to the wrong endpoint. See ``PROVIDER_DEFAULT_API_BASE``
+        # docstring for the motivating bug (OpenRouter models 404-ing
+        # against an Azure endpoint).
+        api_base = config.get("api_base")
+        if not api_base:
+            api_base = PROVIDER_KEY_DEFAULT_API_BASE.get(provider)
+        if not api_base:
+            api_base = PROVIDER_DEFAULT_API_BASE.get(provider_prefix)
+        if api_base:
+            litellm_params["api_base"] = api_base
 
         # Add any additional litellm parameters
         if config.get("litellm_params"):
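For reviewers unfamiliar with LiteLLM's fallback semantics, here is a minimal, self-contained sketch of what the new `fallbacks=[{"auto": ["auto-large"]}]` kwarg does at request time. Only the `fallbacks` shape and the group names mirror this diff; the concrete models and API keys below are placeholders, not values from this PR.

```python
from litellm import Router

# Two hypothetical deployments: "auto" is the primary group, "auto-large"
# its large-context sibling (group names mirror this PR; models/keys are
# placeholders).
router = Router(
    model_list=[
        {
            "model_name": "auto",
            "litellm_params": {
                "model": "openrouter/anthropic/claude-3-haiku",
                "api_key": "sk-or-...",
                "api_base": "https://openrouter.ai/api/v1",
            },
        },
        {
            "model_name": "auto-large",
            "litellm_params": {
                "model": "gemini/gemini-1.5-pro",
                "api_key": "...",
            },
        },
    ],
    # If a request against an "auto" deployment fails hard (NotFound,
    # timeout, rate limit), retry it against the "auto-large" group
    # instead of surfacing the error to the caller.
    fallbacks=[{"auto": ["auto-large"]}],
)

# Callers keep targeting the group name; the fallback is transparent.
response = router.completion(
    model="auto",
    messages=[{"role": "user", "content": "ping"}],
)
```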
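And a quick illustration of the three-tier `api_base` resolution order introduced in the last hunk (explicit config, then provider-key default, then prefix default). `resolve_api_base` is a hypothetical helper written just for this note, and the two tables are trimmed subsets of the ones added above; the real change inlines the same logic into the deployment builder.

```python
# Trimmed subsets of the tables added in this PR, for illustration only.
PROVIDER_KEY_DEFAULT_API_BASE = {"DEEPSEEK": "https://api.deepseek.com/v1"}
PROVIDER_DEFAULT_API_BASE = {"openrouter": "https://openrouter.ai/api/v1"}


def resolve_api_base(
    config: dict, provider: str, provider_prefix: str
) -> str | None:
    """Mirror the diff's precedence: config wins, then provider key, then prefix."""
    api_base = config.get("api_base")
    if not api_base:
        api_base = PROVIDER_KEY_DEFAULT_API_BASE.get(provider)
    if not api_base:
        api_base = PROVIDER_DEFAULT_API_BASE.get(provider_prefix)
    return api_base


# An explicit config value always wins:
assert (
    resolve_api_base({"api_base": "https://proxy.internal/v1"}, "DEEPSEEK", "openai")
    == "https://proxy.internal/v1"
)
# DeepSeek uses the generic "openai" prefix, so the provider key decides:
assert resolve_api_base({}, "DEEPSEEK", "openai") == "https://api.deepseek.com/v1"
# OpenRouter is pinned by prefix, so AZURE_API_BASE etc. can no longer leak in:
assert resolve_api_base({}, "OPENROUTER", "openrouter") == "https://openrouter.ai/api/v1"
# Self-hosted providers (ollama, custom, ...) resolve to None and keep their
# existing config-driven behaviour:
assert resolve_api_base({}, "OLLAMA", "ollama") is None
```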