diff --git a/router.py b/router.py
index 990974f..fe1fd36 100644
--- a/router.py
+++ b/router.py
@@ -30,16 +30,18 @@ from PIL import Image
 # Successful results are cached for 300s
 _models_cache: dict[str, tuple[Set[str], float]] = {}
 _loaded_models_cache: dict[str, tuple[Set[str], float]] = {}
-# Transient errors are cached for 1s – the key stays until the
-# timeout expires, after which the endpoint will be queried again.
-_error_cache: dict[str, float] = {}
+# Transient errors are cached separately per concern so that a failure
+# in one path does not poison the other.
+_available_error_cache: dict[str, float] = {}
+_loaded_error_cache: dict[str, float] = {}
 
 # ------------------------------------------------------------------
 # Cache locks
 # ------------------------------------------------------------------
 _models_cache_lock = asyncio.Lock()
 _loaded_models_cache_lock = asyncio.Lock()
-_error_cache_lock = asyncio.Lock()
+_available_error_cache_lock = asyncio.Lock()
+_loaded_error_cache_lock = asyncio.Lock()
 
 # ------------------------------------------------------------------
 # In-flight request tracking (prevents cache stampede)
@@ -535,10 +537,21 @@ class fetch:
             message = _format_connection_issue(endpoint_url, e)
             print(f"[fetch.available_models] {message}")
             # Update error cache with lock protection
-            async with _error_cache_lock:
-                _error_cache[endpoint] = time.time()
+            async with _available_error_cache_lock:
+                _available_error_cache[endpoint] = time.time()
             return set()
 
+    async def _refresh_available_models(endpoint: str, api_key: Optional[str] = None) -> None:
+        """
+        Background task to refresh available models cache without blocking the caller.
+        Used for stale-while-revalidate pattern.
+        """
+        try:
+            await fetch._fetch_available_models_internal(endpoint, api_key)
+        except Exception as e:
+            # Silently fail - cache will remain stale but functional
+            print(f"[fetch._refresh_available_models] Background refresh failed for {endpoint}: {e}")
+
     async def available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]:
         """
         Query <endpoint>/api/tags and return a set of all model names that the
@@ -549,6 +562,10 @@ class fetch:
         Uses request coalescing to prevent cache stampede: if multiple requests
         arrive when cache is expired, only one actual HTTP request is made.
 
+        Uses stale-while-revalidate: when the cache is between 300-600s old,
+        the stale data is returned immediately while a background refresh runs.
+        This prevents model blackouts caused by transient timeouts.
+
         If the request fails (e.g. timeout, 5xx, or malformed response), an empty
         set is returned.
         """
@@ -556,19 +573,27 @@ class fetch:
         async with _models_cache_lock:
             if endpoint in _models_cache:
                 models, cached_at = _models_cache[endpoint]
+
+                # FRESH: < 300s old - return immediately
                 if _is_fresh(cached_at, 300):
                     return models
-                # Stale entry - remove it
+
+                # STALE: 300-600s old - return stale data and refresh in background
+                if _is_fresh(cached_at, 600):
+                    asyncio.create_task(fetch._refresh_available_models(endpoint, api_key))
+                    return models  # Return stale data immediately
+
+                # EXPIRED: > 600s old - too stale, must refresh synchronously
                 del _models_cache[endpoint]
 
         # Check error cache with lock protection
-        async with _error_cache_lock:
-            if endpoint in _error_cache:
-                if _is_fresh(_error_cache[endpoint], 10):
+        async with _available_error_cache_lock:
+            if endpoint in _available_error_cache:
+                if _is_fresh(_available_error_cache[endpoint], 10):
                     # Still within the short error TTL – pretend nothing is available
                     return set()
                 # Error expired – remove it
-                del _error_cache[endpoint]
+                del _available_error_cache[endpoint]
 
         # Request coalescing: check if another request is already fetching this endpoint
         async with _inflight_lock:
@@ -657,12 +682,12 @@ class fetch:
                 del _loaded_models_cache[endpoint]
 
         # Check error cache with lock protection
-        async with _error_cache_lock:
-            if endpoint in _error_cache:
-                if _is_fresh(_error_cache[endpoint], 10):
+        async with _loaded_error_cache_lock:
+            if endpoint in _loaded_error_cache:
+                if _is_fresh(_loaded_error_cache[endpoint], 10):
                     return set()
                 # Error expired - remove it
-                del _error_cache[endpoint]
+                del _loaded_error_cache[endpoint]
 
         # Request coalescing: check if another request is already fetching this endpoint
         async with _inflight_lock: