diff --git a/.forgejo/workflows/docker-publish-semantic.yml b/.forgejo/workflows/docker-publish-semantic.yml
index 163f1a1..ebbfd36 100644
--- a/.forgejo/workflows/docker-publish-semantic.yml
+++ b/.forgejo/workflows/docker-publish-semantic.yml
@@ -86,7 +86,7 @@ jobs:
           provenance: false
           build-args: |
             SEMANTIC_CACHE=true
-          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-${{ matrix.arch }}-${{ github.run_id }}
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-${{ matrix.arch }}
 
   merge:
     runs-on: docker-amd64
@@ -142,6 +142,6 @@ jobs:
         run: |
           docker buildx imagetools create \
             $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-amd64-${{ github.run_id }} \
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-arm64-${{ github.run_id }}
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-amd64 \
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:semantic-platform-arm64
 
diff --git a/.forgejo/workflows/docker-publish.yml b/.forgejo/workflows/docker-publish.yml
index 09e145c..27cd879 100644
--- a/.forgejo/workflows/docker-publish.yml
+++ b/.forgejo/workflows/docker-publish.yml
@@ -77,7 +77,7 @@ jobs:
           platforms: ${{ matrix.platform }}
           push: true
           provenance: false
-          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}-${{ github.run_id }}
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
 
   merge:
     runs-on: docker-amd64
@@ -133,6 +133,6 @@ jobs:
         run: |
           docker buildx imagetools create \
             $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-amd64-${{ github.run_id }} \
-            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-arm64-${{ github.run_id }}
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-amd64 \
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-arm64
 
diff --git a/.forgejo/workflows/opencode.yml b/.forgejo/workflows/opencode.yml
index 4fef9e1..6d126d3 100644
--- a/.forgejo/workflows/opencode.yml
+++ b/.forgejo/workflows/opencode.yml
@@ -11,7 +11,7 @@ jobs:
   opencode:
     if: |
       contains(github.event.comment.body, '/oc') ||
-      contains(github.event.review.body, '/oc')
+      contains(github.event.comment.body, '/opencode')
     runs-on: docker-amd64
     container:
       image: node:lts-bookworm
@@ -54,7 +54,9 @@ jobs:
         uses: ./.opencode-action
         with:
           nomyo_api_key: ${{ secrets.NOMYO_API_KEY }}
-          model: nomyo/unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_XL
+          model: nomyo/unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_M
           forgejo_api_url: https://bitfreedom.net/code/
           forgejo_token: ${{ secrets.FORGEJO_TOKEN }}
           forgejo_push_token: ${{ secrets.FORGEJO_PUSH_TOKEN }}
+
+
diff --git a/.forgejo/workflows/test-action.yml b/.forgejo/workflows/test-action.yml
new file mode 100644
index 0000000..e91b832
--- /dev/null
+++ b/.forgejo/workflows/test-action.yml
@@ -0,0 +1,20 @@
+name: artifact-probe
+on: workflow_dispatch
+jobs:
+  up:
+    runs-on: docker-amd64
+    container:
+      image: node:lts-bookworm
+    steps:
+      - run: echo "hello" > /tmp/probe.txt
+      - uses: https://github.com/actions/upload-artifact@v4
+        with: { name: probe, path: /tmp/probe.txt }
+  down:
+    needs: up
+    runs-on: docker-amd64
+    container:
+      image: node:lts-bookworm
+    steps:
+      - uses: https://github.com/actions/download-artifact@v4
+        with: { name: probe, path: /tmp/got }
+      - run: cat /tmp/got/probe.txt
\ No newline at end of file
diff --git a/doc/architecture.md b/doc/architecture.md
index c2408d8..f725573 100644
--- a/doc/architecture.md
+++ b/doc/architecture.md
@@ -206,8 +206,6 @@ The `/health` endpoint provides comprehensive health status:
 }
 ```
 
-For Ollama endpoints the probe is a parallel check of `/api/version` (liveness) and `/api/ps` (the route used by `choose_endpoint` when selecting a backend for a request). Reporting `ok` only when both succeed prevents the router from advertising an endpoint as healthy while completion calls dead-end on `/api/ps`. The same dual probe backs `/api/config`, which the dashboard uses to render endpoint health.
-
 ## Database Schema
 
 The router uses SQLite for persistent storage:
diff --git a/doc/monitoring.md b/doc/monitoring.md
index 9ce25ec..ab75d25 100644
--- a/doc/monitoring.md
+++ b/doc/monitoring.md
@@ -29,10 +29,6 @@ Response:
 - `200`: All endpoints healthy
 - `503`: One or more endpoints unhealthy
 
-**Probe scope per endpoint**:
-- **Ollama endpoints** are probed at both `/api/version` (liveness) and `/api/ps` (model-introspection used by the router). If either fails the endpoint is reported as `error`; the response still includes `version` when the daemon is reachable so operators can tell a partial failure from a full outage. The `detail` field names the failing probe, e.g. `"/api/ps: 502 …"`.
-- **OpenAI-compatible / llama-server endpoints** are probed at `/models`.
-
 ### Current Usage
 
 ```bash
@@ -137,8 +133,6 @@ Response:
 }
 ```
 
-Uses the same dual-probe logic as `/health` (Ollama: `/api/version` + `/api/ps`; OpenAI-compatible: `/models`). An endpoint will report `error` whenever either probe fails. The dashboard renders the `detail` field as a tooltip on the status cell.
-
 ### Cache Statistics
 
 ```bash
diff --git a/requirements.txt b/requirements.txt
index b03afff..5920529 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,18 +5,18 @@ annotated-types==0.7.0
 anyio==4.13.0
 async-timeout==5.0.1
 attrs==26.1.0
-certifi==2026.5.20
+certifi==2026.4.22
 click==8.4.0
 distro==1.9.0
 exceptiongroup==1.3.1
-fastapi==0.136.3
+fastapi==0.136.1
 fastapi-sse==1.1.1
 frozenlist==1.8.0
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1
 idna==3.15
-jiter==0.15.0
+jiter==0.14.0
 multidict==6.7.1
 ollama==0.6.2
 openai==2.37.0
@@ -30,10 +30,10 @@ pydantic_core==2.46.4
 python-dotenv==1.2.2
 PyYAML==6.0.3
 sniffio==1.3.1
-starlette>=1.0.1
+starlette==0.52.1
 truststore==0.10.4
 tiktoken==0.13.0
-tqdm==4.68.1
+tqdm==4.67.3
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 uvicorn==0.47.0
diff --git a/router.py b/router.py
index c465ebc..08225fb 100644
--- a/router.py
+++ b/router.py
@@ -1000,7 +1000,7 @@ class fetch:
                 async with client.get(f"{endpoint}/models") as resp:
                     await _ensure_success(resp)
                     data = await resp.json()
-
+                
                 # Filter for loaded models only
                 items = data.get("data", [])
                 models = {
@@ -1012,19 +1012,11 @@ class fetch:
                 # Update cache with lock protection
                 async with _loaded_models_cache_lock:
                     _loaded_models_cache[endpoint] = (models, time.time())
-                # Probe succeeded — clear any stale error so the endpoint
-                # becomes routable again.
-                async with _loaded_error_cache_lock:
-                    _loaded_error_cache.pop(endpoint, None)
                 return models
             except Exception as e:
                 # If anything goes wrong we simply assume the endpoint has no models
                 message = _format_connection_issue(f"{endpoint}/models", e)
                 print(f"[fetch.loaded_models] {message}")
-                # Record the failure so `choose_endpoint` can avoid routing
-                # to an unhealthy backend and repeated probes short-circuit.
-                async with _loaded_error_cache_lock:
-                    _loaded_error_cache[endpoint] = time.time()
                 return set()
         else:
             # Original Ollama /api/ps logic
@@ -1039,15 +1031,11 @@ class fetch:
                 # Update cache with lock protection
                 async with _loaded_models_cache_lock:
                     _loaded_models_cache[endpoint] = (models, time.time())
-                async with _loaded_error_cache_lock:
-                    _loaded_error_cache.pop(endpoint, None)
                 return models
             except Exception as e:
                 # If anything goes wrong we simply assume the endpoint has no models
                 message = _format_connection_issue(f"{endpoint}/api/ps", e)
                 print(f"[fetch.loaded_models] {message}")
-                async with _loaded_error_cache_lock:
-                    _loaded_error_cache[endpoint] = time.time()
                 return set()
 
     async def _refresh_loaded_models(endpoint: str) -> None:
@@ -1865,28 +1853,6 @@ async def choose_endpoint(model: str, reserve: bool = True,
     load_tasks = [fetch.loaded_models(ep) for ep in candidate_endpoints]
     loaded_sets = await asyncio.gather(*load_tasks)
 
-    # 3️⃣.5  Exclude endpoints whose loaded-model probe has been failing
-    # recently. Without this filter, an endpoint where `/api/ps` returns 5xx
-    # would appear with an empty loaded set but pass through to the
-    # free-slot fallback (step 4) — sending completion calls to an
-    # unhealthy backend. See issue #83.
-    async with _loaded_error_cache_lock:
-        unhealthy = {
-            ep for ep, ts in _loaded_error_cache.items()
-            if _is_fresh(ts, 300)
-        }
-    if unhealthy:
-        filtered = [
-            (ep, models) for ep, models in zip(candidate_endpoints, loaded_sets)
-            if ep not in unhealthy
-        ]
-        if filtered:
-            candidate_endpoints = [ep for ep, _ in filtered]
-            loaded_sets = [models for _, models in filtered]
-        # If *every* candidate is unhealthy we still fall through with the
-        # original list — refusing to route is worse than retrying a
-        # possibly-recovered backend.
-
     # Look up a possible affinity hint *before* taking usage_lock. The two
     # locks are never held together to avoid lock-ordering issues.
     affine_ep: Optional[str] = None
@@ -3188,103 +3154,44 @@ async def usage_proxy(request: Request):
             "token_usage_counts": token_usage_counts}
 
 # -------------------------------------------------------------
-# 20. Endpoint health probes (shared by /api/config and /health)
-# -------------------------------------------------------------
-async def _raw_probe(
-    ep: str,
-    route: str,
-    api_key: Optional[str] = None,
-    timeout: Optional[float] = None,
-) -> tuple[bool, object]:
-    """Direct HTTP probe that distinguishes success from failure
-    (unlike `fetch.endpoint_details`, which returns [] on either).
-    Returns `(ok, payload_or_error_message)`.
-    """
-    headers = {"Referer": default_headers.get("HTTP-Referer", "https://nomyo.ai")}
-    if api_key is not None:
-        headers["Authorization"] = "Bearer " + api_key
-    url = f"{ep.rstrip('/')}/{route.lstrip('/')}"
-    req_kwargs = {}
-    if timeout is not None:
-        req_kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
-    try:
-        client: aiohttp.ClientSession = get_session(ep)
-        async with client.get(url, headers=headers, **req_kwargs) as resp:
-            await _ensure_success(resp)
-            data = await resp.json()
-        return True, data
-    except Exception as exc:
-        return False, _format_connection_issue(url, exc)
-
-
-async def _endpoint_health(ep: str, *, timeout: Optional[float] = None) -> dict:
-    """Probe an endpoint and return `{status, version?, detail?}`.
-
-    Ollama endpoints get a dual probe of `/api/version` and `/api/ps` so
-    that a daemon which is reachable but has a broken model-introspection
-    path (issue #83) is reported as `error` rather than `ok`.
-    OpenAI-compatible endpoints use a single `/models` probe.
-    """
-    if is_openai_compatible(ep):
-        ok, payload = await _raw_probe(
-            ep, "/models", config.api_keys.get(ep), timeout=timeout,
-        )
-        if ok:
-            return {"status": "ok", "version": "latest"}
-        return {"status": "error", "detail": str(payload)}
-
-    (version_ok, version_payload), (ps_ok, ps_payload) = await asyncio.gather(
-        _raw_probe(ep, "/api/version", timeout=timeout),
-        _raw_probe(ep, "/api/ps", timeout=timeout),
-    )
-
-    version_value = (
-        version_payload.get("version")
-        if version_ok and isinstance(version_payload, dict)
-        else None
-    )
-
-    if version_ok and ps_ok:
-        return {"status": "ok", "version": version_value}
-    if not version_ok and not ps_ok:
-        return {"status": "error", "detail": str(version_payload)}
-    # Partial failure — daemon reachable but one probe failed. Report
-    # as "error" so callers can surface the issue; include `version` so
-    # the operator knows the daemon itself is alive.
-    if not ps_ok:
-        return {
-            "status": "error",
-            "version": version_value,
-            "detail": f"/api/ps: {ps_payload}",
-        }
-    return {
-        "status": "error",
-        "detail": f"/api/version: {version_payload}",
-    }
-
-
-# -------------------------------------------------------------
-# 20b. Proxy config route – for monitoring and frontend usage
+# 20. Proxy config route – for monitoring and frontent usage
 # -------------------------------------------------------------
 @app.get("/api/config")
 async def config_proxy(request: Request):
     """
     Return a simple JSON object that contains the configured
-    Ollama endpoints and llama_server_endpoints. The front‑end uses this
-    to display which endpoints are being proxied and their health.
-    Status is "error" when either liveness (/api/version) or routing
-    health (/api/ps) fails — see issue #83.
+    Ollama endpoints and llama_server_endpoints. The front‑end uses this to display
+    which endpoints are being proxied.
     """
-    async def check(url: str) -> dict:
-        return {"url": url, **(await _endpoint_health(url, timeout=5))}
+    async def check_endpoint(url: str):
+        client: aiohttp.ClientSession = get_session(url)
+        headers = None
+        if "/v1" in url:
+            headers = {"Authorization": "Bearer " + config.api_keys.get(url, "no-key")}
+            target_url = f"{url}/models"
+        else:
+            target_url = f"{url}/api/version"
 
-    ollama_results = await asyncio.gather(*[check(ep) for ep in config.endpoints])
+        try:
+            async with client.get(target_url, headers=headers, timeout=aiohttp.ClientTimeout(total=5)) as resp:
+                await _ensure_success(resp)
+                data = await resp.json()
+            if "/v1" in url:
+                return {"url": url, "status": "ok", "version": "latest"}
+            else:
+                return {"url": url, "status": "ok", "version": data.get("version")}
+        except Exception as e:
+            detail = _format_connection_issue(target_url, e)
+            return {"url": url, "status": "error", "detail": detail}
+
+    # Check Ollama endpoints
+    ollama_results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints])
+    
+    # Check llama-server endpoints
     llama_results = []
     if config.llama_server_endpoints:
-        llama_results = await asyncio.gather(
-            *[check(ep) for ep in config.llama_server_endpoints]
-        )
-
+        llama_results = await asyncio.gather(*[check_endpoint(ep) for ep in config.llama_server_endpoints])
+    
     return {
         "endpoints": ollama_results,
         "llama_server_endpoints": llama_results,
@@ -4096,30 +4003,44 @@ async def health_proxy(request: Request):
     """
     Health‑check endpoint for monitoring the proxy.
 
-    * Queries each configured endpoint for both liveness and routing health:
-      Ollama endpoints are probed at `/api/version` AND `/api/ps`,
-      OpenAI-compatible endpoints at `/models`.
+    * Queries each configured endpoint for its `/api/version` response.
     * Returns a JSON object containing:
-        - `status`: "ok" if every endpoint replied to every probe, otherwise "error".
+        - `status`: "ok" if every endpoint replied, otherwise "error".
         - `endpoints`: a mapping of endpoint URL → `{status, version|detail}`.
     * The HTTP status code is 200 when everything is healthy, 503 otherwise.
     """
     # Run all health checks in parallel.
-    # Ollama endpoints expose /api/version (liveness) and /api/ps (routing
-    # health — required by `choose_endpoint`). OpenAI-compatible endpoints
-    # (vLLM, llama-server, external) expose /models, which serves both
-    # purposes. Probing /api/version alone would miss the case where the
-    # Ollama process is up but /api/ps is failing — see issue #83.
+    # Ollama endpoints expose /api/version; OpenAI-compatible endpoints (vLLM,
+    # llama-server, external) expose /models.  Using /api/version against an
+    # OpenAI-compatible endpoint yields a 404 and noisy log output.
     all_endpoints = list(config.endpoints)
     llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
     all_endpoints += llama_eps_extra
 
-    probe_results = await asyncio.gather(
-        *(_endpoint_health(ep) for ep in all_endpoints),
-    )
+    tasks = []
+    for ep in all_endpoints:
+        if is_openai_compatible(ep):
+            tasks.append(fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True))
+        else:
+            tasks.append(fetch.endpoint_details(ep, "/api/version", "version", skip_error_cache=True))
 
-    health_summary = dict(zip(all_endpoints, probe_results))
-    overall_ok = all(entry.get("status") == "ok" for entry in probe_results)
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    health_summary = {}
+    overall_ok = True
+
+    for ep, result in zip(all_endpoints, results):
+        if isinstance(result, Exception):
+            # Endpoint did not respond / returned an error
+            health_summary[ep] = {"status": "error", "detail": str(result)}
+            overall_ok = False
+        else:
+            # Successful response – report the reported version (Ollama) or
+            # indicate the endpoint is reachable (OpenAI-compatible).
+            if is_openai_compatible(ep):
+                health_summary[ep] = {"status": "ok"}
+            else:
+                health_summary[ep] = {"status": "ok", "version": result}
 
     response_payload = {
         "status": "ok" if overall_ok else "error",
diff --git a/static/index.html b/static/index.html
index 8c0b16c..b29f22b 100644
--- a/static/index.html
+++ b/static/index.html
@@ -192,10 +192,6 @@
                 color: #8b0000;
                 font-weight: bold;
             }
-            .status-error[title] {
-                cursor: help;
-                text-decoration: underline dotted;
-            }
             .copy-link,
             .delete-link,
             .show-link,
@@ -740,16 +736,6 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
                 return await resp.json();
             }
 
-            function escapeHtml(value) {
-                if (value === null || value === undefined) return "";
-                return String(value)
-                    .replace(/&/g, "&amp;")
-                    .replace(/</g, "&lt;")
-                    .replace(/>/g, "&gt;")
-                    .replace(/"/g, "&quot;")
-                    .replace(/'/g, "&#39;");
-            }
-
             function toggleDarkMode() {
                 document.documentElement.classList.toggle("dark-mode");
             }
@@ -766,24 +752,40 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
                     // Build HTML for both endpoints and llama_server_endpoints
                     let html = "";
                     
-                    const renderRow = (e) => {
-                        const statusClass =
-                            e.status === "ok" ? "status-ok" : "status-error";
-                        const version = e.version || "N/A";
-                        const titleAttr = e.detail
-                            ? ` title="${escapeHtml(e.detail)}"`
-                            : "";
-                        return `
+                    // Add Ollama endpoints
+                    html += data.endpoints
+                        .map((e) => {
+                            const statusClass =
+                                e.status === "ok"
+                                    ? "status-ok"
+                                    : "status-error";
+                            const version = e.version || "N/A";
+                            return `
         <tr>
-          <td class="endpoint">${escapeHtml(e.url)}</td>
-          <td class="status ${statusClass}"${titleAttr}>${escapeHtml(e.status)}</td>
-          <td class="version">${escapeHtml(version)}</td>
+          <td class="endpoint">${e.url}</td>
+          <td class="status ${statusClass}">${e.status}</td>
+          <td class="version">${version}</td>
         </tr>`;
-                    };
-
-                    html += data.endpoints.map(renderRow).join("");
+                        })
+                        .join("");
+                    
+                    // Add llama-server endpoints
                     if (data.llama_server_endpoints && data.llama_server_endpoints.length > 0) {
-                        html += data.llama_server_endpoints.map(renderRow).join("");
+                        html += data.llama_server_endpoints
+                            .map((e) => {
+                                const statusClass =
+                                    e.status === "ok"
+                                        ? "status-ok"
+                                        : "status-error";
+                                const version = e.version || "N/A";
+                                return `
+        <tr>
+          <td class="endpoint">${e.url}</td>
+          <td class="status ${statusClass}">${e.status}</td>
+          <td class="version">${version}</td>
+        </tr>`;
+                            })
+                            .join("");
                     }
                     
                     body.innerHTML = html;
diff --git a/test/test_choose_endpoint.py b/test/test_choose_endpoint.py
index ece609a..b94fdf1 100644
--- a/test/test_choose_endpoint.py
+++ b/test/test_choose_endpoint.py
@@ -1,5 +1,4 @@
 """Tests for choose_endpoint routing logic with mocked fetch calls."""
-import time
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -26,12 +25,10 @@ def _make_cfg(endpoints, llama_eps=None, max_conn=2, endpoint_config=None, prior
 
 @pytest.fixture(autouse=True)
 def reset_usage():
-    """Clear usage_counts and error caches between tests to prevent bleed."""
+    """Clear usage_counts between tests to prevent bleed."""
     router.usage_counts.clear()
-    router._loaded_error_cache.clear()
     yield
     router.usage_counts.clear()
-    router._loaded_error_cache.clear()
 
 
 class TestChooseEndpointBasic:
@@ -105,57 +102,6 @@ class TestChooseEndpointBasic:
         # Least-busy is EP2
         assert ep == EP2
 
-    async def test_excludes_endpoint_with_recent_loaded_error(self):
-        # Regression: issue #83 — when /api/ps fails for EP1 but EP1
-        # still advertises the model via /api/tags, routing must not
-        # fall back to EP1 just because it has a free slot.
-        cfg = _make_cfg([EP1, EP2])
-
-        async def available(ep, *_):
-            return {"llama3.2:latest"}
-
-        # EP1's /api/ps probe failed recently; EP2 is fine but the model
-        # is not loaded there. Without the health filter, EP1 would be
-        # picked by the free-slot fallback (step 4 in choose_endpoint).
-        router._loaded_error_cache[EP1] = time.time()
-
-        with (
-            patch.object(router, "config", cfg),
-            patch.object(router.fetch, "available_models", side_effect=available),
-            patch.object(router.fetch, "loaded_models", AsyncMock(return_value=set())),
-        ):
-            ep, _ = await router.choose_endpoint("llama3.2:latest")
-        assert ep == EP2
-
-    async def test_stale_loaded_error_does_not_exclude(self):
-        # Errors older than the 300s window must not keep an endpoint
-        # excluded forever.
-        cfg = _make_cfg([EP1])
-        router._loaded_error_cache[EP1] = time.time() - 301
-
-        with (
-            patch.object(router, "config", cfg),
-            patch.object(router.fetch, "available_models", AsyncMock(return_value={"m:latest"})),
-            patch.object(router.fetch, "loaded_models", AsyncMock(return_value={"m:latest"})),
-        ):
-            ep, _ = await router.choose_endpoint("m:latest")
-        assert ep == EP1
-
-    async def test_all_unhealthy_still_routes(self):
-        # If every candidate has a fresh loaded-error we still try one
-        # (it may have recovered between the cache write and now) rather
-        # than refusing to route.
-        cfg = _make_cfg([EP1])
-        router._loaded_error_cache[EP1] = time.time()
-
-        with (
-            patch.object(router, "config", cfg),
-            patch.object(router.fetch, "available_models", AsyncMock(return_value={"m:latest"})),
-            patch.object(router.fetch, "loaded_models", AsyncMock(return_value=set())),
-        ):
-            ep, _ = await router.choose_endpoint("m:latest")
-        assert ep == EP1
-
     async def test_reserve_increments_usage(self):
         cfg = _make_cfg([EP1])
         with (
diff --git a/test/test_fetch.py b/test/test_fetch.py
index 6f2ed50..9b542a1 100644
--- a/test/test_fetch.py
+++ b/test/test_fetch.py
@@ -178,33 +178,3 @@ class TestFetchLoadedModels:
             first  = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
             second = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
         assert first == second
-
-    async def test_records_error_in_loaded_error_cache_on_failure(self):
-        # Regression: issue #83 — /api/ps failures must be recorded so
-        # `choose_endpoint` can exclude unhealthy backends from routing.
-        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
-            await router.fetch.loaded_models(MOCK_OLLAMA_EP)
-        assert MOCK_OLLAMA_EP in router._loaded_error_cache
-
-    async def test_records_error_for_llama_server_on_failure(self):
-        cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
-            await router.fetch.loaded_models(MOCK_LLAMA_EP)
-        assert MOCK_LLAMA_EP in router._loaded_error_cache
-
-    async def test_clears_error_cache_on_subsequent_success(self):
-        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        # Pre-seed an old error so loaded_models() falls through to the
-        # network probe instead of short-circuiting on the error cache.
-        async with router._loaded_error_cache_lock:
-            router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
-                f"{MOCK_OLLAMA_EP}/api/ps",
-                payload={"models": [{"name": "qwen:7b"}]},
-            )
-            await router.fetch.loaded_models(MOCK_OLLAMA_EP)
-        assert MOCK_OLLAMA_EP not in router._loaded_error_cache