fix: health-check all endpoints with the right per-endpoint path

issue: resolves #24
This commit is contained in:
Alpha Nerd 2026-04-16 12:18:38 +02:00
parent a3e7e8a007
commit 1a2781ac23
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M

View file

@ -3754,21 +3754,37 @@ async def health_proxy(request: Request):
- `endpoints`: a mapping of endpoint URL `{status, version|detail}`. - `endpoints`: a mapping of endpoint URL `{status, version|detail}`.
* The HTTP status code is 200 when everything is healthy, 503 otherwise. * The HTTP status code is 200 when everything is healthy, 503 otherwise.
""" """
# Run all health checks in parallel # Run all health checks in parallel.
tasks = [fetch.endpoint_details(ep, "/api/version", "version", skip_error_cache=True) for ep in config.endpoints] # if not is_ext_openai_endpoint(ep)] # Ollama endpoints expose /api/version; OpenAI-compatible endpoints (vLLM,
# llama-server, external) expose /models. Using /api/version against an
# OpenAI-compatible endpoint yields a 404 and noisy log output.
all_endpoints = list(config.endpoints)
llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
all_endpoints += llama_eps_extra
tasks = []
for ep in all_endpoints:
if is_openai_compatible(ep):
tasks.append(fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True))
else:
tasks.append(fetch.endpoint_details(ep, "/api/version", "version", skip_error_cache=True))
results = await asyncio.gather(*tasks, return_exceptions=True) results = await asyncio.gather(*tasks, return_exceptions=True)
health_summary = {} health_summary = {}
overall_ok = True overall_ok = True
for ep, result in zip(config.endpoints, results): for ep, result in zip(all_endpoints, results):
if isinstance(result, Exception): if isinstance(result, Exception):
# Endpoint did not respond / returned an error # Endpoint did not respond / returned an error
health_summary[ep] = {"status": "error", "detail": str(result)} health_summary[ep] = {"status": "error", "detail": str(result)}
overall_ok = False overall_ok = False
else: else:
# Successful response report the reported version # Successful response report the reported version (Ollama) or
# indicate the endpoint is reachable (OpenAI-compatible).
if is_openai_compatible(ep):
health_summary[ep] = {"status": "ok"}
else:
health_summary[ep] = {"status": "ok", "version": result} health_summary[ep] = {"status": "ok", "version": result}
response_payload = { response_payload = {