diff --git a/router.py b/router.py
index 395394e..9c02077 100644
--- a/router.py
+++ b/router.py
@@ -3754,22 +3754,38 @@ async def health_proxy(request: Request):
     - `endpoints`: a mapping of endpoint URL → `{status, version|detail}`.
     * The HTTP status code is 200 when everything is healthy, 503 otherwise.
     """
 
-    # Run all health checks in parallel
-    tasks = [fetch.endpoint_details(ep, "/api/version", "version", skip_error_cache=True) for ep in config.endpoints] # if not is_ext_openai_endpoint(ep)]
+    # Run all health checks in parallel.
+    # Ollama endpoints expose /api/version; OpenAI-compatible endpoints (vLLM,
+    # llama-server, external) expose /models. Using /api/version against an
+    # OpenAI-compatible endpoint yields a 404 and noisy log output.
+    all_endpoints = list(config.endpoints)
+    llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
+    all_endpoints += llama_eps_extra
+
+    tasks = []
+    for ep in all_endpoints:
+        if is_openai_compatible(ep):
+            tasks.append(fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True))
+        else:
+            tasks.append(fetch.endpoint_details(ep, "/api/version", "version", skip_error_cache=True))
     results = await asyncio.gather(*tasks, return_exceptions=True)
 
     health_summary = {}
     overall_ok = True
-    for ep, result in zip(config.endpoints, results):
+    for ep, result in zip(all_endpoints, results):
         if isinstance(result, Exception):
             # Endpoint did not respond / returned an error
             health_summary[ep] = {"status": "error", "detail": str(result)}
             overall_ok = False
         else:
-            # Successful response – report the reported version
-            health_summary[ep] = {"status": "ok", "version": result}
+            # Successful response – report the reported version (Ollama) or
+            # indicate the endpoint is reachable (OpenAI-compatible).
+            if is_openai_compatible(ep):
+                health_summary[ep] = {"status": "ok"}
+            else:
+                health_summary[ep] = {"status": "ok", "version": result}
 
 
     response_payload = {
         "status": "ok" if overall_ok else "error",