From 182ddae5391f0894da3dde1684864e5fb5a49d8c Mon Sep 17 00:00:00 2001
From: alpha nerd
Date: Fri, 1 May 2026 13:49:34 +0200
Subject: [PATCH] fix: prevent dashboard and route hangs when endpoints are
 down by passing skip_error_cache=True together with a reduced timeout

---
 router.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/router.py b/router.py
index 54edbf3..326ec33 100644
--- a/router.py
+++ b/router.py
@@ -1046,7 +1046,7 @@ class fetch:
             if _inflight_loaded_models.get(endpoint) == task:
                 _inflight_loaded_models.pop(endpoint, None)

-    async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None, skip_error_cache: bool = False) -> List[dict]:
+    async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None, skip_error_cache: bool = False, timeout: Optional[float] = None) -> List[dict]:
         """
         Query / to fetch and return a List of dicts with details for the corresponding Ollama endpoint.
         If the request fails we respond with "N/A" for detail.
@@ -1054,6 +1054,8 @@ class fetch:
         When ``skip_error_cache`` is False (the default), the call is short-circuited if the
         endpoint recently failed (recorded in ``_available_error_cache``). Pass
         ``skip_error_cache=True`` from health-check routes that must always probe.
+
+        ``timeout`` overrides the session default for this single request (seconds, total).
         """
         # Fast-fail if the endpoint is known to be down (unless caller opts out)
         if not skip_error_cache:
@@ -1068,8 +1070,11 @@
             headers = {"Authorization": "Bearer " + api_key}

         request_url = f"{endpoint}{route}"
+        req_kwargs = {}
+        if timeout is not None:
+            req_kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
         try:
-            async with client.get(request_url, headers=headers) as resp:
+            async with client.get(request_url, headers=headers, **req_kwargs) as resp:
                 await _ensure_success(resp)
                 data = await resp.json()
                 detail = data.get(detail, [])
@@ -2780,11 +2785,11 @@ async def tags_proxy(request: Request):
     """

     # 1. Query all endpoints for models
-    tasks = [fetch.endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep]
-    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep]
+    tasks = [fetch.endpoint_details(ep, "/api/tags", "models", skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" not in ep]
+    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys[ep], skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" in ep]
     # Also query llama-server endpoints not already covered by config.endpoints
     llama_eps_for_tags = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
-    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep)) for ep in llama_eps_for_tags]
+    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8) for ep in llama_eps_for_tags]

     all_models = await asyncio.gather(*tasks)
     models = {'models': []}
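The hunks above are the mechanism: endpoint_details now accepts an optional per-request timeout and forwards it to aiohttp as a ClientTimeout, which overrides the session-wide default for that single request only. A minimal standalone sketch of the same pattern; the session setup, URL, and fallback behaviour here are illustrative assumptions, not router.py's actual code:

    import asyncio
    from typing import Optional

    import aiohttp

    async def fetch_detail(endpoint: str, route: str, key: str,
                           timeout: Optional[float] = None) -> list:
        # Build per-request kwargs only when an override is given, so the
        # ClientSession default timeout still applies otherwise.
        req_kwargs = {}
        if timeout is not None:
            req_kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{endpoint}{route}", **req_kwargs) as resp:
                    resp.raise_for_status()
                    return (await resp.json()).get(key, [])
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # A down endpoint now fails after `timeout` seconds instead of
            # hanging for the (much longer) session default.
            return []

    # e.g. asyncio.run(fetch_detail("http://127.0.0.1:11434", "/api/tags",
    #                               "models", timeout=8))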
@@ -2818,12 +2823,12 @@ async def ps_proxy(request: Request):
     For llama-server endpoints: queries /v1/models with status.value == "loaded"
     """
     # 1. Query Ollama endpoints for running models via /api/ps
-    ollama_tasks = [fetch.endpoint_details(ep, "/api/ps", "models") for ep in config.endpoints if "/v1" not in ep]
+    ollama_tasks = [fetch.endpoint_details(ep, "/api/ps", "models", skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" not in ep]

     # 2. Query llama-server endpoints for loaded models via /v1/models
     # Also query endpoints from llama_server_endpoints that may not be in config.endpoints
     all_llama_endpoints = set(config.llama_server_endpoints) | set(ep for ep in config.endpoints if ep in config.llama_server_endpoints)
     llama_tasks = [
-        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep))
+        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8)
         for ep in all_llama_endpoints
     ]
@@ -2873,12 +2878,12 @@ async def ps_details_proxy(request: Request):
     For llama-server endpoints: queries /v1/models with status info
     """
     # 1. Query Ollama endpoints via /api/ps
-    ollama_tasks = [(ep, fetch.endpoint_details(ep, "/api/ps", "models")) for ep in config.endpoints if "/v1" not in ep]
+    ollama_tasks = [(ep, fetch.endpoint_details(ep, "/api/ps", "models", skip_error_cache=True, timeout=8)) for ep in config.endpoints if "/v1" not in ep]

     # 2. Query llama-server endpoints via /v1/models
     # Also query endpoints from llama_server_endpoints that may not be in config.endpoints
     all_llama_endpoints = set(config.llama_server_endpoints) | set(ep for ep in config.endpoints if ep in config.llama_server_endpoints)
     llama_tasks = [
-        (ep, fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep)))
+        (ep, fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8))
         for ep in all_llama_endpoints
     ]
@@ -2940,7 +2945,7 @@ async def ps_details_proxy(request: Request):
             if api_key:
                 headers = {"Authorization": f"Bearer {api_key}"}
             try:
-                async with client.get(props_url, headers=headers) as resp:
+                async with client.get(props_url, headers=headers, timeout=aiohttp.ClientTimeout(total=5)) as resp:
                     if resp.status == 200:
                         data = await resp.json()
                         dgs = data.get("default_generation_settings", {})
@@ -3014,7 +3019,7 @@ async def config_proxy(request: Request):
         target_url = f"{url}/api/version"

         try:
-            async with client.get(target_url, headers=headers) as resp:
+            async with client.get(target_url, headers=headers, timeout=aiohttp.ClientTimeout(total=5)) as resp:
                 await _ensure_success(resp)
                 data = await resp.json()
                 if "/v1" in url:
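The two hunks just above handle one-off probes (/props and /api/version) that never needed a configurable value, so they cap each request inline at a hard 5 seconds. The reason these caps matter is that the listing routes fan out with asyncio.gather, and gather only returns once its slowest task finishes, so a single unreachable endpoint used to stall the entire dashboard response. A toy demonstration of how a per-request cap bounds the whole fan-out; the names and delays below are made up for illustration:

    import asyncio
    import time

    async def probe(name: str, latency: float) -> str:
        # Stand-in for a single endpoint query; `latency` simulates the server.
        try:
            await asyncio.wait_for(asyncio.sleep(latency), timeout=5)
            return f"{name}: ok"
        except asyncio.TimeoutError:
            return f"{name}: N/A"  # mirrors the router's "N/A" fallback

    async def main() -> None:
        t0 = time.monotonic()
        # One healthy endpoint, one dead one that would block for 60 s.
        results = await asyncio.gather(probe("fast", 0.1), probe("dead", 60))
        print(results, f"elapsed: {time.monotonic() - t0:.1f}s")  # ~5 s, not 60 s

    asyncio.run(main())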
@@ -3619,14 +3624,14 @@ async def openai_models_proxy(request: Request):
     For llama-server endpoints: queries /v1/models and filters for status.value == "loaded"
     """
     # 1. Query Ollama endpoints for all models via /api/tags
-    ollama_tasks = [fetch.endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep]
+    ollama_tasks = [fetch.endpoint_details(ep, "/api/tags", "models", skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" not in ep]

     # 2. Query external OpenAI endpoints (Groq, OpenAI, etc.) via /models
-    ext_openai_tasks = [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep)) for ep in config.endpoints if is_ext_openai_endpoint(ep)]
+    ext_openai_tasks = [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8) for ep in config.endpoints if is_ext_openai_endpoint(ep)]

     # 3. Query llama-server endpoints for loaded models via /v1/models
     # Also query endpoints from llama_server_endpoints that may not be in config.endpoints
     all_llama_endpoints = set(config.llama_server_endpoints) | set(ep for ep in config.endpoints if ep in config.llama_server_endpoints)
     llama_tasks = [
-        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep))
+        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8)
         for ep in all_llama_endpoints
     ]
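All of the listing routes now pass skip_error_cache=True, so they always perform a live probe instead of short-circuiting on a recently recorded failure; the timeout=8 is what keeps that always-probe behaviour from reintroducing the hang. For context, a hypothetical sketch of the kind of TTL failure cache the flag bypasses (the real shape and TTL of _available_error_cache are not visible in this patch, so every name and constant below is an assumption):

    import time

    # endpoint URL -> monotonic timestamp of the last observed failure
    _available_error_cache: dict[str, float] = {}
    ERROR_CACHE_TTL = 30.0  # seconds; assumed value, not taken from router.py

    def recently_failed(endpoint: str) -> bool:
        # True while the most recent failure is younger than the TTL.
        ts = _available_error_cache.get(endpoint)
        return ts is not None and (time.monotonic() - ts) < ERROR_CACHE_TTL

    def record_failure(endpoint: str) -> None:
        _available_error_cache[endpoint] = time.monotonic()

The trade-off is deliberate: a dead endpoint now costs at most 8 seconds per listing request (probed concurrently with the others), and the listings recover as soon as the endpoint comes back instead of waiting for a cache entry to expire.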