fix: prevent dashboard and route hangs when endpoints are down by passing skip_error_cache=True together with a reduced per-request timeout
This commit is contained in:
parent bbe7bd48c5
commit 182ddae539

1 changed file with 19 additions and 14 deletions:

router.py (+19 -14)
@@ -1046,7 +1046,7 @@ class fetch:
             if _inflight_loaded_models.get(endpoint) == task:
                 _inflight_loaded_models.pop(endpoint, None)

-    async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None, skip_error_cache: bool = False) -> List[dict]:
+    async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None, skip_error_cache: bool = False, timeout: float = None) -> List[dict]:
         """
         Query <endpoint>/<route> to fetch <detail> and return a List of dicts with details
         for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail.
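For orientation, a minimal usage sketch of the new signature as the routes below call it (the endpoint URL is a placeholder, and the call must run inside an async handler):

    # Hypothetical caller: probe one endpoint, bypassing the error cache,
    # with this single request capped at 8 seconds total.
    models = await fetch.endpoint_details(
        "http://127.0.0.1:11434",  # placeholder Ollama endpoint
        "/api/tags",
        "models",
        skip_error_cache=True,
        timeout=8,
    )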
@@ -1054,6 +1054,8 @@ class fetch:
         When ``skip_error_cache`` is False (the default), the call is short-circuited
         if the endpoint recently failed (recorded in ``_available_error_cache``).
         Pass ``skip_error_cache=True`` from health-check routes that must always probe.
+
+        ``timeout`` overrides the session default for this single request (seconds, total).
         """
         # Fast-fail if the endpoint is known to be down (unless caller opts out)
         if not skip_error_cache:
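The fast-fail guard is truncated by this hunk; a sketch of the shape it implies, assuming a simple membership test on ``_available_error_cache`` and an "N/A" fallback (both assumptions, not the committed code):

    if not skip_error_cache and endpoint in _available_error_cache:
        # Assumed behaviour: skip network I/O entirely and return the
        # same "N/A" detail a failed request would produce.
        return [{"name": "N/A"}]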
@@ -1068,8 +1070,11 @@ class fetch:
             headers = {"Authorization": "Bearer " + api_key}

         request_url = f"{endpoint}{route}"
+        req_kwargs = {}
+        if timeout is not None:
+            req_kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
         try:
-            async with client.get(request_url, headers=headers) as resp:
+            async with client.get(request_url, headers=headers, **req_kwargs) as resp:
                 await _ensure_success(resp)
                 data = await resp.json()
                 detail = data.get(detail, [])
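For reference, the per-request timeout pattern this hunk introduces, as a self-contained sketch against a plain aiohttp session (URL and function name are placeholders):

    import asyncio
    import aiohttp

    async def probe(url: str, timeout: float | None = None) -> dict:
        # A per-request aiohttp.ClientTimeout overrides the session default;
        # with timeout=None the session's own default still applies.
        req_kwargs = {}
        if timeout is not None:
            req_kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
        async with aiohttp.ClientSession() as session:
            async with session.get(url, **req_kwargs) as resp:
                resp.raise_for_status()
                return await resp.json()

    # asyncio.run(probe("http://127.0.0.1:11434/api/tags", timeout=8))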
@@ -2780,11 +2785,11 @@ async def tags_proxy(request: Request):
     """

     # 1. Query all endpoints for models
-    tasks = [fetch.endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep]
-    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep]
+    tasks = [fetch.endpoint_details(ep, "/api/tags", "models", skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" not in ep]
+    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys[ep], skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" in ep]
     # Also query llama-server endpoints not already covered by config.endpoints
     llama_eps_for_tags = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
-    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep)) for ep in llama_eps_for_tags]
+    tasks += [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8) for ep in llama_eps_for_tags]
     all_models = await asyncio.gather(*tasks)

     models = {'models': []}
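The reason this unhangs the dashboard: asyncio.gather completes only when its slowest task does, so a single dead endpoint used to stall /api/tags for the full OS connect timeout; with timeout=8 every probe is bounded. A runnable toy illustration of that property:

    import asyncio

    async def toy(delay: float) -> float:
        await asyncio.sleep(delay)
        return delay

    async def main() -> None:
        # Finishes in ~3 s (the slowest task), not 1 + 2 + 3 = 6 s;
        # capping the slowest task therefore caps the whole gather.
        print(await asyncio.gather(toy(1), toy(2), toy(3)))  # [1, 2, 3]

    asyncio.run(main())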
@@ -2818,12 +2823,12 @@ async def ps_proxy(request: Request):
     For llama-server endpoints: queries /v1/models with status.value == "loaded"
     """
     # 1. Query Ollama endpoints for running models via /api/ps
-    ollama_tasks = [fetch.endpoint_details(ep, "/api/ps", "models") for ep in config.endpoints if "/v1" not in ep]
+    ollama_tasks = [fetch.endpoint_details(ep, "/api/ps", "models", skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" not in ep]
     # 2. Query llama-server endpoints for loaded models via /v1/models
     # Also query endpoints from llama_server_endpoints that may not be in config.endpoints
     all_llama_endpoints = set(config.llama_server_endpoints) | set(ep for ep in config.endpoints if ep in config.llama_server_endpoints)
     llama_tasks = [
-        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep))
+        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8)
         for ep in all_llama_endpoints
     ]

@@ -2873,12 +2878,12 @@ async def ps_details_proxy(request: Request):
     For llama-server endpoints: queries /v1/models with status info
     """
     # 1. Query Ollama endpoints via /api/ps
-    ollama_tasks = [(ep, fetch.endpoint_details(ep, "/api/ps", "models")) for ep in config.endpoints if "/v1" not in ep]
+    ollama_tasks = [(ep, fetch.endpoint_details(ep, "/api/ps", "models", skip_error_cache=True, timeout=8)) for ep in config.endpoints if "/v1" not in ep]
     # 2. Query llama-server endpoints via /v1/models
     # Also query endpoints from llama_server_endpoints that may not be in config.endpoints
     all_llama_endpoints = set(config.llama_server_endpoints) | set(ep for ep in config.endpoints if ep in config.llama_server_endpoints)
     llama_tasks = [
-        (ep, fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep)))
+        (ep, fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8))
         for ep in all_llama_endpoints
     ]

@@ -2940,7 +2945,7 @@ async def ps_details_proxy(request: Request):
             if api_key:
                 headers = {"Authorization": f"Bearer {api_key}"}
             try:
-                async with client.get(props_url, headers=headers) as resp:
+                async with client.get(props_url, headers=headers, timeout=aiohttp.ClientTimeout(total=5)) as resp:
                     if resp.status == 200:
                         data = await resp.json()
                         dgs = data.get("default_generation_settings", {})
@@ -3014,7 +3019,7 @@ async def config_proxy(request: Request):
         target_url = f"{url}/api/version"

         try:
-            async with client.get(target_url, headers=headers) as resp:
+            async with client.get(target_url, headers=headers, timeout=aiohttp.ClientTimeout(total=5)) as resp:
                 await _ensure_success(resp)
                 data = await resp.json()
                 if "/v1" in url:
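On expiry these 5-second probes raise asyncio.TimeoutError. A sketch of the failure path, reusing the hunk's names (client, target_url, headers); treating the miss as None is an assumption, not the committed code:

    try:
        async with client.get(target_url, headers=headers,
                              timeout=aiohttp.ClientTimeout(total=5)) as resp:
            data = await resp.json()
    except (asyncio.TimeoutError, aiohttp.ClientError):
        data = None  # endpoint unreachable or too slow; report "N/A" downstream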
@@ -3619,14 +3624,14 @@ async def openai_models_proxy(request: Request):
     For llama-server endpoints: queries /v1/models and filters for status.value == "loaded"
     """
     # 1. Query Ollama endpoints for all models via /api/tags
-    ollama_tasks = [fetch.endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep]
+    ollama_tasks = [fetch.endpoint_details(ep, "/api/tags", "models", skip_error_cache=True, timeout=8) for ep in config.endpoints if "/v1" not in ep]
     # 2. Query external OpenAI endpoints (Groq, OpenAI, etc.) via /models
-    ext_openai_tasks = [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep)) for ep in config.endpoints if is_ext_openai_endpoint(ep)]
+    ext_openai_tasks = [fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8) for ep in config.endpoints if is_ext_openai_endpoint(ep)]
     # 3. Query llama-server endpoints for loaded models via /v1/models
     # Also query endpoints from llama_server_endpoints that may not be in config.endpoints
     all_llama_endpoints = set(config.llama_server_endpoints) | set(ep for ep in config.endpoints if ep in config.llama_server_endpoints)
     llama_tasks = [
-        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep))
+        fetch.endpoint_details(ep, "/models", "data", config.api_keys.get(ep), skip_error_cache=True, timeout=8)
         for ep in all_llama_endpoints
     ]
