diff --git a/requirements.txt b/requirements.txt
index e39b50c..d58da4c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,27 @@
+aiocache==0.12.3
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
annotated-types==0.7.0
anyio==4.10.0
+async-timeout==5.0.1
+attrs==25.3.0
certifi==2025.8.3
click==8.2.1
distro==1.9.0
exceptiongroup==1.3.0
fastapi==0.116.1
+frozenlist==1.7.0
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
+httpx-aiohttp==0.1.8
idna==3.10
jiter==0.10.0
+multidict==6.6.4
ollama==0.5.3
openai==1.102.0
+propcache==0.3.2
pydantic==2.11.7
pydantic-settings==2.10.1
pydantic_core==2.33.2
@@ -23,3 +33,4 @@ tqdm==4.67.1
typing-inspection==0.4.1
typing_extensions==4.14.1
uvicorn==0.35.0
+yarl==1.20.1
diff --git a/router.py b/router.py
index 595f411..b73e4e6 100644
--- a/router.py
+++ b/router.py
@@ -7,6 +7,7 @@ license: AGPL
"""
# -------------------------------------------------------------
import json, time, asyncio, yaml, httpx, ollama, openai, os, re
+from httpx_aiohttp import AiohttpTransport
from pathlib import Path
from typing import Dict, Set, List, Optional
from fastapi import FastAPI, Request, HTTPException
@@ -96,11 +97,12 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
"""
return httpx.AsyncClient(
base_url=endpoint,
- timeout=httpx.Timeout(5.0, read=5.0, write=5.0, connect=5.0),
- limits=httpx.Limits(
- max_keepalive_connections=64,
- max_connections=64
- )
+ timeout=httpx.Timeout(5.0, read=5.0, write=None, connect=5.0),
+ #limits=httpx.Limits(
+ # max_keepalive_connections=64,
+ # max_connections=64
+ #),
+        transport=AiohttpTransport()  # NOTE(review): confirm AiohttpTransport() is constructible with no aiohttp ClientSession in httpx-aiohttp 0.1.8 — if the ctor requires one, every get_httpx_client() call raises
)
async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]:
@@ -133,8 +135,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -
# Error expired – remove it
del _error_cache[endpoint]
- client = get_httpx_client(endpoint)
try:
+ client = get_httpx_client(endpoint)
if "/v1" in endpoint:
resp = await client.get(f"/models", headers=headers)
else:
@@ -147,7 +149,7 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -
models = {m.get("id") for m in data.get("data", []) if m.get("id")}
else:
models = {m.get("name") for m in data.get("models", []) if m.get("name")}
-
+
if models:
_models_cache[endpoint] = (models, time.time())
return models
@@ -160,6 +162,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -
print(f"[fetch_available_models] {endpoint} error: {e}")
_error_cache[endpoint] = time.time()
return set()
+    finally:
+        await client.aclose()  # NOTE(review): ``client`` is assigned inside the ``try`` — if get_httpx_client() raises, this line NameErrors; assign the client before the ``try`` instead
async def fetch_loaded_models(endpoint: str) -> Set[str]:
@@ -168,8 +172,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]:
loaded on that endpoint. If the request fails (e.g. timeout, 5xx), an empty
set is returned.
"""
- client = get_httpx_client(endpoint)
try:
+ client = get_httpx_client(endpoint)
resp = await client.get(f"/api/ps")
resp.raise_for_status()
data = resp.json()
@@ -180,6 +184,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]:
except Exception:
# If anything goes wrong we simply assume the endpoint has no models
return set()
+    finally:
+        await client.aclose()  # NOTE(review): ``client`` is assigned inside the ``try`` — if get_httpx_client() raises, this line NameErrors; assign the client before the ``try`` instead
async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]:
"""
@@ -189,8 +195,9 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key
headers = None
if api_key is not None:
headers = {"Authorization": "Bearer " + api_key}
- client = get_httpx_client(endpoint)
+
try:
+ client = get_httpx_client(endpoint)
resp = await client.get(f"{route}", headers=headers)
resp.raise_for_status()
data = resp.json()
@@ -200,6 +207,8 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key
# If anything goes wrong we cannot reply details
print(e)
return []
+    finally:
+        await client.aclose()  # NOTE(review): ``client`` is assigned inside the ``try`` — if get_httpx_client() raises, this line NameErrors; assign the client before the ``try`` instead
def ep2base(ep):
if "/v1" in ep:
@@ -235,8 +244,8 @@ async def decrement_usage(endpoint: str, model: str) -> None:
# Optionally, clean up zero entries
if usage_counts[endpoint].get(model, 0) == 0:
usage_counts[endpoint].pop(model, None)
- if not usage_counts[endpoint]:
- usage_counts.pop(endpoint, None)
+ #if not usage_counts[endpoint]:
+ # usage_counts.pop(endpoint, None)
# -------------------------------------------------------------
# 5. Endpoint selection logic (respecting the configurable limit)
@@ -640,7 +649,7 @@ async def show_proxy(request: Request, model: Optional[str] = None):
# 2. Endpoint logic
endpoint = await choose_endpoint(model)
- await increment_usage(endpoint, model)
+ #await increment_usage(endpoint, model)
client = ollama.AsyncClient(host=endpoint)
# 3. Proxy a simple show request
@@ -907,7 +916,7 @@ async def config_proxy(request: Request):
"""
async def check_endpoint(url: str):
try:
- async with httpx.AsyncClient(timeout=1) as client:
+ async with httpx.AsyncClient(timeout=1, transport=AiohttpTransport()) as client:
if "/v1" in url:
headers = {"Authorization": "Bearer " + config.api_keys[url]}
r = await client.get(f"{url}/models", headers=headers)
@@ -921,6 +930,8 @@ async def config_proxy(request: Request):
return {"url": url, "status": "ok", "version": data.get("version")}
except Exception as exc:
return {"url": url, "status": "error", "detail": str(exc)}
+        # The ``async with`` block already closes the client on exit; an explicit
+        # ``finally: await client.aclose()`` here would NameError if the client ctor raised.
results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints])
return {"endpoints": results}
diff --git a/static/index.html b/static/index.html
index 4fd9234..a2945b4 100644
--- a/static/index.html
+++ b/static/index.html
@@ -5,108 +5,75 @@
NOMYO Router Dashboard
-
Router Dashboard
+
+Router Dashboard
+
+
Available Models (Tags)
+
+
+
+
+
+
+
+
+
Running Models (PS)
@@ -119,10 +86,11 @@
| Digest |
-
- | Loading… |
-
+ | Loading… |
+
+
+
@@ -135,23 +103,22 @@
Version |
-
- | Loading… |
-
+ | Loading… |
+