From b3b67fdbf282b5df8302dfcaa5d1bfaf8c09be02 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 19:07:28 +0200 Subject: [PATCH] Add files via upload BREAKING CHANGE: - new config.yaml config block - new dependency: httpx-aiohttp for faster endpoint queries in bigger installations - new dynamic dashboard --- requirements.txt | 11 ++ router.py | 37 +++-- static/index.html | 346 +++++++++++++++++++++------------------------- 3 files changed, 191 insertions(+), 203 deletions(-) diff --git a/requirements.txt b/requirements.txt index e39b50c..d58da4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,27 @@ +aiocache==0.12.3 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.10.0 +async-timeout==5.0.1 +attrs==25.3.0 certifi==2025.8.3 click==8.2.1 distro==1.9.0 exceptiongroup==1.3.0 fastapi==0.116.1 +frozenlist==1.7.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 +httpx-aiohttp==0.1.8 idna==3.10 jiter==0.10.0 +multidict==6.6.4 ollama==0.5.3 openai==1.102.0 +propcache==0.3.2 pydantic==2.11.7 pydantic-settings==2.10.1 pydantic_core==2.33.2 @@ -23,3 +33,4 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 uvicorn==0.35.0 +yarl==1.20.1 diff --git a/router.py b/router.py index 595f411..b73e4e6 100644 --- a/router.py +++ b/router.py @@ -7,6 +7,7 @@ license: AGPL """ # ------------------------------------------------------------- import json, time, asyncio, yaml, httpx, ollama, openai, os, re +from httpx_aiohttp import AiohttpTransport from pathlib import Path from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException @@ -96,11 +97,12 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: """ return httpx.AsyncClient( base_url=endpoint, - timeout=httpx.Timeout(5.0, read=5.0, write=5.0, connect=5.0), - limits=httpx.Limits( - max_keepalive_connections=64, - max_connections=64 - ) + timeout=httpx.Timeout(5.0, read=5.0, write=None, connect=5.0), + #limits=httpx.Limits( + # max_keepalive_connections=64, + # max_connections=64 + #), + transport=AiohttpTransport() ) async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]: @@ -133,8 +135,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - # Error expired – remove it del _error_cache[endpoint] - client = get_httpx_client(endpoint) try: + client = get_httpx_client(endpoint) if "/v1" in endpoint: resp = await client.get(f"/models", headers=headers) else: @@ -147,7 +149,7 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - models = {m.get("id") for m in data.get("data", []) if m.get("id")} else: models = {m.get("name") for m in data.get("models", []) if m.get("name")} - + if models: _models_cache[endpoint] = (models, time.time()) return models @@ -160,6 +162,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - print(f"[fetch_available_models] {endpoint} error: {e}") _error_cache[endpoint] = time.time() return set() + finally: + await client.aclose() async def fetch_loaded_models(endpoint: str) -> Set[str]: @@ -168,8 +172,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: loaded on that endpoint. If the request fails (e.g. timeout, 5xx), an empty set is returned. """ - client = get_httpx_client(endpoint) try: + client = get_httpx_client(endpoint) resp = await client.get(f"/api/ps") resp.raise_for_status() data = resp.json() @@ -180,6 +184,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: except Exception: # If anything goes wrong we simply assume the endpoint has no models return set() + finally: + await client.aclose() async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]: """ @@ -189,8 +195,9 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key headers = None if api_key is not None: headers = {"Authorization": "Bearer " + api_key} - client = get_httpx_client(endpoint) + try: + client = get_httpx_client(endpoint) resp = await client.get(f"{route}", headers=headers) resp.raise_for_status() data = resp.json() @@ -200,6 +207,8 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key # If anything goes wrong we cannot reply details print(e) return [] + finally: + await client.aclose() def ep2base(ep): if "/v1" in ep: @@ -235,8 +244,8 @@ async def decrement_usage(endpoint: str, model: str) -> None: # Optionally, clean up zero entries if usage_counts[endpoint].get(model, 0) == 0: usage_counts[endpoint].pop(model, None) - if not usage_counts[endpoint]: - usage_counts.pop(endpoint, None) + #if not usage_counts[endpoint]: + # usage_counts.pop(endpoint, None) # ------------------------------------------------------------- # 5. Endpoint selection logic (respecting the configurable limit) @@ -640,7 +649,7 @@ async def show_proxy(request: Request, model: Optional[str] = None): # 2. Endpoint logic endpoint = await choose_endpoint(model) - await increment_usage(endpoint, model) + #await increment_usage(endpoint, model) client = ollama.AsyncClient(host=endpoint) # 3. Proxy a simple show request @@ -907,7 +916,7 @@ async def config_proxy(request: Request): """ async def check_endpoint(url: str): try: - async with httpx.AsyncClient(timeout=1) as client: + async with httpx.AsyncClient(timeout=1, transport=AiohttpTransport()) as client: if "/v1" in url: headers = {"Authorization": "Bearer " + config.api_keys[url]} r = await client.get(f"{url}/models", headers=headers) @@ -921,6 +930,8 @@ async def config_proxy(request: Request): return {"url": url, "status": "ok", "version": data.get("version")} except Exception as exc: return {"url": url, "status": "error", "detail": str(exc)} + finally: + await client.aclose() results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints]) return {"endpoints": results} diff --git a/static/index.html b/static/index.html index 4fd9234..a2945b4 100644 --- a/static/index.html +++ b/static/index.html @@ -5,108 +5,75 @@ NOMYO Router Dashboard -

Router Dashboard

+ +

Router Dashboard

+
-

Available Models (Tags)

-
- - - - -
+

Available Models (Tags)

+
+ + + + +
+
+ - - - +
ModelDigest
Loading…
Loading…
+

Running Models (PS)

@@ -119,10 +86,11 @@ - - - +
Digest
Loading…
Loading…
+ + +
@@ -135,23 +103,22 @@ Version - - Loading… - + Loading… +