From 2c82e5964f83e38af3e00375d8dc9ae7808a82f5 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Mon, 1 Sep 2025 16:35:22 +0200 Subject: [PATCH 01/20] Add files via upload minor updates --- config.yaml | 6 ++++-- router.py | 9 ++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/config.yaml b/config.yaml index a0d14ed..692c00e 100644 --- a/config.yaml +++ b/config.yaml @@ -3,6 +3,8 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 + - https://openrouter.ai/api/v1 + - https://api.inceptionlabs.ai/v1 -# Maximum concurrent connections *per endpoint‑model pair* -max_concurrent_connections: 2 \ No newline at end of file +# Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) +max_concurrent_connections: 2 diff --git a/router.py b/router.py index 0a3c3d6..f58157a 100644 --- a/router.py +++ b/router.py @@ -2,7 +2,7 @@ title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing author: alpha-nerd-nomyo author_url: https://github.com/nomyo-ai -version: 0.1 +version: 0.2.1 license: AGPL """ # ------------------------------------------------------------- @@ -19,9 +19,9 @@ from collections import defaultdict # ------------------------------------------------------------------ # In‑memory caches # ------------------------------------------------------------------ -# Successful results are cached for 300 s +# Successful results are cached for 300s _models_cache: dict[str, tuple[Set[str], float]] = {} -# Transient errors are cached for 30 s – the key stays until the +# Transient errors are cached for 1s – the key stays until the # timeout expires, after which the endpoint will be queried again. _error_cache: dict[str, float] = {} @@ -86,7 +86,6 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: ) ) -#@cached(cache=Cache.MEMORY, ttl=300) async def fetch_available_models(endpoint: str) -> Set[str]: """ Query /api/tags and return a set of all model names that the @@ -132,7 +131,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: _models_cache[endpoint] = (models, time.time()) return models else: - # Empty list – treat as “no models”, but still cache for 300 s + # Empty list – treat as “no models”, but still cache for 300s _models_cache[endpoint] = (models, time.time()) return models except Exception as e: From b27b3608cee450a06ecec4dbdf6055d378b6e169 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Mon, 1 Sep 2025 16:36:27 +0200 Subject: [PATCH 02/20] Update config.yaml --- config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/config.yaml b/config.yaml index 692c00e..f36a1fb 100644 --- a/config.yaml +++ b/config.yaml @@ -3,8 +3,6 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 - - https://openrouter.ai/api/v1 - - https://api.inceptionlabs.ai/v1 # Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) max_concurrent_connections: 2 From f01843d12bef7fac9741b06ede99ef3be7840148 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 12:48:19 +0200 Subject: [PATCH 03/20] Add files via upload refined available models view for copy and paste with open ai compatible endpoints --- static/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/index.html b/static/index.html index 86746bd..dd9dfec 100644 --- a/static/index.html +++ b/static/index.html @@ -130,7 +130,7 @@ async function loadTags(){ try{ const data = await 
fetchJSON('/api/tags'); const body = document.getElementById('tags-body'); - body.innerHTML = data.models.map(m=>`${m.name}${m.digest}`).join(''); + body.innerHTML = data.models.map(m=>`${m.id || m.name}${m.digest}`).join(''); }catch(e){ console.error(e); } } From 7ed872379b8821d9610d6564211dc9d630aa78e1 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 13:30:04 +0200 Subject: [PATCH 04/20] Add files via upload adding missing authorization headers for open ai endpoints --- router.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/router.py b/router.py index f58157a..1761caf 100644 --- a/router.py +++ b/router.py @@ -917,6 +917,10 @@ async def openai_embedding_proxy(request: Request): model = payload.get("model") input = payload.get("input") + headers = request.headers + api_key = headers.get("Authorization") + api_key = api_key.split()[1] + if not model: raise HTTPException( status_code=400, detail="Missing required field 'model'" @@ -931,7 +935,7 @@ async def openai_embedding_proxy(request: Request): # 2. Endpoint logic endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) - oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key="ollama") + oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key=api_key) # 3. Async generator that streams embedding data and decrements the counter async_gen = await oclient.embeddings.create(input = [input], model=model) From 9f32fcf75dd80ffb07db056fb84ef0b63f600ab4 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 14:44:21 +0200 Subject: [PATCH 05/20] Add files via upload fixing /v1/models: - relabel model.id with model.name for OpenAI compliance --- router.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/router.py b/router.py index 1761caf..5796972 100644 --- a/router.py +++ b/router.py @@ -1161,6 +1161,9 @@ async def openai_models_proxy(request: Request): models = {'data': []} for modellist in all_models: + for model in modellist: + if not id in model.keys(): # Relable Ollama models with OpenAI Model.id from Model.name + model['id'] = model['name'] models['data'] += modellist # 2. Return a JSONResponse with a deduplicated list of unique models for inference From 0a456e6e21601e6b2e206b0e8be603996d892709 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 16:24:00 +0200 Subject: [PATCH 06/20] Add files via upload fixing v1/models --- router.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/router.py b/router.py index 5796972..99ea34b 100644 --- a/router.py +++ b/router.py @@ -160,14 +160,18 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: # If anything goes wrong we simply assume the endpoint has no models return set() -async def fetch_endpoint_details(endpoint: str, route: str, detail: str) -> List[dict]: +async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: str = None) -> List[dict]: """ Query / to fetch and return a List of dicts with details for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail. 
""" + if api_key is not None: + headers = {"Authorization": "Bearer " + api_key} + else: + headers = None client = get_httpx_client(endpoint) try: - resp = await client.get(f"{route}") + resp = await client.get(f"{route}", headers=headers) resp.raise_for_status() data = resp.json() detail = data.get(detail, []) @@ -175,7 +179,7 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str) -> List except Exception as e: # If anything goes wrong we cannot reply details print(e) - return {detail: []} + return "N/A" def ep2base(ep): if "/v1" in ep: @@ -803,7 +807,8 @@ async def version_proxy(request: Request): # 1. Query all endpoints for version tasks = [fetch_endpoint_details(ep, "/api/version", "version") for ep in config.endpoints] all_versions = await asyncio.gather(*tasks) - + all_versions = [v for v in all_versions if v != "N/A"] + def version_key(v): return tuple(map(int, v.split('.'))) @@ -824,7 +829,7 @@ async def tags_proxy(request: Request): """ # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] + tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] #needs api_key TODO:add central mgmt all_models = await asyncio.gather(*tasks) models = {'models': []} @@ -1154,9 +1159,13 @@ async def openai_models_proxy(request: Request): Proxy a models request to Ollama endpoints and reply with a unique list of all models. """ + headers = request.headers + api_key = headers.get("Authorization") + api_key = api_key.split()[1] + # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] + tasks += [fetch_endpoint_details(ep, "/models", "data", api_key) for ep in config.endpoints if "/v1" in ep] all_models = await asyncio.gather(*tasks) models = {'data': []} From d257073cb1670bbcc150d28320fcc9e485aab6fe Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 16:34:41 +0200 Subject: [PATCH 07/20] Add files via upload preparations for /v1 endpoints with auth --- router.py | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/router.py b/router.py index 99ea34b..e835deb 100644 --- a/router.py +++ b/router.py @@ -8,7 +8,7 @@ license: AGPL # ------------------------------------------------------------- import json, time, asyncio, yaml, httpx, ollama, openai from pathlib import Path -from typing import Dict, Set, List +from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException from fastapi.staticfiles import StaticFiles from starlette.responses import StreamingResponse, JSONResponse, Response, HTMLResponse, RedirectResponse @@ -86,7 +86,7 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: ) ) -async def fetch_available_models(endpoint: str) -> Set[str]: +async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]: """ Query /api/tags and return a set of all model names that the endpoint *advertises* (i.e. is capable of serving). This endpoint lists @@ -96,6 +96,10 @@ async def fetch_available_models(endpoint: str) -> Set[str]: If the request fails (e.g. timeout, 5xx, or malformed response), an empty set is returned. 
""" + headers = None + if api_key is not None: + headers = {"Authorization": "Bearer " + api_key} + if endpoint in _models_cache: models, cached_at = _models_cache[endpoint] if _is_fresh(cached_at, 300): @@ -115,7 +119,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: client = get_httpx_client(endpoint) try: if "/v1" in endpoint: - resp = await client.get(f"/models") + resp = await client.get(f"/models", headers=headers) else: resp = await client.get(f"/api/tags") resp.raise_for_status() @@ -123,7 +127,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: # Expected format: # {"models": [{"name": "model1"}, {"name": "model2"}]} if "/v1" in endpoint: - models = {m.get("id") for m in data.get("data", []) if m.get("name")} + models = {m.get("id") for m in data.get("data", []) if m.get("id")} else: models = {m.get("name") for m in data.get("models", []) if m.get("name")} @@ -160,15 +164,14 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: # If anything goes wrong we simply assume the endpoint has no models return set() -async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: str = None) -> List[dict]: +async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]: """ Query / to fetch and return a List of dicts with details for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail. """ + headers = None if api_key is not None: headers = {"Authorization": "Bearer " + api_key} - else: - headers = None client = get_httpx_client(endpoint) try: resp = await client.get(f"{route}", headers=headers) @@ -179,7 +182,7 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key except Exception as e: # If anything goes wrong we cannot reply details print(e) - return "N/A" + return [] def ep2base(ep): if "/v1" in ep: @@ -221,7 +224,7 @@ async def decrement_usage(endpoint: str, model: str) -> None: # ------------------------------------------------------------- # 5. Endpoint selection logic (respecting the configurable limit) # ------------------------------------------------------------- -async def choose_endpoint(model: str) -> str: +async def choose_endpoint(model: str, api_key: Optional[str] = None) -> str: """ Determine which endpoint to use for the given model while respecting the `max_concurrent_connections` per endpoint‑model pair **and** @@ -240,7 +243,7 @@ async def choose_endpoint(model: str) -> str: 6️⃣ If no endpoint advertises the model at all, raise an error. """ # 1️⃣ Gather advertised‑model sets for all endpoints concurrently - tag_tasks = [fetch_available_models(ep) for ep in config.endpoints] + tag_tasks = [fetch_available_models(ep, api_key) for ep in config.endpoints] advertised_sets = await asyncio.gather(*tag_tasks) # 2️⃣ Filter endpoints that advertise the requested model @@ -938,7 +941,7 @@ async def openai_embedding_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. 
Endpoint logic - endpoint = await choose_endpoint(model) + endpoint = await choose_endpoint(model, api_key) await increment_usage(endpoint, model) oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key=api_key) @@ -976,6 +979,7 @@ async def openai_chat_completions_proxy(request: Request): temperature = payload.get("temperature") top_p = payload.get("top_p") max_tokens = payload.get("max_tokens") + max_completion_tokens = payload.get("max_completion_tokens") tools = payload.get("tools") headers = request.headers @@ -985,14 +989,9 @@ async def openai_chat_completions_proxy(request: Request): params = { "messages": messages, "model": model, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, "seed": seed, "stop": stop, "stream": stream, - "temperature": temperature, - "top_p": top_p, - "max_tokens": max_tokens } if tools is not None: @@ -1001,6 +1000,18 @@ async def openai_chat_completions_proxy(request: Request): params["response_format"] = response_format if stream_options is not None: params["stream_options"] = stream_options + if max_completion_tokens is not None: + params["max_completion_tokens"] = max_completion_tokens + if max_tokens is not None: + params["max_tokens"] = max_tokens + if temperature is not None: + params["temperature"] = temperature + if top_p is not None: + params["top_p"] = top_p + if presence_penalty is not None: + params["presence_penalty"] = presence_penalty + if frequency_penalty is not None: + params["frequency_penalty"] = frequency_penalty if not model: raise HTTPException( @@ -1014,7 +1025,7 @@ async def openai_chat_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model) + endpoint = await choose_endpoint(model, api_key) await increment_usage(endpoint, model) base_url = ep2base(endpoint) oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) @@ -1112,7 +1123,7 @@ async def openai_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model) + endpoint = await choose_endpoint(model, api_key) await increment_usage(endpoint, model) base_url = ep2base(endpoint) oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) @@ -1171,8 +1182,10 @@ async def openai_models_proxy(request: Request): models = {'data': []} for modellist in all_models: for model in modellist: - if not id in model.keys(): # Relable Ollama models with OpenAI Model.id from Model.name + if not "id" in model.keys(): # Relable Ollama models with OpenAI Model.id from Model.name model['id'] = model['name'] + else: + model['name'] = model['id'] models['data'] += modellist # 2. 
Return a JSONResponse with a deduplicated list of unique models for inference From e7fd79c461de3c20020ce56d00590e6367602c10 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 18:00:20 +0200 Subject: [PATCH 08/20] Update config.yaml centralizing remote endpoint secrets --- config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config.yaml b/config.yaml index f36a1fb..94162b1 100644 --- a/config.yaml +++ b/config.yaml @@ -3,6 +3,15 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 + - https://openrouter.ai/api/v1 + - https://api.openai.com/v1 # Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) max_concurrent_connections: 2 + +# API keys for remote endpoints +# Set an environment variable like OPENAI_KEY +# Confirm endpoints are exactly as in endpoints block +api_keys: + "https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" + "https://api.openai.com/v1": "${OPENAI_KEY}" From 2ead1112e74b1da425f976535eb03854bb2bcf84 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 18:01:39 +0200 Subject: [PATCH 09/20] Add files via upload centralizing remote endpoint secrets management for unified endpoints --- router.py | 75 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/router.py b/router.py index e835deb..594496e 100644 --- a/router.py +++ b/router.py @@ -2,11 +2,11 @@ title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing author: alpha-nerd-nomyo author_url: https://github.com/nomyo-ai -version: 0.2.1 +version: 0.2.2 license: AGPL """ # ------------------------------------------------------------- -import json, time, asyncio, yaml, httpx, ollama, openai +import json, time, asyncio, yaml, httpx, ollama, openai, os, re from pathlib import Path from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException @@ -38,18 +38,35 @@ class Config(BaseSettings): # Max concurrent connections per endpoint‑model pair, see OLLAMA_NUM_PARALLEL max_concurrent_connections: int = 1 + api_keys: Dict[str, str] = Field(default_factory=dict) + class Config: # Load from `config.yaml` first, then from env variables - env_prefix = "OLLAMA_PROXY_" + env_prefix = "NOMYO_ROUTER_" yaml_file = Path("config.yaml") # relative to cwd + @classmethod + def _expand_env_refs(cls, obj): + """Recursively replace `${VAR}` with os.getenv('VAR').""" + if isinstance(obj, dict): + return {k: cls._expand_env_refs(v) for k, v in obj.items()} + if isinstance(obj, list): + return [cls._expand_env_refs(v) for v in obj] + if isinstance(obj, str): + # Only expand if it is exactly ${VAR} + m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj) + if m: + return os.getenv(m.group(1), "") + return obj + @classmethod def from_yaml(cls, path: Path) -> "Config": """Load the YAML file and create the Config instance.""" if path.exists(): with path.open("r", encoding="utf-8") as fp: data = yaml.safe_load(fp) or {} - return cls(**data) + cleaned = cls._expand_env_refs(data) + return cls(**cleaned) return cls() # Create the global config object – it will be overwritten on startup @@ -224,7 +241,7 @@ async def decrement_usage(endpoint: str, model: str) -> None: # ------------------------------------------------------------- # 5. 
Endpoint selection logic (respecting the configurable limit) # ------------------------------------------------------------- -async def choose_endpoint(model: str, api_key: Optional[str] = None) -> str: +async def choose_endpoint(model: str) -> str: """ Determine which endpoint to use for the given model while respecting the `max_concurrent_connections` per endpoint‑model pair **and** @@ -243,7 +260,8 @@ async def choose_endpoint(model: str, api_key: Optional[str] = None) -> str: 6️⃣ If no endpoint advertises the model at all, raise an error. """ # 1️⃣ Gather advertised‑model sets for all endpoints concurrently - tag_tasks = [fetch_available_models(ep, api_key) for ep in config.endpoints] + tag_tasks = [fetch_available_models(ep) for ep in config.endpoints if "/v1" not in ep] + tag_tasks += [fetch_available_models(ep, config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep] advertised_sets = await asyncio.gather(*tag_tasks) # 2️⃣ Filter endpoints that advertise the requested model @@ -808,9 +826,8 @@ async def version_proxy(request: Request): """ # 1. Query all endpoints for version - tasks = [fetch_endpoint_details(ep, "/api/version", "version") for ep in config.endpoints] + tasks = [fetch_endpoint_details(ep, "/api/version", "version") for ep in config.endpoints if "/v1" not in ep] all_versions = await asyncio.gather(*tasks) - all_versions = [v for v in all_versions if v != "N/A"] def version_key(v): return tuple(map(int, v.split('.'))) @@ -830,9 +847,10 @@ async def tags_proxy(request: Request): Proxy a tags request to Ollama endpoints and reply with a unique list of all models. """ + # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] #needs api_key TODO:add central mgmt + tasks += [fetch_endpoint_details(ep, "/models", "data", config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep] all_models = await asyncio.gather(*tasks) models = {'models': []} @@ -841,7 +859,7 @@ async def tags_proxy(request: Request): # 2. Return a JSONResponse with a deduplicated list of unique models for inference return JSONResponse( - content={"models": dedupe_on_keys(models['models'], ['digest','name'])}, + content={"models": dedupe_on_keys(models['models'], ['digest','name','id'])}, status_code=200, ) @@ -893,7 +911,8 @@ async def config_proxy(request: Request): try: async with httpx.AsyncClient(timeout=1) as client: if "/v1" in url: - r = await client.get(f"{url}/models") + headers = {"Authorization": "Bearer " + config.api_keys[url]} + r = await client.get(f"{url}/models", headers=headers) else: r = await client.get(f"{url}/api/version") r.raise_for_status() @@ -925,9 +944,6 @@ async def openai_embedding_proxy(request: Request): model = payload.get("model") input = payload.get("input") - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] if not model: raise HTTPException( @@ -941,12 +957,16 @@ async def openai_embedding_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model, api_key) + endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) + if "/v1" in endpoint: + api_key = config.api_keys[endpoint] + else: + api_key = "ollama" oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key=api_key) # 3. 
Async generator that streams embedding data and decrements the counter - async_gen = await oclient.embeddings.create(input = [input], model=model) + async_gen = await oclient.embeddings.create(input=[input], model=model) await decrement_usage(endpoint, model) @@ -981,10 +1001,6 @@ async def openai_chat_completions_proxy(request: Request): max_tokens = payload.get("max_tokens") max_completion_tokens = payload.get("max_completion_tokens") tools = payload.get("tools") - - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] params = { "messages": messages, @@ -1025,10 +1041,10 @@ async def openai_chat_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model, api_key) + endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) base_url = ep2base(endpoint) - oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) + oclient = openai.AsyncOpenAI(base_url=base_url, api_key=config.api_keys[endpoint]) # 3. Async generator that streams completions data and decrements the counter async def stream_ochat_response(): @@ -1088,11 +1104,8 @@ async def openai_completions_proxy(request: Request): temperature = payload.get("temperature") top_p = payload.get("top_p") max_tokens = payload.get("max_tokens") + max_completion_tokens = payload.get("max_completion_tokens") suffix = payload.get("suffix") - - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] params = { "prompt": prompt, @@ -1123,10 +1136,10 @@ async def openai_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model, api_key) + endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) base_url = ep2base(endpoint) - oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) + oclient = openai.AsyncOpenAI(base_url=base_url, api_key=config.api_keys[endpoint]) # 3. Async generator that streams completions data and decrements the counter async def stream_ocompletions_response(): @@ -1170,13 +1183,9 @@ async def openai_models_proxy(request: Request): Proxy a models request to Ollama endpoints and reply with a unique list of all models. """ - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] - # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data", api_key) for ep in config.endpoints if "/v1" in ep] + tasks += [fetch_endpoint_details(ep, "/models", "data", config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep] all_models = await asyncio.gather(*tasks) models = {'data': []} From 190fa874c77b46358677a0654f6dd2a43adef80d Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 19:20:01 +0200 Subject: [PATCH 10/20] Add files via upload cosmetics --- static/index.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/static/index.html b/static/index.html index dd9dfec..df27872 100644 --- a/static/index.html +++ b/static/index.html @@ -55,7 +55,7 @@
-    <h2>Available Models (Tags)</h2>
+    <h2>Available Models (Tags) <span id="tags-count"></span></h2>
@@ -131,6 +131,8 @@ async function loadTags(){ const data = await fetchJSON('/api/tags'); const body = document.getElementById('tags-body'); body.innerHTML = data.models.map(m=>``).join(''); + const countSpan = document.getElementById('tags-count'); + countSpan.textContent = `${data.models.length}`; }catch(e){ console.error(e); } } From 2f09dbe22c1b845008fcb6219663ddd4c7eb33be Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 10:39:10 +0200 Subject: [PATCH 11/20] Add files via upload adding dashboard copy link adding copy get route for dashboard --- router.py | 54 ++++++++++++++++++++++++++++++++--------------- static/index.html | 49 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 21 deletions(-) diff --git a/router.py b/router.py index 594496e..6206cbc 100644 --- a/router.py +++ b/router.py @@ -679,20 +679,38 @@ async def copy_proxy(request: Request): # 3. Iterate over all endpoints to copy the model on each endpoint status_list = [] for endpoint in config.endpoints: - client = ollama.AsyncClient(host=endpoint) - # 4. Proxy a simple copy request - copy = await client.copy(source=src, destination=dst) - status_list.append(copy.status) + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 4. Proxy a simple copy request + copy = await client.copy(source=src, destination=dst) + status_list.append(copy.status) # 4. Return with 200 OK if all went well, 404 if a single endpoint failed - if 404 in status_list: - return Response( - status_code=404 - ) - else: - return Response( - status_code=200 - ) + return Response(status_code=404 if 404 in status_list else 200) + +@app.get("/api/copy") +async def copy_proxy_from_dashboard(source: str, destination: str): + """ + Proxy a model copy request to each Ollama endpoint and reply with a status code. + Accepts `source` and `destination` exclusively as query‑string parameters. + """ + # 1. Validate that both values are non‑empty strings (FastAPI already guarantees presence) + if not source: + raise HTTPException(status_code=400, detail="Missing required query parameter 'source'") + if not destination: + raise HTTPException(status_code=400, detail="Missing required query parameter 'destination'") + + # 2. Iterate over all endpoints to copy the model on each endpoint + status_list = [] + for endpoint in config.endpoints: + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 3. Proxy a simple copy request + copy = await client.copy(source=source, destination=destination) + status_list.append(copy.status) + + # 4. Return with 200 OK if all went well, 404 if any endpoint failed + return Response(status_code=404 if 404 in status_list else 200) # ------------------------------------------------------------- # 13. API route – Delete @@ -720,10 +738,11 @@ async def delete_proxy(request: Request): # 2. Iterate over all endpoints to delete the model on each endpoint status_list = [] for endpoint in config.endpoints: - client = ollama.AsyncClient(host=endpoint) - # 3. Proxy a simple copy request - copy = await client.delete(model=model) - status_list.append(copy.status) + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 3. Proxy a simple copy request + copy = await client.delete(model=model) + status_list.append(copy.status) # 4. 
Retrun 200 0K, if a single enpoint fails, respond with 404 if 404 in status_list: @@ -1005,7 +1024,6 @@ async def openai_chat_completions_proxy(request: Request): params = { "messages": messages, "model": model, - "seed": seed, "stop": stop, "stream": stream, } @@ -1024,6 +1042,8 @@ async def openai_chat_completions_proxy(request: Request): params["temperature"] = temperature if top_p is not None: params["top_p"] = top_p + if seed is not None: + params["seed"] = seed if presence_penalty is not None: params["presence_penalty"] = presence_penalty if frequency_penalty is not None: diff --git a/static/index.html b/static/index.html index df27872..975b966 100644 --- a/static/index.html +++ b/static/index.html @@ -46,8 +46,16 @@ } } /* Add a tiny status‑style section */ -.status-ok { color: #006400; font-weight: bold; } /* dark green */ -.status-error{ color: #8B0000; font-weight: bold; } /* dark red */ + .status-ok { color: #006400; font-weight: bold; } /* dark green */ + .status-error{ color: #8B0000; font-weight: bold; } /* dark red */ + .copy-link { + font-size:0.9em; + margin-left:0.5em; + color:#0066cc; + cursor:pointer; + text-decoration:underline; + } + .copy-link:hover { text-decoration:none; } @@ -130,9 +138,42 @@ async function loadTags(){ try{ const data = await fetchJSON('/api/tags'); const body = document.getElementById('tags-body'); - body.innerHTML = data.models.map(m=>``).join(''); - const countSpan = document.getElementById('tags-count'); + body.innerHTML = data.models.map(m => { + // Build the model cell + let modelCell = `${m.id || m.name}`; + + // Add the copy link *only if a digest exists* + if (m.digest) { + modelCell += ` + + copy + `; + } + + return ` + + + + `; + }).join(''); const countSpan = document.getElementById('tags-count'); countSpan.textContent = `${data.models.length}`; + // Attach copy‑link handlers + document.querySelectorAll('.copy-link').forEach(link => { + link.addEventListener('click', async (e) => { + e.preventDefault(); + const source = link.dataset.source; + const dest = prompt(`Enter destination for ${source}:`); + if (!dest) return; // cancel if empty + try{ + const resp = await fetch(`/api/copy?source=${encodeURIComponent(source)}&destination=${encodeURIComponent(dest)}`); + if (!resp.ok) throw new Error(`Copy failed: ${resp.status}`); + alert(`Copied ${source} to ${dest} successfully.`); + }catch(err){ + console.error(err); + alert(`Error copying ${source} to ${dest}: ${err}`); + } + }); + }); }catch(e){ console.error(e); } } From fbce181a818d0a1285600e9757e6a28da676704a Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 15:00:50 +0200 Subject: [PATCH 12/20] Add files via upload herding ollamas - added management functions to dashboard and updated routes in backend --- router.py | 79 +++++++++-------------- static/index.html | 158 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 182 insertions(+), 55 deletions(-) diff --git a/router.py b/router.py index 6206cbc..595f411 100644 --- a/router.py +++ b/router.py @@ -619,16 +619,17 @@ async def create_proxy(request: Request): # 11. API route – Show # ------------------------------------------------------------- @app.post("/api/show") -async def show_proxy(request: Request): +async def show_proxy(request: Request, model: Optional[str] = None): """ Proxy a model show request to Ollama and reply with ShowResponse. 
""" try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - model = payload.get("model") + if not model: + payload = json.loads(body_bytes.decode("utf-8")) + model = payload.get("model") if not model: raise HTTPException( @@ -652,7 +653,7 @@ async def show_proxy(request: Request): # 12. API route – Copy # ------------------------------------------------------------- @app.post("/api/copy") -async def copy_proxy(request: Request): +async def copy_proxy(request: Request, source: Optional[str] = None, destination: Optional[str] = None): """ Proxy a model copy request to each Ollama endpoint and reply with Status Code. @@ -660,10 +661,14 @@ async def copy_proxy(request: Request): # 1. Parse and validate request try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - src = payload.get("source") - dst = payload.get("destination") + if not source and not destination: + payload = json.loads(body_bytes.decode("utf-8")) + src = payload.get("source") + dst = payload.get("destination") + else: + src = source + dst = destination if not src: raise HTTPException( @@ -688,35 +693,11 @@ async def copy_proxy(request: Request): # 4. Return with 200 OK if all went well, 404 if a single endpoint failed return Response(status_code=404 if 404 in status_list else 200) -@app.get("/api/copy") -async def copy_proxy_from_dashboard(source: str, destination: str): - """ - Proxy a model copy request to each Ollama endpoint and reply with a status code. - Accepts `source` and `destination` exclusively as query‑string parameters. - """ - # 1. Validate that both values are non‑empty strings (FastAPI already guarantees presence) - if not source: - raise HTTPException(status_code=400, detail="Missing required query parameter 'source'") - if not destination: - raise HTTPException(status_code=400, detail="Missing required query parameter 'destination'") - - # 2. Iterate over all endpoints to copy the model on each endpoint - status_list = [] - for endpoint in config.endpoints: - if "/v1" not in endpoint: - client = ollama.AsyncClient(host=endpoint) - # 3. Proxy a simple copy request - copy = await client.copy(source=source, destination=destination) - status_list.append(copy.status) - - # 4. Return with 200 OK if all went well, 404 if any endpoint failed - return Response(status_code=404 if 404 in status_list else 200) - # ------------------------------------------------------------- # 13. API route – Delete # ------------------------------------------------------------- @app.delete("/api/delete") -async def delete_proxy(request: Request): +async def delete_proxy(request: Request, model: Optional[str] = None): """ Proxy a model delete request to each Ollama endpoint and reply with Status Code. @@ -724,9 +705,10 @@ async def delete_proxy(request: Request): # 1. Parse and validate request try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - model = payload.get("model") + if not model: + payload = json.loads(body_bytes.decode("utf-8")) + model = payload.get("model") if not model: raise HTTPException( @@ -745,30 +727,26 @@ async def delete_proxy(request: Request): status_list.append(copy.status) # 4. Retrun 200 0K, if a single enpoint fails, respond with 404 - if 404 in status_list: - return Response( - status_code=404 - ) - else: - return Response( - status_code=200 - ) + return Response(status_code=404 if 404 in status_list else 200) # ------------------------------------------------------------- # 14. 
API route – Pull # ------------------------------------------------------------- @app.post("/api/pull") -async def pull_proxy(request: Request): +async def pull_proxy(request: Request, model: Optional[str] = None): """ Proxy a pull request to all Ollama endpoint and report status back. """ # 1. Parse and validate request try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - model = payload.get("model") - insecure = payload.get("insecure") + if not model: + payload = json.loads(body_bytes.decode("utf-8")) + model = payload.get("model") + insecure = payload.get("insecure") + else: + insecure = None if not model: raise HTTPException( @@ -780,10 +758,11 @@ async def pull_proxy(request: Request): # 2. Iterate over all endpoints to pull the model status_list = [] for endpoint in config.endpoints: - client = ollama.AsyncClient(host=endpoint) - # 3. Proxy a simple pull request - pull = await client.pull(model=model, insecure=insecure, stream=False) - status_list.append(pull) + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 3. Proxy a simple pull request + pull = await client.pull(model=model, insecure=insecure, stream=False) + status_list.append(pull) combined_status = [] for status in status_list: diff --git a/static/index.html b/static/index.html index 975b966..4fd9234 100644 --- a/static/index.html +++ b/static/index.html @@ -14,12 +14,17 @@ .model{font-family:monospace;} .loading{color:#999;} - /* NEW STYLES */ .tables-wrapper{ display:flex; gap:1rem; margin-top:1rem; } + .header-pull-wrapper { + display: flex; /* horizontal layout */ + align-items: center; /* vertical centering */ + gap: 1rem; /* space between title & form */ + flex-wrap: wrap; /* optional – keeps it tidy on very narrow screens */ + } .table-container{ width:50%; } @@ -54,8 +59,31 @@ color:#0066cc; cursor:pointer; text-decoration:underline; + float: right; } + .delete-link{ + font-size:0.9em; + margin-left:0.5em; + color:#b22222; /* dark red */ + cursor:pointer; + text-decoration:underline; + float: right; + } + .show-link { + font-size:0.9em; + margin-left:0.5em; + color:#0066cc; + cursor:pointer; + text-decoration:underline; + float: right; + } + .delete-link:hover{ text-decoration:none; } .copy-link:hover { text-decoration:none; } + /* modal.css – very lightweight – feel free to replace with Bootstrap/Material UI */ + .modal { display:none; position:fixed; top:0; left:0; width:100%; height:100%; + background:rgba(0,0,0,.6); align-items:center; justify-content:center; } + .modal-content { background:#fff; padding:1rem; max-width:90%; max-height:90%; overflow:auto; } + .close-btn { float:right; cursor:pointer; font-size:1.5rem; } @@ -63,7 +91,14 @@
 [markup below reconstructed from the ids referenced by this patch's CSS and
 JS; the original tags were lost in capture]
+    <div class="header-pull-wrapper">
       <h2>Available Models (Tags) <span id="tags-count"></span></h2>
+      <input id="pull-model-input" type="text">
+      <button id="pull-btn">Pull</button>
+      <span id="pull-status"></span>
+    </div>
 [remaining hunk context garbled in capture: the Model/Digest table header
 and the loadTags row templates `${m.id || m.name}` / `${m.digest}` and
 `${modelCell}` / `${m.digest || ''}`]
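The management routes these dashboard links call (show, copy, delete, pull)
now accept query-string parameters as an alternative to a JSON body. A
minimal smoke test of both styles; the base URL and model names below are
illustrative assumptions, not part of the patch:

    # Hypothetical smoke test for the query-parameter variants added in this
    # patch; the base URL and model names are assumptions.
    import httpx

    BASE = "http://localhost:8000"

    # JSON-body style (pre-existing behaviour)
    httpx.post(f"{BASE}/api/show", json={"model": "llama3.2:3b"})

    # Query-parameter style, as used by the dashboard's show/copy/delete links
    httpx.post(f"{BASE}/api/show", params={"model": "llama3.2:3b"})
    httpx.post(f"{BASE}/api/copy",
               params={"source": "llama3.2:3b", "destination": "llama3.2:backup"})
    httpx.delete(f"{BASE}/api/delete", params={"model": "llama3.2:backup"})
    httpx.post(f"{BASE}/api/pull", params={"model": "llama3.2:3b"})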
@@ -141,7 +176,13 @@ async function loadTags(){ body.innerHTML = data.models.map(m => { // Build the model cell let modelCell = `${m.id || m.name}`; - + // Add delete link only when a digest exists + if (m.digest) { + modelCell += ` + + delete + `; + } // Add the copy link *only if a digest exists* if (m.digest) { modelCell += ` @@ -149,7 +190,12 @@ async function loadTags(){ copy `; } - + if (m.digest) { + modelCell += ` + + show + `; + } return ` @@ -157,7 +203,6 @@ async function loadTags(){ `; }).join(''); const countSpan = document.getElementById('tags-count'); countSpan.textContent = `${data.models.length}`; - // Attach copy‑link handlers document.querySelectorAll('.copy-link').forEach(link => { link.addEventListener('click', async (e) => { e.preventDefault(); @@ -165,15 +210,105 @@ async function loadTags(){ const dest = prompt(`Enter destination for ${source}:`); if (!dest) return; // cancel if empty try{ - const resp = await fetch(`/api/copy?source=${encodeURIComponent(source)}&destination=${encodeURIComponent(dest)}`); + const resp = await fetch( + `/api/copy?source=${encodeURIComponent(source)}&destination=${encodeURIComponent(dest)}`, + {method: 'POST'} + ); if (!resp.ok) throw new Error(`Copy failed: ${resp.status}`); alert(`Copied ${source} to ${dest} successfully.`); + + loadTags(); }catch(err){ console.error(err); alert(`Error copying ${source} to ${dest}: ${err}`); } }); }); + document.querySelectorAll('.delete-link').forEach(link => { + link.addEventListener('click', async e => { + e.preventDefault(); + const model = link.dataset.model; + + const ok = confirm(`Delete the model “${model}”? This cannot be undone.`); + if (!ok) return; + + try { + const resp = await fetch( + `/api/delete?model=${encodeURIComponent(model)}`, + {method: 'DELETE'} + ); + if (!resp.ok) throw new Error(`Delete failed: ${resp.status}`); + alert(`Model “${model}” deleted successfully.`); + + loadTags(); + } catch (err) { + console.error(err); + alert(`Error deleting ${model}: ${err}`); + } + }); + }); + document.body.addEventListener('click', async e => { + if (!e.target.matches('.show-link')) return; + + e.preventDefault(); + const model = e.target.dataset.model; + + try { + const resp = await fetch( + `/api/show?model=${encodeURIComponent(model)}`, + {method: 'POST'} + ); + if (!resp.ok) throw new Error(`Status ${resp.status}`); + const data = await resp.json(); + + const jsonText = JSON.stringify(data, null, 2) + .replace(/\\n/g, '\n'); + + document.getElementById('json-output').textContent = jsonText; + document.getElementById('show-modal').style.display = 'flex'; + } catch (err) { + console.error(err); + alert(`Could not load model details: ${err.message}`); + } + }); + + document.getElementById('pull-btn').addEventListener('click', async () => { + const model = document.getElementById('pull-model-input').value.trim(); + const statusEl = document.getElementById('pull-status'); + + if (!model) { + alert('Please enter a model name.'); + return; + } + + try { + const resp = await fetch( + `/api/pull?model=${encodeURIComponent(model)}`, + {method: 'POST'} + ); + + if (!resp.ok) throw new Error(`Status ${resp.status}`); + const data = await resp.json(); + + statusEl.textContent = `✅ ${JSON.stringify(data, null, 2)}`; + statusEl.style.color = 'green'; + + // Optional: refresh the tags list so the new model appears + loadTags(); + } catch (err) { + console.error(err); + statusEl.textContent = `❌ ${err.message}`; + statusEl.style.color = 'red'; + } +}); + + + const modal = 
document.getElementById('show-modal'); + modal.addEventListener('click', e => { + if (e.target === modal || e.target.matches('.close-btn')) { + modal.style.display = 'none'; + } + }); }catch(e){ console.error(e); } } @@ -190,6 +325,19 @@ window.addEventListener('load', ()=>{ loadTags(); loadPS(); }); +setInterval(() => { + loadTags(); +}, 600_000); +setInterval(() => { + loadPS(); +}, 60_000); + \ No newline at end of file From 7a4e0bb08cfe35602ab54ff831efb631b8557a2d Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 16:08:02 +0200 Subject: [PATCH 13/20] Update config.yaml fixing config for ollama /v1 compatible endpoint usage --- config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config.yaml b/config.yaml index 94162b1..01bf296 100644 --- a/config.yaml +++ b/config.yaml @@ -13,5 +13,8 @@ max_concurrent_connections: 2 # Set an environment variable like OPENAI_KEY # Confirm endpoints are exactly as in endpoints block api_keys: + "http://192.168.0.50:11434": "ollama" + "http://192.168.0.51:11434": "ollama" + "http://192.168.0.52:11434": "ollama" "https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" "https://api.openai.com/v1": "${OPENAI_KEY}" From 75de2100399ca05a613f0a3d235ab17bed5fd336 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 16:09:30 +0200 Subject: [PATCH 14/20] Update config.yaml --- config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config.yaml b/config.yaml index 01bf296..93ae117 100644 --- a/config.yaml +++ b/config.yaml @@ -3,8 +3,8 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 - - https://openrouter.ai/api/v1 - - https://api.openai.com/v1 + #- https://openrouter.ai/api/v1 + #- https://api.openai.com/v1 # Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) max_concurrent_connections: 2 @@ -16,5 +16,5 @@ api_keys: "http://192.168.0.50:11434": "ollama" "http://192.168.0.51:11434": "ollama" "http://192.168.0.52:11434": "ollama" - "https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" - "https://api.openai.com/v1": "${OPENAI_KEY}" + #"https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" + #"https://api.openai.com/v1": "${OPENAI_KEY}" From 20790d95eda9cd96d0c5c5a47c374d7c95d65317 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 16:12:05 +0200 Subject: [PATCH 15/20] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d514eb..18b2290 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ is a transparent proxy for [Ollama](https://github.com/ollama/ollama) with model deployment aware routing. -Screenshot_NOMYO_Router_Dashboard
+Screenshot_NOMYO_Router_0-2-2_Dashboard
It runs between your frontend application and Ollama backend and is transparent for both, the front- and backend. From b3b67fdbf282b5df8302dfcaa5d1bfaf8c09be02 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 19:07:28 +0200 Subject: [PATCH 16/20] Add files via upload BREAKING CHANGE: - new config.yaml config block - new dependency: httpx-aiohttp for faster endpoint queries in bigger installations - new dynamic dashboard --- requirements.txt | 11 ++ router.py | 37 +++-- static/index.html | 346 +++++++++++++++++++++------------------------- 3 files changed, 191 insertions(+), 203 deletions(-) diff --git a/requirements.txt b/requirements.txt index e39b50c..d58da4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,27 @@ +aiocache==0.12.3 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.10.0 +async-timeout==5.0.1 +attrs==25.3.0 certifi==2025.8.3 click==8.2.1 distro==1.9.0 exceptiongroup==1.3.0 fastapi==0.116.1 +frozenlist==1.7.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 +httpx-aiohttp==0.1.8 idna==3.10 jiter==0.10.0 +multidict==6.6.4 ollama==0.5.3 openai==1.102.0 +propcache==0.3.2 pydantic==2.11.7 pydantic-settings==2.10.1 pydantic_core==2.33.2 @@ -23,3 +33,4 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 uvicorn==0.35.0 +yarl==1.20.1 diff --git a/router.py b/router.py index 595f411..b73e4e6 100644 --- a/router.py +++ b/router.py @@ -7,6 +7,7 @@ license: AGPL """ # ------------------------------------------------------------- import json, time, asyncio, yaml, httpx, ollama, openai, os, re +from httpx_aiohttp import AiohttpTransport from pathlib import Path from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException @@ -96,11 +97,12 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: """ return httpx.AsyncClient( base_url=endpoint, - timeout=httpx.Timeout(5.0, read=5.0, write=5.0, connect=5.0), - limits=httpx.Limits( - max_keepalive_connections=64, - max_connections=64 - ) + timeout=httpx.Timeout(5.0, read=5.0, write=None, connect=5.0), + #limits=httpx.Limits( + # max_keepalive_connections=64, + # max_connections=64 + #), + transport=AiohttpTransport() ) async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]: @@ -133,8 +135,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - # Error expired – remove it del _error_cache[endpoint] - client = get_httpx_client(endpoint) try: + client = get_httpx_client(endpoint) if "/v1" in endpoint: resp = await client.get(f"/models", headers=headers) else: @@ -147,7 +149,7 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - models = {m.get("id") for m in data.get("data", []) if m.get("id")} else: models = {m.get("name") for m in data.get("models", []) if m.get("name")} - + if models: _models_cache[endpoint] = (models, time.time()) return models @@ -160,6 +162,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - print(f"[fetch_available_models] {endpoint} error: {e}") _error_cache[endpoint] = time.time() return set() + finally: + await client.aclose() async def fetch_loaded_models(endpoint: str) -> Set[str]: @@ -168,8 +172,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: loaded on that endpoint. If the request fails (e.g. timeout, 5xx), an empty set is returned. 
""" - client = get_httpx_client(endpoint) try: + client = get_httpx_client(endpoint) resp = await client.get(f"/api/ps") resp.raise_for_status() data = resp.json() @@ -180,6 +184,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: except Exception: # If anything goes wrong we simply assume the endpoint has no models return set() + finally: + await client.aclose() async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]: """ @@ -189,8 +195,9 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key headers = None if api_key is not None: headers = {"Authorization": "Bearer " + api_key} - client = get_httpx_client(endpoint) + try: + client = get_httpx_client(endpoint) resp = await client.get(f"{route}", headers=headers) resp.raise_for_status() data = resp.json() @@ -200,6 +207,8 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key # If anything goes wrong we cannot reply details print(e) return [] + finally: + await client.aclose() def ep2base(ep): if "/v1" in ep: @@ -235,8 +244,8 @@ async def decrement_usage(endpoint: str, model: str) -> None: # Optionally, clean up zero entries if usage_counts[endpoint].get(model, 0) == 0: usage_counts[endpoint].pop(model, None) - if not usage_counts[endpoint]: - usage_counts.pop(endpoint, None) + #if not usage_counts[endpoint]: + # usage_counts.pop(endpoint, None) # ------------------------------------------------------------- # 5. Endpoint selection logic (respecting the configurable limit) @@ -640,7 +649,7 @@ async def show_proxy(request: Request, model: Optional[str] = None): # 2. Endpoint logic endpoint = await choose_endpoint(model) - await increment_usage(endpoint, model) + #await increment_usage(endpoint, model) client = ollama.AsyncClient(host=endpoint) # 3. Proxy a simple show request @@ -907,7 +916,7 @@ async def config_proxy(request: Request): """ async def check_endpoint(url: str): try: - async with httpx.AsyncClient(timeout=1) as client: + async with httpx.AsyncClient(timeout=1, transport=AiohttpTransport()) as client: if "/v1" in url: headers = {"Authorization": "Bearer " + config.api_keys[url]} r = await client.get(f"{url}/models", headers=headers) @@ -921,6 +930,8 @@ async def config_proxy(request: Request): return {"url": url, "status": "ok", "version": data.get("version")} except Exception as exc: return {"url": url, "status": "error", "detail": str(exc)} + finally: + await client.aclose() results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints]) return {"endpoints": results} diff --git a/static/index.html b/static/index.html index 4fd9234..a2945b4 100644 --- a/static/index.html +++ b/static/index.html @@ -5,108 +5,75 @@ NOMYO Router Dashboard -

 [remainder of this hunk garbled in capture: the rewritten dashboard <body>
 markup covering the "Router Dashboard" heading and the "Available Models
 (Tags)" panel with its pull form, Model/Digest table and "Loading…" rows,
 down to the "Running Models (PS)" heading]
@@ -63,7 +86,11 @@
 [garbled in capture: the Running Models (PS) table markup, Digest column
 and "Loading…" placeholder rows]
@@ -77,23 +103,22 @@
 [garbled in capture: the Endpoints status table markup with URL / Status /
 Version columns and "Loading…" placeholder rows]