From 2c82e5964f83e38af3e00375d8dc9ae7808a82f5 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Mon, 1 Sep 2025 16:35:22 +0200 Subject: [PATCH 01/20] Add files via upload minor updates --- config.yaml | 6 ++++-- router.py | 9 ++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/config.yaml b/config.yaml index a0d14ed..692c00e 100644 --- a/config.yaml +++ b/config.yaml @@ -3,6 +3,8 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 + - https://openrouter.ai/api/v1 + - https://api.inceptionlabs.ai/v1 -# Maximum concurrent connections *per endpoint‑model pair* -max_concurrent_connections: 2 \ No newline at end of file +# Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) +max_concurrent_connections: 2 diff --git a/router.py b/router.py index 0a3c3d6..f58157a 100644 --- a/router.py +++ b/router.py @@ -2,7 +2,7 @@ title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing author: alpha-nerd-nomyo author_url: https://github.com/nomyo-ai -version: 0.1 +version: 0.2.1 license: AGPL """ # ------------------------------------------------------------- @@ -19,9 +19,9 @@ from collections import defaultdict # ------------------------------------------------------------------ # In‑memory caches # ------------------------------------------------------------------ -# Successful results are cached for 300 s +# Successful results are cached for 300s _models_cache: dict[str, tuple[Set[str], float]] = {} -# Transient errors are cached for 30 s – the key stays until the +# Transient errors are cached for 1s – the key stays until the # timeout expires, after which the endpoint will be queried again. _error_cache: dict[str, float] = {} @@ -86,7 +86,6 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: ) ) -#@cached(cache=Cache.MEMORY, ttl=300) async def fetch_available_models(endpoint: str) -> Set[str]: """ Query /api/tags and return a set of all model names that the @@ -132,7 +131,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: _models_cache[endpoint] = (models, time.time()) return models else: - # Empty list – treat as “no models”, but still cache for 300 s + # Empty list – treat as “no models”, but still cache for 300s _models_cache[endpoint] = (models, time.time()) return models except Exception as e: From b27b3608cee450a06ecec4dbdf6055d378b6e169 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Mon, 1 Sep 2025 16:36:27 +0200 Subject: [PATCH 02/20] Update config.yaml --- config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/config.yaml b/config.yaml index 692c00e..f36a1fb 100644 --- a/config.yaml +++ b/config.yaml @@ -3,8 +3,6 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 - - https://openrouter.ai/api/v1 - - https://api.inceptionlabs.ai/v1 # Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) max_concurrent_connections: 2 From f01843d12bef7fac9741b06ede99ef3be7840148 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 12:48:19 +0200 Subject: [PATCH 03/20] Add files via upload refined available models view for copy and paste with open ai compatible endpoints --- static/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/index.html b/static/index.html index 86746bd..dd9dfec 100644 --- a/static/index.html +++ b/static/index.html @@ -130,7 +130,7 @@ async function loadTags(){ try{ const data = await 
fetchJSON('/api/tags'); const body = document.getElementById('tags-body'); - body.innerHTML = data.models.map(m=>`${m.name}${m.digest}`).join(''); + body.innerHTML = data.models.map(m=>`${m.id || m.name}${m.digest}`).join(''); }catch(e){ console.error(e); } } From 7ed872379b8821d9610d6564211dc9d630aa78e1 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 13:30:04 +0200 Subject: [PATCH 04/20] Add files via upload adding missing authorization headers for open ai endpoints --- router.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/router.py b/router.py index f58157a..1761caf 100644 --- a/router.py +++ b/router.py @@ -917,6 +917,10 @@ async def openai_embedding_proxy(request: Request): model = payload.get("model") input = payload.get("input") + headers = request.headers + api_key = headers.get("Authorization") + api_key = api_key.split()[1] + if not model: raise HTTPException( status_code=400, detail="Missing required field 'model'" @@ -931,7 +935,7 @@ async def openai_embedding_proxy(request: Request): # 2. Endpoint logic endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) - oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key="ollama") + oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key=api_key) # 3. Async generator that streams embedding data and decrements the counter async_gen = await oclient.embeddings.create(input = [input], model=model) From 9f32fcf75dd80ffb07db056fb84ef0b63f600ab4 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 14:44:21 +0200 Subject: [PATCH 05/20] Add files via upload fixing /v1/models: - relabel model.id with model.name for OpenAI compliance --- router.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/router.py b/router.py index 1761caf..5796972 100644 --- a/router.py +++ b/router.py @@ -1161,6 +1161,9 @@ async def openai_models_proxy(request: Request): models = {'data': []} for modellist in all_models: + for model in modellist: + if not id in model.keys(): # Relable Ollama models with OpenAI Model.id from Model.name + model['id'] = model['name'] models['data'] += modellist # 2. Return a JSONResponse with a deduplicated list of unique models for inference From 0a456e6e21601e6b2e206b0e8be603996d892709 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 2 Sep 2025 16:24:00 +0200 Subject: [PATCH 06/20] Add files via upload fixing v1/models --- router.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/router.py b/router.py index 5796972..99ea34b 100644 --- a/router.py +++ b/router.py @@ -160,14 +160,18 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: # If anything goes wrong we simply assume the endpoint has no models return set() -async def fetch_endpoint_details(endpoint: str, route: str, detail: str) -> List[dict]: +async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: str = None) -> List[dict]: """ Query / to fetch and return a List of dicts with details for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail. 
""" + if api_key is not None: + headers = {"Authorization": "Bearer " + api_key} + else: + headers = None client = get_httpx_client(endpoint) try: - resp = await client.get(f"{route}") + resp = await client.get(f"{route}", headers=headers) resp.raise_for_status() data = resp.json() detail = data.get(detail, []) @@ -175,7 +179,7 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str) -> List except Exception as e: # If anything goes wrong we cannot reply details print(e) - return {detail: []} + return "N/A" def ep2base(ep): if "/v1" in ep: @@ -803,7 +807,8 @@ async def version_proxy(request: Request): # 1. Query all endpoints for version tasks = [fetch_endpoint_details(ep, "/api/version", "version") for ep in config.endpoints] all_versions = await asyncio.gather(*tasks) - + all_versions = [v for v in all_versions if v != "N/A"] + def version_key(v): return tuple(map(int, v.split('.'))) @@ -824,7 +829,7 @@ async def tags_proxy(request: Request): """ # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] + tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] #needs api_key TODO:add central mgmt all_models = await asyncio.gather(*tasks) models = {'models': []} @@ -1154,9 +1159,13 @@ async def openai_models_proxy(request: Request): Proxy a models request to Ollama endpoints and reply with a unique list of all models. """ + headers = request.headers + api_key = headers.get("Authorization") + api_key = api_key.split()[1] + # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] + tasks += [fetch_endpoint_details(ep, "/models", "data", api_key) for ep in config.endpoints if "/v1" in ep] all_models = await asyncio.gather(*tasks) models = {'data': []} From d257073cb1670bbcc150d28320fcc9e485aab6fe Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 16:34:41 +0200 Subject: [PATCH 07/20] Add files via upload preparations for /v1 endpoints with auth --- router.py | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/router.py b/router.py index 99ea34b..e835deb 100644 --- a/router.py +++ b/router.py @@ -8,7 +8,7 @@ license: AGPL # ------------------------------------------------------------- import json, time, asyncio, yaml, httpx, ollama, openai from pathlib import Path -from typing import Dict, Set, List +from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException from fastapi.staticfiles import StaticFiles from starlette.responses import StreamingResponse, JSONResponse, Response, HTMLResponse, RedirectResponse @@ -86,7 +86,7 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: ) ) -async def fetch_available_models(endpoint: str) -> Set[str]: +async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]: """ Query /api/tags and return a set of all model names that the endpoint *advertises* (i.e. is capable of serving). This endpoint lists @@ -96,6 +96,10 @@ async def fetch_available_models(endpoint: str) -> Set[str]: If the request fails (e.g. timeout, 5xx, or malformed response), an empty set is returned. 
""" + headers = None + if api_key is not None: + headers = {"Authorization": "Bearer " + api_key} + if endpoint in _models_cache: models, cached_at = _models_cache[endpoint] if _is_fresh(cached_at, 300): @@ -115,7 +119,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: client = get_httpx_client(endpoint) try: if "/v1" in endpoint: - resp = await client.get(f"/models") + resp = await client.get(f"/models", headers=headers) else: resp = await client.get(f"/api/tags") resp.raise_for_status() @@ -123,7 +127,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: # Expected format: # {"models": [{"name": "model1"}, {"name": "model2"}]} if "/v1" in endpoint: - models = {m.get("id") for m in data.get("data", []) if m.get("name")} + models = {m.get("id") for m in data.get("data", []) if m.get("id")} else: models = {m.get("name") for m in data.get("models", []) if m.get("name")} @@ -160,15 +164,14 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: # If anything goes wrong we simply assume the endpoint has no models return set() -async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: str = None) -> List[dict]: +async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]: """ Query / to fetch and return a List of dicts with details for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail. """ + headers = None if api_key is not None: headers = {"Authorization": "Bearer " + api_key} - else: - headers = None client = get_httpx_client(endpoint) try: resp = await client.get(f"{route}", headers=headers) @@ -179,7 +182,7 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key except Exception as e: # If anything goes wrong we cannot reply details print(e) - return "N/A" + return [] def ep2base(ep): if "/v1" in ep: @@ -221,7 +224,7 @@ async def decrement_usage(endpoint: str, model: str) -> None: # ------------------------------------------------------------- # 5. Endpoint selection logic (respecting the configurable limit) # ------------------------------------------------------------- -async def choose_endpoint(model: str) -> str: +async def choose_endpoint(model: str, api_key: Optional[str] = None) -> str: """ Determine which endpoint to use for the given model while respecting the `max_concurrent_connections` per endpoint‑model pair **and** @@ -240,7 +243,7 @@ async def choose_endpoint(model: str) -> str: 6️⃣ If no endpoint advertises the model at all, raise an error. """ # 1️⃣ Gather advertised‑model sets for all endpoints concurrently - tag_tasks = [fetch_available_models(ep) for ep in config.endpoints] + tag_tasks = [fetch_available_models(ep, api_key) for ep in config.endpoints] advertised_sets = await asyncio.gather(*tag_tasks) # 2️⃣ Filter endpoints that advertise the requested model @@ -938,7 +941,7 @@ async def openai_embedding_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. 
Endpoint logic - endpoint = await choose_endpoint(model) + endpoint = await choose_endpoint(model, api_key) await increment_usage(endpoint, model) oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key=api_key) @@ -976,6 +979,7 @@ async def openai_chat_completions_proxy(request: Request): temperature = payload.get("temperature") top_p = payload.get("top_p") max_tokens = payload.get("max_tokens") + max_completion_tokens = payload.get("max_completion_tokens") tools = payload.get("tools") headers = request.headers @@ -985,14 +989,9 @@ async def openai_chat_completions_proxy(request: Request): params = { "messages": messages, "model": model, - "frequency_penalty": frequency_penalty, - "presence_penalty": presence_penalty, "seed": seed, "stop": stop, "stream": stream, - "temperature": temperature, - "top_p": top_p, - "max_tokens": max_tokens } if tools is not None: @@ -1001,6 +1000,18 @@ async def openai_chat_completions_proxy(request: Request): params["response_format"] = response_format if stream_options is not None: params["stream_options"] = stream_options + if max_completion_tokens is not None: + params["max_completion_tokens"] = max_completion_tokens + if max_tokens is not None: + params["max_tokens"] = max_tokens + if temperature is not None: + params["temperature"] = temperature + if top_p is not None: + params["top_p"] = top_p + if presence_penalty is not None: + params["presence_penalty"] = presence_penalty + if frequency_penalty is not None: + params["frequency_penalty"] = frequency_penalty if not model: raise HTTPException( @@ -1014,7 +1025,7 @@ async def openai_chat_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model) + endpoint = await choose_endpoint(model, api_key) await increment_usage(endpoint, model) base_url = ep2base(endpoint) oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) @@ -1112,7 +1123,7 @@ async def openai_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model) + endpoint = await choose_endpoint(model, api_key) await increment_usage(endpoint, model) base_url = ep2base(endpoint) oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) @@ -1171,8 +1182,10 @@ async def openai_models_proxy(request: Request): models = {'data': []} for modellist in all_models: for model in modellist: - if not id in model.keys(): # Relable Ollama models with OpenAI Model.id from Model.name + if not "id" in model.keys(): # Relable Ollama models with OpenAI Model.id from Model.name model['id'] = model['name'] + else: + model['name'] = model['id'] models['data'] += modellist # 2. 
Return a JSONResponse with a deduplicated list of unique models for inference From e7fd79c461de3c20020ce56d00590e6367602c10 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 18:00:20 +0200 Subject: [PATCH 08/20] Update config.yaml centralizing remote endpoint secrets --- config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config.yaml b/config.yaml index f36a1fb..94162b1 100644 --- a/config.yaml +++ b/config.yaml @@ -3,6 +3,15 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 + - https://openrouter.ai/api/v1 + - https://api.openai.com/v1 # Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) max_concurrent_connections: 2 + +# API keys for remote endpoints +# Set an environment variable like OPENAI_KEY +# Confirm endpoints are exactly as in endpoints block +api_keys: + "https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" + "https://api.openai.com/v1": "${OPENAI_KEY}" From 2ead1112e74b1da425f976535eb03854bb2bcf84 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 18:01:39 +0200 Subject: [PATCH 09/20] Add files via upload centralizing remote endpoint secrets management for unified endpoints --- router.py | 75 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/router.py b/router.py index e835deb..594496e 100644 --- a/router.py +++ b/router.py @@ -2,11 +2,11 @@ title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing author: alpha-nerd-nomyo author_url: https://github.com/nomyo-ai -version: 0.2.1 +version: 0.2.2 license: AGPL """ # ------------------------------------------------------------- -import json, time, asyncio, yaml, httpx, ollama, openai +import json, time, asyncio, yaml, httpx, ollama, openai, os, re from pathlib import Path from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException @@ -38,18 +38,35 @@ class Config(BaseSettings): # Max concurrent connections per endpoint‑model pair, see OLLAMA_NUM_PARALLEL max_concurrent_connections: int = 1 + api_keys: Dict[str, str] = Field(default_factory=dict) + class Config: # Load from `config.yaml` first, then from env variables - env_prefix = "OLLAMA_PROXY_" + env_prefix = "NOMYO_ROUTER_" yaml_file = Path("config.yaml") # relative to cwd + @classmethod + def _expand_env_refs(cls, obj): + """Recursively replace `${VAR}` with os.getenv('VAR').""" + if isinstance(obj, dict): + return {k: cls._expand_env_refs(v) for k, v in obj.items()} + if isinstance(obj, list): + return [cls._expand_env_refs(v) for v in obj] + if isinstance(obj, str): + # Only expand if it is exactly ${VAR} + m = re.fullmatch(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}", obj) + if m: + return os.getenv(m.group(1), "") + return obj + @classmethod def from_yaml(cls, path: Path) -> "Config": """Load the YAML file and create the Config instance.""" if path.exists(): with path.open("r", encoding="utf-8") as fp: data = yaml.safe_load(fp) or {} - return cls(**data) + cleaned = cls._expand_env_refs(data) + return cls(**cleaned) return cls() # Create the global config object – it will be overwritten on startup @@ -224,7 +241,7 @@ async def decrement_usage(endpoint: str, model: str) -> None: # ------------------------------------------------------------- # 5. 
Endpoint selection logic (respecting the configurable limit) # ------------------------------------------------------------- -async def choose_endpoint(model: str, api_key: Optional[str] = None) -> str: +async def choose_endpoint(model: str) -> str: """ Determine which endpoint to use for the given model while respecting the `max_concurrent_connections` per endpoint‑model pair **and** @@ -243,7 +260,8 @@ async def choose_endpoint(model: str, api_key: Optional[str] = None) -> str: 6️⃣ If no endpoint advertises the model at all, raise an error. """ # 1️⃣ Gather advertised‑model sets for all endpoints concurrently - tag_tasks = [fetch_available_models(ep, api_key) for ep in config.endpoints] + tag_tasks = [fetch_available_models(ep) for ep in config.endpoints if "/v1" not in ep] + tag_tasks += [fetch_available_models(ep, config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep] advertised_sets = await asyncio.gather(*tag_tasks) # 2️⃣ Filter endpoints that advertise the requested model @@ -808,9 +826,8 @@ async def version_proxy(request: Request): """ # 1. Query all endpoints for version - tasks = [fetch_endpoint_details(ep, "/api/version", "version") for ep in config.endpoints] + tasks = [fetch_endpoint_details(ep, "/api/version", "version") for ep in config.endpoints if "/v1" not in ep] all_versions = await asyncio.gather(*tasks) - all_versions = [v for v in all_versions if v != "N/A"] def version_key(v): return tuple(map(int, v.split('.'))) @@ -830,9 +847,10 @@ async def tags_proxy(request: Request): Proxy a tags request to Ollama endpoints and reply with a unique list of all models. """ + # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data") for ep in config.endpoints if "/v1" in ep] #needs api_key TODO:add central mgmt + tasks += [fetch_endpoint_details(ep, "/models", "data", config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep] all_models = await asyncio.gather(*tasks) models = {'models': []} @@ -841,7 +859,7 @@ async def tags_proxy(request: Request): # 2. Return a JSONResponse with a deduplicated list of unique models for inference return JSONResponse( - content={"models": dedupe_on_keys(models['models'], ['digest','name'])}, + content={"models": dedupe_on_keys(models['models'], ['digest','name','id'])}, status_code=200, ) @@ -893,7 +911,8 @@ async def config_proxy(request: Request): try: async with httpx.AsyncClient(timeout=1) as client: if "/v1" in url: - r = await client.get(f"{url}/models") + headers = {"Authorization": "Bearer " + config.api_keys[url]} + r = await client.get(f"{url}/models", headers=headers) else: r = await client.get(f"{url}/api/version") r.raise_for_status() @@ -925,9 +944,6 @@ async def openai_embedding_proxy(request: Request): model = payload.get("model") input = payload.get("input") - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] if not model: raise HTTPException( @@ -941,12 +957,16 @@ async def openai_embedding_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model, api_key) + endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) + if "/v1" in endpoint: + api_key = config.api_keys[endpoint] + else: + api_key = "ollama" oclient = openai.AsyncOpenAI(base_url=endpoint+"/v1", api_key=api_key) # 3. 
Async generator that streams embedding data and decrements the counter - async_gen = await oclient.embeddings.create(input = [input], model=model) + async_gen = await oclient.embeddings.create(input=[input], model=model) await decrement_usage(endpoint, model) @@ -981,10 +1001,6 @@ async def openai_chat_completions_proxy(request: Request): max_tokens = payload.get("max_tokens") max_completion_tokens = payload.get("max_completion_tokens") tools = payload.get("tools") - - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] params = { "messages": messages, @@ -1025,10 +1041,10 @@ async def openai_chat_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model, api_key) + endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) base_url = ep2base(endpoint) - oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) + oclient = openai.AsyncOpenAI(base_url=base_url, api_key=config.api_keys[endpoint]) # 3. Async generator that streams completions data and decrements the counter async def stream_ochat_response(): @@ -1088,11 +1104,8 @@ async def openai_completions_proxy(request: Request): temperature = payload.get("temperature") top_p = payload.get("top_p") max_tokens = payload.get("max_tokens") + max_completion_tokens = payload.get("max_completion_tokens") suffix = payload.get("suffix") - - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] params = { "prompt": prompt, @@ -1123,10 +1136,10 @@ async def openai_completions_proxy(request: Request): raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e # 2. Endpoint logic - endpoint = await choose_endpoint(model, api_key) + endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) base_url = ep2base(endpoint) - oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key) + oclient = openai.AsyncOpenAI(base_url=base_url, api_key=config.api_keys[endpoint]) # 3. Async generator that streams completions data and decrements the counter async def stream_ocompletions_response(): @@ -1170,13 +1183,9 @@ async def openai_models_proxy(request: Request): Proxy a models request to Ollama endpoints and reply with a unique list of all models. """ - headers = request.headers - api_key = headers.get("Authorization") - api_key = api_key.split()[1] - # 1. Query all endpoints for models tasks = [fetch_endpoint_details(ep, "/api/tags", "models") for ep in config.endpoints if "/v1" not in ep] - tasks += [fetch_endpoint_details(ep, "/models", "data", api_key) for ep in config.endpoints if "/v1" in ep] + tasks += [fetch_endpoint_details(ep, "/models", "data", config.api_keys[ep]) for ep in config.endpoints if "/v1" in ep] all_models = await asyncio.gather(*tasks) models = {'data': []} From 190fa874c77b46358677a0654f6dd2a43adef80d Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 3 Sep 2025 19:20:01 +0200 Subject: [PATCH 10/20] Add files via upload cosmetics --- static/index.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/static/index.html b/static/index.html index dd9dfec..df27872 100644 --- a/static/index.html +++ b/static/index.html @@ -55,7 +55,7 @@
-    <h2>Available Models (Tags)</h2>
+    <h2>Available Models (Tags) <span id="tags-count"></span></h2>
@@ -131,6 +131,8 @@ async function loadTags(){ const data = await fetchJSON('/api/tags'); const body = document.getElementById('tags-body'); body.innerHTML = data.models.map(m=>``).join(''); + const countSpan = document.getElementById('tags-count'); + countSpan.textContent = `${data.models.length}`; }catch(e){ console.error(e); } } From 2f09dbe22c1b845008fcb6219663ddd4c7eb33be Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 10:39:10 +0200 Subject: [PATCH 11/20] Add files via upload adding dashboard copy link adding copy get route for dashboard --- router.py | 54 ++++++++++++++++++++++++++++++++--------------- static/index.html | 49 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 21 deletions(-) diff --git a/router.py b/router.py index 594496e..6206cbc 100644 --- a/router.py +++ b/router.py @@ -679,20 +679,38 @@ async def copy_proxy(request: Request): # 3. Iterate over all endpoints to copy the model on each endpoint status_list = [] for endpoint in config.endpoints: - client = ollama.AsyncClient(host=endpoint) - # 4. Proxy a simple copy request - copy = await client.copy(source=src, destination=dst) - status_list.append(copy.status) + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 4. Proxy a simple copy request + copy = await client.copy(source=src, destination=dst) + status_list.append(copy.status) # 4. Return with 200 OK if all went well, 404 if a single endpoint failed - if 404 in status_list: - return Response( - status_code=404 - ) - else: - return Response( - status_code=200 - ) + return Response(status_code=404 if 404 in status_list else 200) + +@app.get("/api/copy") +async def copy_proxy_from_dashboard(source: str, destination: str): + """ + Proxy a model copy request to each Ollama endpoint and reply with a status code. + Accepts `source` and `destination` exclusively as query‑string parameters. + """ + # 1. Validate that both values are non‑empty strings (FastAPI already guarantees presence) + if not source: + raise HTTPException(status_code=400, detail="Missing required query parameter 'source'") + if not destination: + raise HTTPException(status_code=400, detail="Missing required query parameter 'destination'") + + # 2. Iterate over all endpoints to copy the model on each endpoint + status_list = [] + for endpoint in config.endpoints: + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 3. Proxy a simple copy request + copy = await client.copy(source=source, destination=destination) + status_list.append(copy.status) + + # 4. Return with 200 OK if all went well, 404 if any endpoint failed + return Response(status_code=404 if 404 in status_list else 200) # ------------------------------------------------------------- # 13. API route – Delete @@ -720,10 +738,11 @@ async def delete_proxy(request: Request): # 2. Iterate over all endpoints to delete the model on each endpoint status_list = [] for endpoint in config.endpoints: - client = ollama.AsyncClient(host=endpoint) - # 3. Proxy a simple copy request - copy = await client.delete(model=model) - status_list.append(copy.status) + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 3. Proxy a simple copy request + copy = await client.delete(model=model) + status_list.append(copy.status) # 4. 
Retrun 200 0K, if a single enpoint fails, respond with 404 if 404 in status_list: @@ -1005,7 +1024,6 @@ async def openai_chat_completions_proxy(request: Request): params = { "messages": messages, "model": model, - "seed": seed, "stop": stop, "stream": stream, } @@ -1024,6 +1042,8 @@ async def openai_chat_completions_proxy(request: Request): params["temperature"] = temperature if top_p is not None: params["top_p"] = top_p + if seed is not None: + params["seed"] = seed if presence_penalty is not None: params["presence_penalty"] = presence_penalty if frequency_penalty is not None: diff --git a/static/index.html b/static/index.html index df27872..975b966 100644 --- a/static/index.html +++ b/static/index.html @@ -46,8 +46,16 @@ } } /* Add a tiny status‑style section */ -.status-ok { color: #006400; font-weight: bold; } /* dark green */ -.status-error{ color: #8B0000; font-weight: bold; } /* dark red */ + .status-ok { color: #006400; font-weight: bold; } /* dark green */ + .status-error{ color: #8B0000; font-weight: bold; } /* dark red */ + .copy-link { + font-size:0.9em; + margin-left:0.5em; + color:#0066cc; + cursor:pointer; + text-decoration:underline; + } + .copy-link:hover { text-decoration:none; } @@ -130,9 +138,42 @@ async function loadTags(){ try{ const data = await fetchJSON('/api/tags'); const body = document.getElementById('tags-body'); - body.innerHTML = data.models.map(m=>``).join(''); - const countSpan = document.getElementById('tags-count'); + body.innerHTML = data.models.map(m => { + // Build the model cell + let modelCell = `${m.id || m.name}`; + + // Add the copy link *only if a digest exists* + if (m.digest) { + modelCell += ` + + copy + `; + } + + return ` + + + + `; + }).join(''); const countSpan = document.getElementById('tags-count'); countSpan.textContent = `${data.models.length}`; + // Attach copy‑link handlers + document.querySelectorAll('.copy-link').forEach(link => { + link.addEventListener('click', async (e) => { + e.preventDefault(); + const source = link.dataset.source; + const dest = prompt(`Enter destination for ${source}:`); + if (!dest) return; // cancel if empty + try{ + const resp = await fetch(`/api/copy?source=${encodeURIComponent(source)}&destination=${encodeURIComponent(dest)}`); + if (!resp.ok) throw new Error(`Copy failed: ${resp.status}`); + alert(`Copied ${source} to ${dest} successfully.`); + }catch(err){ + console.error(err); + alert(`Error copying ${source} to ${dest}: ${err}`); + } + }); + }); }catch(e){ console.error(e); } } From fbce181a818d0a1285600e9757e6a28da676704a Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 15:00:50 +0200 Subject: [PATCH 12/20] Add files via upload herding ollamas - added management functions to dashboard and updated routes in backend --- router.py | 79 +++++++++-------------- static/index.html | 158 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 182 insertions(+), 55 deletions(-) diff --git a/router.py b/router.py index 6206cbc..595f411 100644 --- a/router.py +++ b/router.py @@ -619,16 +619,17 @@ async def create_proxy(request: Request): # 11. API route – Show # ------------------------------------------------------------- @app.post("/api/show") -async def show_proxy(request: Request): +async def show_proxy(request: Request, model: Optional[str] = None): """ Proxy a model show request to Ollama and reply with ShowResponse. 
""" try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - model = payload.get("model") + if not model: + payload = json.loads(body_bytes.decode("utf-8")) + model = payload.get("model") if not model: raise HTTPException( @@ -652,7 +653,7 @@ async def show_proxy(request: Request): # 12. API route – Copy # ------------------------------------------------------------- @app.post("/api/copy") -async def copy_proxy(request: Request): +async def copy_proxy(request: Request, source: Optional[str] = None, destination: Optional[str] = None): """ Proxy a model copy request to each Ollama endpoint and reply with Status Code. @@ -660,10 +661,14 @@ async def copy_proxy(request: Request): # 1. Parse and validate request try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - src = payload.get("source") - dst = payload.get("destination") + if not source and not destination: + payload = json.loads(body_bytes.decode("utf-8")) + src = payload.get("source") + dst = payload.get("destination") + else: + src = source + dst = destination if not src: raise HTTPException( @@ -688,35 +693,11 @@ async def copy_proxy(request: Request): # 4. Return with 200 OK if all went well, 404 if a single endpoint failed return Response(status_code=404 if 404 in status_list else 200) -@app.get("/api/copy") -async def copy_proxy_from_dashboard(source: str, destination: str): - """ - Proxy a model copy request to each Ollama endpoint and reply with a status code. - Accepts `source` and `destination` exclusively as query‑string parameters. - """ - # 1. Validate that both values are non‑empty strings (FastAPI already guarantees presence) - if not source: - raise HTTPException(status_code=400, detail="Missing required query parameter 'source'") - if not destination: - raise HTTPException(status_code=400, detail="Missing required query parameter 'destination'") - - # 2. Iterate over all endpoints to copy the model on each endpoint - status_list = [] - for endpoint in config.endpoints: - if "/v1" not in endpoint: - client = ollama.AsyncClient(host=endpoint) - # 3. Proxy a simple copy request - copy = await client.copy(source=source, destination=destination) - status_list.append(copy.status) - - # 4. Return with 200 OK if all went well, 404 if any endpoint failed - return Response(status_code=404 if 404 in status_list else 200) - # ------------------------------------------------------------- # 13. API route – Delete # ------------------------------------------------------------- @app.delete("/api/delete") -async def delete_proxy(request: Request): +async def delete_proxy(request: Request, model: Optional[str] = None): """ Proxy a model delete request to each Ollama endpoint and reply with Status Code. @@ -724,9 +705,10 @@ async def delete_proxy(request: Request): # 1. Parse and validate request try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - model = payload.get("model") + if not model: + payload = json.loads(body_bytes.decode("utf-8")) + model = payload.get("model") if not model: raise HTTPException( @@ -745,30 +727,26 @@ async def delete_proxy(request: Request): status_list.append(copy.status) # 4. Retrun 200 0K, if a single enpoint fails, respond with 404 - if 404 in status_list: - return Response( - status_code=404 - ) - else: - return Response( - status_code=200 - ) + return Response(status_code=404 if 404 in status_list else 200) # ------------------------------------------------------------- # 14. 
API route – Pull # ------------------------------------------------------------- @app.post("/api/pull") -async def pull_proxy(request: Request): +async def pull_proxy(request: Request, model: Optional[str] = None): """ Proxy a pull request to all Ollama endpoint and report status back. """ # 1. Parse and validate request try: body_bytes = await request.body() - payload = json.loads(body_bytes.decode("utf-8")) - model = payload.get("model") - insecure = payload.get("insecure") + if not model: + payload = json.loads(body_bytes.decode("utf-8")) + model = payload.get("model") + insecure = payload.get("insecure") + else: + insecure = None if not model: raise HTTPException( @@ -780,10 +758,11 @@ async def pull_proxy(request: Request): # 2. Iterate over all endpoints to pull the model status_list = [] for endpoint in config.endpoints: - client = ollama.AsyncClient(host=endpoint) - # 3. Proxy a simple pull request - pull = await client.pull(model=model, insecure=insecure, stream=False) - status_list.append(pull) + if "/v1" not in endpoint: + client = ollama.AsyncClient(host=endpoint) + # 3. Proxy a simple pull request + pull = await client.pull(model=model, insecure=insecure, stream=False) + status_list.append(pull) combined_status = [] for status in status_list: diff --git a/static/index.html b/static/index.html index 975b966..4fd9234 100644 --- a/static/index.html +++ b/static/index.html @@ -14,12 +14,17 @@ .model{font-family:monospace;} .loading{color:#999;} - /* NEW STYLES */ .tables-wrapper{ display:flex; gap:1rem; margin-top:1rem; } + .header-pull-wrapper { + display: flex; /* horizontal layout */ + align-items: center; /* vertical centering */ + gap: 1rem; /* space between title & form */ + flex-wrap: wrap; /* optional – keeps it tidy on very narrow screens */ + } .table-container{ width:50%; } @@ -54,8 +59,31 @@ color:#0066cc; cursor:pointer; text-decoration:underline; + float: right; } + .delete-link{ + font-size:0.9em; + margin-left:0.5em; + color:#b22222; /* dark red */ + cursor:pointer; + text-decoration:underline; + float: right; + } + .show-link { + font-size:0.9em; + margin-left:0.5em; + color:#0066cc; + cursor:pointer; + text-decoration:underline; + float: right; + } + .delete-link:hover{ text-decoration:none; } .copy-link:hover { text-decoration:none; } + /* modal.css – very lightweight – feel free to replace with Bootstrap/Material UI */ + .modal { display:none; position:fixed; top:0; left:0; width:100%; height:100%; + background:rgba(0,0,0,.6); align-items:center; justify-content:center; } + .modal-content { background:#fff; padding:1rem; max-width:90%; max-height:90%; overflow:auto; } + .close-btn { float:right; cursor:pointer; font-size:1.5rem; } @@ -63,7 +91,14 @@
 [markup below reconstructed from the ids referenced by this patch's CSS and
 JS; the original tags were lost in capture]
+    <div class="header-pull-wrapper">
       <h2>Available Models (Tags) <span id="tags-count"></span></h2>
+      <input id="pull-model-input" type="text">
+      <button id="pull-btn">Pull</button>
+      <span id="pull-status"></span>
+    </div>
 [remaining hunk context garbled in capture: the Model/Digest table header
 and the loadTags row templates `${m.id || m.name}` / `${m.digest}` and
 `${modelCell}` / `${m.digest || ''}`]
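The management routes these dashboard links call (show, copy, delete, pull)
now accept query-string parameters as an alternative to a JSON body. A
minimal smoke test of both styles; the base URL and model names below are
illustrative assumptions, not part of the patch:

    # Hypothetical smoke test for the query-parameter variants added in this
    # patch; the base URL and model names are assumptions.
    import httpx

    BASE = "http://localhost:8000"

    # JSON-body style (pre-existing behaviour)
    httpx.post(f"{BASE}/api/show", json={"model": "llama3.2:3b"})

    # Query-parameter style, as used by the dashboard's show/copy/delete links
    httpx.post(f"{BASE}/api/show", params={"model": "llama3.2:3b"})
    httpx.post(f"{BASE}/api/copy",
               params={"source": "llama3.2:3b", "destination": "llama3.2:backup"})
    httpx.delete(f"{BASE}/api/delete", params={"model": "llama3.2:backup"})
    httpx.post(f"{BASE}/api/pull", params={"model": "llama3.2:3b"})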
@@ -141,7 +176,13 @@ async function loadTags(){ body.innerHTML = data.models.map(m => { // Build the model cell let modelCell = `${m.id || m.name}`; - + // Add delete link only when a digest exists + if (m.digest) { + modelCell += ` + + delete + `; + } // Add the copy link *only if a digest exists* if (m.digest) { modelCell += ` @@ -149,7 +190,12 @@ async function loadTags(){ copy `; } - + if (m.digest) { + modelCell += ` + + show + `; + } return ` @@ -157,7 +203,6 @@ async function loadTags(){ `; }).join(''); const countSpan = document.getElementById('tags-count'); countSpan.textContent = `${data.models.length}`; - // Attach copy‑link handlers document.querySelectorAll('.copy-link').forEach(link => { link.addEventListener('click', async (e) => { e.preventDefault(); @@ -165,15 +210,105 @@ async function loadTags(){ const dest = prompt(`Enter destination for ${source}:`); if (!dest) return; // cancel if empty try{ - const resp = await fetch(`/api/copy?source=${encodeURIComponent(source)}&destination=${encodeURIComponent(dest)}`); + const resp = await fetch( + `/api/copy?source=${encodeURIComponent(source)}&destination=${encodeURIComponent(dest)}`, + {method: 'POST'} + ); if (!resp.ok) throw new Error(`Copy failed: ${resp.status}`); alert(`Copied ${source} to ${dest} successfully.`); + + loadTags(); }catch(err){ console.error(err); alert(`Error copying ${source} to ${dest}: ${err}`); } }); }); + document.querySelectorAll('.delete-link').forEach(link => { + link.addEventListener('click', async e => { + e.preventDefault(); + const model = link.dataset.model; + + const ok = confirm(`Delete the model “${model}”? This cannot be undone.`); + if (!ok) return; + + try { + const resp = await fetch( + `/api/delete?model=${encodeURIComponent(model)}`, + {method: 'DELETE'} + ); + if (!resp.ok) throw new Error(`Delete failed: ${resp.status}`); + alert(`Model “${model}” deleted successfully.`); + + loadTags(); + } catch (err) { + console.error(err); + alert(`Error deleting ${model}: ${err}`); + } + }); + }); + document.body.addEventListener('click', async e => { + if (!e.target.matches('.show-link')) return; + + e.preventDefault(); + const model = e.target.dataset.model; + + try { + const resp = await fetch( + `/api/show?model=${encodeURIComponent(model)}`, + {method: 'POST'} + ); + if (!resp.ok) throw new Error(`Status ${resp.status}`); + const data = await resp.json(); + + const jsonText = JSON.stringify(data, null, 2) + .replace(/\\n/g, '\n'); + + document.getElementById('json-output').textContent = jsonText; + document.getElementById('show-modal').style.display = 'flex'; + } catch (err) { + console.error(err); + alert(`Could not load model details: ${err.message}`); + } + }); + + document.getElementById('pull-btn').addEventListener('click', async () => { + const model = document.getElementById('pull-model-input').value.trim(); + const statusEl = document.getElementById('pull-status'); + + if (!model) { + alert('Please enter a model name.'); + return; + } + + try { + const resp = await fetch( + `/api/pull?model=${encodeURIComponent(model)}`, + {method: 'POST'} + ); + + if (!resp.ok) throw new Error(`Status ${resp.status}`); + const data = await resp.json(); + + statusEl.textContent = `✅ ${JSON.stringify(data, null, 2)}`; + statusEl.style.color = 'green'; + + // Optional: refresh the tags list so the new model appears + loadTags(); + } catch (err) { + console.error(err); + statusEl.textContent = `❌ ${err.message}`; + statusEl.style.color = 'red'; + } +}); + + + const modal = 
document.getElementById('show-modal'); + modal.addEventListener('click', e => { + if (e.target === modal || e.target.matches('.close-btn')) { + modal.style.display = 'none'; + } + }); }catch(e){ console.error(e); } } @@ -190,6 +325,19 @@ window.addEventListener('load', ()=>{ loadTags(); loadPS(); }); +setInterval(() => { + loadTags(); +}, 600_000); +setInterval(() => { + loadPS(); +}, 60_000); + \ No newline at end of file From 7a4e0bb08cfe35602ab54ff831efb631b8557a2d Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 16:08:02 +0200 Subject: [PATCH 13/20] Update config.yaml fixing config for ollama /v1 compatible endpoint usage --- config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config.yaml b/config.yaml index 94162b1..01bf296 100644 --- a/config.yaml +++ b/config.yaml @@ -13,5 +13,8 @@ max_concurrent_connections: 2 # Set an environment variable like OPENAI_KEY # Confirm endpoints are exactly as in endpoints block api_keys: + "http://192.168.0.50:11434": "ollama" + "http://192.168.0.51:11434": "ollama" + "http://192.168.0.52:11434": "ollama" "https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" "https://api.openai.com/v1": "${OPENAI_KEY}" From 75de2100399ca05a613f0a3d235ab17bed5fd336 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 16:09:30 +0200 Subject: [PATCH 14/20] Update config.yaml --- config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config.yaml b/config.yaml index 01bf296..93ae117 100644 --- a/config.yaml +++ b/config.yaml @@ -3,8 +3,8 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 - - https://openrouter.ai/api/v1 - - https://api.openai.com/v1 + #- https://openrouter.ai/api/v1 + #- https://api.openai.com/v1 # Maximum concurrent connections *per endpoint‑model pair* (equals to OLLAMA_NUM_PARALLEL) max_concurrent_connections: 2 @@ -16,5 +16,5 @@ api_keys: "http://192.168.0.50:11434": "ollama" "http://192.168.0.51:11434": "ollama" "http://192.168.0.52:11434": "ollama" - "https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" - "https://api.openai.com/v1": "${OPENAI_KEY}" + #"https://openrouter.ai/api/v1": "${OPENROUTER_KEY}" + #"https://api.openai.com/v1": "${OPENAI_KEY}" From 20790d95eda9cd96d0c5c5a47c374d7c95d65317 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 16:12:05 +0200 Subject: [PATCH 15/20] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d514eb..18b2290 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ is a transparent proxy for [Ollama](https://github.com/ollama/ollama) with model deployment aware routing. -Screenshot_NOMYO_Router_Dashboard
+Screenshot_NOMYO_Router_0-2-2_Dashboard
It runs between your frontend application and Ollama backend and is transparent for both, the front- and backend. From b3b67fdbf282b5df8302dfcaa5d1bfaf8c09be02 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 4 Sep 2025 19:07:28 +0200 Subject: [PATCH 16/20] Add files via upload BREAKING CHANGE: - new config.yaml config block - new dependency: httpx-aiohttp for faster endpoint queries in bigger installations - new dynamic dashboard --- requirements.txt | 11 ++ router.py | 37 +++-- static/index.html | 346 +++++++++++++++++++++------------------------- 3 files changed, 191 insertions(+), 203 deletions(-) diff --git a/requirements.txt b/requirements.txt index e39b50c..d58da4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,27 @@ +aiocache==0.12.3 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.10.0 +async-timeout==5.0.1 +attrs==25.3.0 certifi==2025.8.3 click==8.2.1 distro==1.9.0 exceptiongroup==1.3.0 fastapi==0.116.1 +frozenlist==1.7.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 +httpx-aiohttp==0.1.8 idna==3.10 jiter==0.10.0 +multidict==6.6.4 ollama==0.5.3 openai==1.102.0 +propcache==0.3.2 pydantic==2.11.7 pydantic-settings==2.10.1 pydantic_core==2.33.2 @@ -23,3 +33,4 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 uvicorn==0.35.0 +yarl==1.20.1 diff --git a/router.py b/router.py index 595f411..b73e4e6 100644 --- a/router.py +++ b/router.py @@ -7,6 +7,7 @@ license: AGPL """ # ------------------------------------------------------------- import json, time, asyncio, yaml, httpx, ollama, openai, os, re +from httpx_aiohttp import AiohttpTransport from pathlib import Path from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException @@ -96,11 +97,12 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: """ return httpx.AsyncClient( base_url=endpoint, - timeout=httpx.Timeout(5.0, read=5.0, write=5.0, connect=5.0), - limits=httpx.Limits( - max_keepalive_connections=64, - max_connections=64 - ) + timeout=httpx.Timeout(5.0, read=5.0, write=None, connect=5.0), + #limits=httpx.Limits( + # max_keepalive_connections=64, + # max_connections=64 + #), + transport=AiohttpTransport() ) async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) -> Set[str]: @@ -133,8 +135,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - # Error expired – remove it del _error_cache[endpoint] - client = get_httpx_client(endpoint) try: + client = get_httpx_client(endpoint) if "/v1" in endpoint: resp = await client.get(f"/models", headers=headers) else: @@ -147,7 +149,7 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - models = {m.get("id") for m in data.get("data", []) if m.get("id")} else: models = {m.get("name") for m in data.get("models", []) if m.get("name")} - + if models: _models_cache[endpoint] = (models, time.time()) return models @@ -160,6 +162,8 @@ async def fetch_available_models(endpoint: str, api_key: Optional[str] = None) - print(f"[fetch_available_models] {endpoint} error: {e}") _error_cache[endpoint] = time.time() return set() + finally: + await client.aclose() async def fetch_loaded_models(endpoint: str) -> Set[str]: @@ -168,8 +172,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: loaded on that endpoint. If the request fails (e.g. timeout, 5xx), an empty set is returned. 
""" - client = get_httpx_client(endpoint) try: + client = get_httpx_client(endpoint) resp = await client.get(f"/api/ps") resp.raise_for_status() data = resp.json() @@ -180,6 +184,8 @@ async def fetch_loaded_models(endpoint: str) -> Set[str]: except Exception: # If anything goes wrong we simply assume the endpoint has no models return set() + finally: + await client.aclose() async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]: """ @@ -189,8 +195,9 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key headers = None if api_key is not None: headers = {"Authorization": "Bearer " + api_key} - client = get_httpx_client(endpoint) + try: + client = get_httpx_client(endpoint) resp = await client.get(f"{route}", headers=headers) resp.raise_for_status() data = resp.json() @@ -200,6 +207,8 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str, api_key # If anything goes wrong we cannot reply details print(e) return [] + finally: + await client.aclose() def ep2base(ep): if "/v1" in ep: @@ -235,8 +244,8 @@ async def decrement_usage(endpoint: str, model: str) -> None: # Optionally, clean up zero entries if usage_counts[endpoint].get(model, 0) == 0: usage_counts[endpoint].pop(model, None) - if not usage_counts[endpoint]: - usage_counts.pop(endpoint, None) + #if not usage_counts[endpoint]: + # usage_counts.pop(endpoint, None) # ------------------------------------------------------------- # 5. Endpoint selection logic (respecting the configurable limit) @@ -640,7 +649,7 @@ async def show_proxy(request: Request, model: Optional[str] = None): # 2. Endpoint logic endpoint = await choose_endpoint(model) - await increment_usage(endpoint, model) + #await increment_usage(endpoint, model) client = ollama.AsyncClient(host=endpoint) # 3. Proxy a simple show request @@ -907,7 +916,7 @@ async def config_proxy(request: Request): """ async def check_endpoint(url: str): try: - async with httpx.AsyncClient(timeout=1) as client: + async with httpx.AsyncClient(timeout=1, transport=AiohttpTransport()) as client: if "/v1" in url: headers = {"Authorization": "Bearer " + config.api_keys[url]} r = await client.get(f"{url}/models", headers=headers) @@ -921,6 +930,8 @@ async def config_proxy(request: Request): return {"url": url, "status": "ok", "version": data.get("version")} except Exception as exc: return {"url": url, "status": "error", "detail": str(exc)} + finally: + await client.aclose() results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints]) return {"endpoints": results} diff --git a/static/index.html b/static/index.html index 4fd9234..a2945b4 100644 --- a/static/index.html +++ b/static/index.html @@ -5,108 +5,75 @@ NOMYO Router Dashboard -

 [remainder of this hunk garbled in capture: the rewritten dashboard <body>
 markup covering the "Router Dashboard" heading and the "Available Models
 (Tags)" panel with its pull form, Model/Digest table and "Loading…" rows,
 down to the "Running Models (PS)" heading]
@@ -63,7 +86,11 @@
 [garbled in capture: the Running Models (PS) table markup, Digest column
 and "Loading…" placeholder rows]
@@ -77,23 +103,22 @@
 [garbled in capture: the Endpoints status table markup with URL / Status /
 Version columns and "Loading…" placeholder rows]