From 353fadac48fd1d5437eda4a3c2b4f12e0a229e7e Mon Sep 17 00:00:00 2001
From: alpha nerd <alpha-nerd@nomyo.ai>
Date: Tue, 5 May 2026 11:04:18 +0200
Subject: [PATCH 01/14] doc: rename pre-built image heading to "OCI Registry"

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1b952d9..fb60988 100644
--- a/README.md
+++ b/README.md
@@ -72,7 +72,7 @@ uvicorn router:app --host 127.0.0.1 --port 12434 --loop uvloop
 
 ## Docker Deployment
 
-### Pre-built image (GitHub Container Registry)
+### Pre-built image (OCI Registry)
 
 Pre-built multi-arch images (`linux/amd64`, `linux/arm64`) are published automatically on every release.
 

From e296ac19badeeb2258de42375d66325a2cdfa803 Mon Sep 17 00:00:00 2001
From: alpha nerd <alpha-nerd@nomyo.ai>
Date: Thu, 7 May 2026 11:34:09 +0200
Subject: [PATCH 02/14] feat: new helper to bridge change of behaviour in
 llama.cpp /v1/models status, which now correctly reports "sleeping" or
 "loaded" for auto-unload

---
 router.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/router.py b/router.py
index 326ec33..7cf3ada 100644
--- a/router.py
+++ b/router.py
@@ -572,6 +572,19 @@ def _is_llama_model_loaded(item: dict) -> bool:
         return status == "loaded"
     return False
 
+def _is_llama_model_loaded_or_sleeping(item: dict) -> bool:
+    """Return True if status is 'loaded' or 'sleeping'.
+    Newer llama-server versions report 'sleeping' in /v1/models when a model is idle;
+    ps_details needs to include these so _fetch_llama_props can detect and unload them."""
+    status = item.get("status")
+    if status is None:  # no "status" key (or an explicit None): the entry is accepted
+        return True
+    if isinstance(status, dict):  # dict-shaped status: the state is read from its "value" key
+        return status.get("value") in ("loaded", "sleeping")
+    if isinstance(status, str):  # plain-string status: compared directly
+        return status in ("loaded", "sleeping")
+    return False  # any other status type is rejected
+
 def is_ext_openai_endpoint(endpoint: str) -> bool:
     """
     Determine if an endpoint is an external OpenAI-compatible endpoint (not Ollama or llama-server).
@@ -2908,8 +2921,8 @@ async def ps_details_proxy(request: Request):
         llama_models_pending: list[dict] = []
 
         for (endpoint, modellist) in zip([ep for ep, _ in llama_tasks], llama_loaded):
-            # Filter for loaded models only
-            loaded_models = [item for item in modellist if _is_llama_model_loaded(item)]
+            # Include sleeping models too so _fetch_llama_props can unload them
+            loaded_models = [item for item in modellist if _is_llama_model_loaded_or_sleeping(item)]
             for item in loaded_models:
                 if isinstance(item, dict) and item.get("id"):
                     raw_id = item["id"]

From ecdd228a54322716a0d36ec7d3c43e204af91208 Mon Sep 17 00:00:00 2001
From: alpha nerd <alpha-nerd@nomyo.ai>
Date: Fri, 8 May 2026 12:15:51 +0200
Subject: [PATCH 03/14] feat: better default referer handling

---
 router.py | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/router.py b/router.py
index 7cf3ada..ab54cc7 100644
--- a/router.py
+++ b/router.py
@@ -331,6 +331,7 @@ app.add_middleware(
 )
 default_headers={
     "HTTP-Referer": "https://nomyo.ai",
+    "Referer": "https://nomyo.ai",
     "X-Title": "NOMYO Router",
     }
         
@@ -797,18 +798,19 @@ class fetch:
         Internal function that performs the actual HTTP request to fetch available models.
         This is called by available_models() after checking caches and in-flight requests.
         """
-        headers = None
+        headers = {"Referer": default_headers.get("HTTP-Referer", "https://nomyo.ai")}
         if api_key is not None:
-            headers = {"Authorization": "Bearer " + api_key}
+            headers["Authorization"] = "Bearer " + api_key
 
+        ep_base = endpoint.rstrip("/")
         if endpoint in config.llama_server_endpoints and "/v1" not in endpoint:
-            endpoint_url = f"{endpoint}/v1/models"
+            endpoint_url = f"{ep_base}/v1/models"
             key = "data"
         elif "/v1" in endpoint or endpoint in config.llama_server_endpoints:
-            endpoint_url = f"{endpoint}/models"
+            endpoint_url = f"{ep_base}/models"
             key = "data"
         else:
-            endpoint_url = f"{endpoint}/api/tags"
+            endpoint_url = f"{ep_base}/api/tags"
             key = "models"
 
         client: aiohttp.ClientSession = get_session(endpoint)
@@ -817,13 +819,12 @@ class fetch:
                 await _ensure_success(resp)
                 data = await resp.json()
 
-                items = data.get(key, [])
-                models = {item.get("id") or item.get("name") for item in items if item.get("id") or item.get("name")}
+            items = data.get(key, [])
+            models = {item.get("id") or item.get("name") for item in items if item.get("id") or item.get("name")}
 
-                # Update cache with lock protection
-                async with _models_cache_lock:
-                    _models_cache[endpoint] = (models, time.time())
-                return models
+            async with _models_cache_lock:
+                _models_cache[endpoint] = (models, time.time())
+            return models
         except Exception as e:
             # Treat any error as if the endpoint offers no models
             message = _format_connection_issue(endpoint_url, e)
@@ -1077,12 +1078,12 @@ class fetch:
                     if _is_fresh(_available_error_cache[endpoint], 300):
                         return []
 
-        client: aiohttp.ClientSession = get_session(endpoint)
-        headers = None
+        headers = {"Referer": default_headers.get("HTTP-Referer", "https://nomyo.ai")}
         if api_key is not None:
-            headers = {"Authorization": "Bearer " + api_key}
+            headers["Authorization"] = "Bearer " + api_key
 
-        request_url = f"{endpoint}{route}"
+        request_url = f"{endpoint.rstrip('/')}/{route.lstrip('/')}"
+        client: aiohttp.ClientSession = get_session(endpoint)
         req_kwargs = {}
         if timeout is not None:
             req_kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
@@ -3984,11 +3985,21 @@ async def startup_event() -> None:
     ssl_context = ssl.create_default_context()
     connector = aiohttp.TCPConnector(limit=0, limit_per_host=512, ssl=ssl_context)
     timeout = aiohttp.ClientTimeout(total=60, connect=15, sock_read=120, sock_connect=15)
-    session = aiohttp.ClientSession(connector=connector, timeout=timeout)
+    session = aiohttp.ClientSession(
+        connector=connector,
+        timeout=timeout,
+        headers={"Referer": default_headers.get("HTTP-Referer", "https://nomyo.ai")},
+    )
 
     app_state["connector"] = connector
     app_state["session"] = session
 
+    # Create httpx clients for external OpenAI endpoints (Google, etc.)
+    # aiohttp strips Referer headers for cross-origin requests, so we use httpx
+    for ep in config.endpoints:
+        if is_ext_openai_endpoint(ep):
+            app_state["httpx_clients"][ep] = httpx.AsyncClient(timeout=30.0)
+
     # Create per-endpoint Unix socket sessions for .sock endpoints
     for ep in config.llama_server_endpoints:
         if _is_unix_socket_endpoint(ep):

From 90a54abc9b403c136233788933f0c66ebbcf571d Mon Sep 17 00:00:00 2001
From: alpha nerd <alpha-nerd@nomyo.ai>
Date: Fri, 8 May 2026 12:19:03 +0200
Subject: [PATCH 04/14] feat: correct pass through of openai.APIStatusErrors

---
 router.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/router.py b/router.py
index ab54cc7..603387e 100644
--- a/router.py
+++ b/router.py
@@ -418,7 +418,16 @@ async def enforce_router_api_key(request: Request, call_next):
         response.headers["Access-Control-Allow-Headers"] = "Authorization, Content-Type"
         response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, DELETE, OPTIONS"
     return response
-        
+
+
+@app.exception_handler(openai.APIStatusError)
+async def _openai_api_status_error_handler(request: Request, exc: openai.APIStatusError):
+    """Forward upstream OpenAI-SDK status errors with their original status code and body
+    instead of letting them bubble up as 500s; with no body, an {"error": ...} payload is built."""
+    body = exc.body if exc.body is not None else {"error": {"message": str(exc), "code": exc.status_code}}
+    return JSONResponse(status_code=exc.status_code, content=body)
+
+
 # -------------------------------------------------------------
 # 3. Global state: per‑endpoint per‑model active connection counters
 # -------------------------------------------------------------

From abcb36bbd4c53f7261a1fe005c9137ba93f6bc43 Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Fri, 8 May 2026 12:52:36 +0000
Subject: [PATCH 05/14] chore(deps): update dependency attrs to v26

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 1b4031b..3723185 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ aiosignal==1.4.0
 annotated-types==0.7.0
 anyio==4.13.0
 async-timeout==5.0.1
-attrs==25.4.0
+attrs==26.1.0
 certifi==2025.11.12
 click==8.3.3
 distro==1.9.0

From a2dd6d10b34861c6721990ae7a3ff6c62bb6e5b4 Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Fri, 8 May 2026 12:54:41 +0000
Subject: [PATCH 06/14] chore(deps): update dependency exceptiongroup to v1.3.1

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b1fa533..c44cd84 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,7 @@ attrs==25.4.0
 certifi==2025.11.12
 click==8.3.3
 distro==1.9.0
-exceptiongroup==1.3.0
+exceptiongroup==1.3.1
 fastapi==0.136.1
 fastapi-sse==1.1.1
 frozenlist==1.8.0

From c4f761181704819d81eeac23209290deba25764d Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Fri, 8 May 2026 13:00:53 +0000
Subject: [PATCH 07/14] chore(deps): update dependency starlette to v1

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b1fa533..110aa59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,7 +30,7 @@ pydantic_core==2.46.4
 python-dotenv==1.2.2
 PyYAML==6.0.3
 sniffio==1.3.1
-starlette==0.52.1
+starlette==1.0.0
 truststore==0.10.4
 tiktoken==0.12.0
 tqdm==4.67.3

From aefeac1ff186dd93673d9a612a15dbe3fd597535 Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Fri, 8 May 2026 16:17:12 +0000
Subject: [PATCH 08/14] chore(deps): update dependency pydantic-settings to
 v2.14.1

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index c44cd84..f5f5ae0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,7 +25,7 @@ numpy>=1.26
 pillow==12.2.0
 propcache==0.4.1
 pydantic==2.13.4
-pydantic-settings==2.10.1
+pydantic-settings==2.14.1
 pydantic_core==2.46.4
 python-dotenv==1.2.2
 PyYAML==6.0.3

From fc3c2a161dffa2eba274315b02416956c1b11af5 Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Fri, 8 May 2026 21:13:33 +0000
Subject: [PATCH 09/14] chore(deps): update dependency propcache to v0.5.2

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index c44cd84..1bf5fb7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ openai==1.109.1
 orjson>=3.11.5
 numpy>=1.26
 pillow==12.2.0
-propcache==0.4.1
+propcache==0.5.2
 pydantic==2.13.4
 pydantic-settings==2.10.1
 pydantic_core==2.46.4

From cf340d35753ba8a57da420226d8632dffbae707c Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Sun, 10 May 2026 09:16:19 +0000
Subject: [PATCH 10/14] chore(deps): update dependency certifi to v2026

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9814898..d87f1f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ annotated-types==0.7.0
 anyio==4.13.0
 async-timeout==5.0.1
 attrs==26.1.0
-certifi==2025.11.12
+certifi==2026.4.22
 click==8.3.3
 distro==1.9.0
 exceptiongroup==1.3.1

From 2a9f42c0e0f40ec6aa0bdf3d4c1ddc11f14c2f14 Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Sun, 10 May 2026 20:40:45 +0000
Subject: [PATCH 11/14] chore(deps): update dependency idna to v3.14

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index d87f1f8..159c062 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@ frozenlist==1.8.0
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1
-idna==3.13
+idna==3.14
 jiter==0.14.0
 multidict==6.7.1
 ollama==0.6.2

From 84808c17bdb3b39231b84387b3e46e631616a7d5 Mon Sep 17 00:00:00 2001
From: alpha-nerd <alpha-nerd@noreply.localhost>
Date: Mon, 11 May 2026 10:40:18 +0200
Subject: [PATCH 12/14] ci: switch docker-publish build cache export to mode=min

---
 .forgejo/workflows/docker-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.forgejo/workflows/docker-publish.yml b/.forgejo/workflows/docker-publish.yml
index 3979f62..e3dad4b 100644
--- a/.forgejo/workflows/docker-publish.yml
+++ b/.forgejo/workflows/docker-publish.yml
@@ -79,7 +79,7 @@ jobs:
           provenance: false
           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
           cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }}
-          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=max
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=min
 
   merge:
     runs-on: docker-amd64

From 50f832668ead676d438b78bb8860e1bb55f6bd39 Mon Sep 17 00:00:00 2001
From: alpha-nerd <alpha-nerd@noreply.localhost>
Date: Mon, 11 May 2026 10:47:02 +0200
Subject: [PATCH 13/14] ci: remove registry build cache from docker-publish workflow

---
 .forgejo/workflows/docker-publish.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.forgejo/workflows/docker-publish.yml b/.forgejo/workflows/docker-publish.yml
index e3dad4b..27cd879 100644
--- a/.forgejo/workflows/docker-publish.yml
+++ b/.forgejo/workflows/docker-publish.yml
@@ -78,8 +78,6 @@ jobs:
           push: true
           provenance: false
           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:platform-${{ matrix.arch }}
-          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }}
-          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-${{ matrix.arch }},mode=min
 
   merge:
     runs-on: docker-amd64

From 3669365c3cedabce5514e3bedb7ed94715217bc9 Mon Sep 17 00:00:00 2001
From: Renovate Bot <renovate-bot@bitfreedom.net>
Date: Thu, 14 May 2026 11:42:46 +0000
Subject: [PATCH 14/14] chore(deps): update dependency starlette to v1

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 159c062..e71ef8a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,7 +30,7 @@ pydantic_core==2.46.4
 python-dotenv==1.2.2
 PyYAML==6.0.3
 sniffio==1.3.1
-starlette==0.52.1
+starlette==1.0.0
 truststore==0.10.4
 tiktoken==0.12.0
 tqdm==4.67.3