From 16dba93c0dbd932e9f992471e95df543e98a7bb9 Mon Sep 17 00:00:00 2001
From: alpha-nerd-nomyo <alpha-nerd@nomyo.ai>
Date: Mon, 15 Sep 2025 17:48:17 +0200
Subject: [PATCH] Ensure compliance of Ollama embeddings endpoints when using
 OpenAI models

---
 router.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/router.py b/router.py
index 5780045..10b3a5e 100644
--- a/router.py
+++ b/router.py
@@ -397,7 +397,6 @@ async def choose_endpoint(model: str) -> str:
         if ":latest" in model:  #ollama naming convention not applicable to openai
             model = model.split(":")
             model = model[0]
-            print(model)
             candidate_endpoints = [
                 ep for ep, models in zip(config.endpoints, advertised_sets)
                 if model in models
@@ -670,13 +669,15 @@ async def embedding_proxy(request: Request):
 
     # 2. Endpoint logic
     endpoint = await choose_endpoint(model)
-    await increment_usage(endpoint, model)
     is_openai_endpoint = "/v1" in endpoint
     if is_openai_endpoint:
+        if ":latest" in model:
+            model = model.split(":")
+            model = model[0]
         client = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint])
     else:
         client = ollama.AsyncClient(host=endpoint)
-
+    await increment_usage(endpoint, model)
     # 3. Async generator that streams embedding data and decrements the counter
     async def stream_embedding_response():
         try:
@@ -734,13 +735,15 @@ async def embed_proxy(request: Request):
 
     # 2. Endpoint logic
     endpoint = await choose_endpoint(model)
-    await increment_usage(endpoint, model)
     is_openai_endpoint = "/v1" in endpoint
     if is_openai_endpoint:
+        if ":latest" in model:
+            model = model.split(":")
+            model = model[0]
         client = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint])
     else:
         client = ollama.AsyncClient(host=endpoint)
-
+    await increment_usage(endpoint, model)
     # 3. Async generator that streams embed data and decrements the counter
     async def stream_embedding_response():
         try: