From deca8e37adc72e27a8993718b2f508cf124e1d7f Mon Sep 17 00:00:00 2001
From: alpha-nerd-nomyo <alpha-nerd@nomyo.ai>
Date: Wed, 17 Sep 2025 11:40:48 +0200
Subject: [PATCH] Fix ":latest" model renaming in /v1 endpoints and missing
 "reasoning" (thinking) handling in rechunk

---
 .gitignore |  3 +++
 router.py  | 12 +++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5dac518..4bb65cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,3 +63,6 @@ cython_debug/
 # Logfile(s)
 *.log
 *.sqlite3
+
+# Config
+config.yaml
\ No newline at end of file
diff --git a/router.py b/router.py
index 1c00373..2aa0886 100644
--- a/router.py
+++ b/router.py
@@ -291,13 +291,15 @@ class rechunk:
         return rechunk
     
     def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float):
+        with_thinking = chunk.choices[0] if chunk.choices[0] else None
+        thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None
         rechunk = { "model": chunk.model,
                                     "created_at": iso8601_ns(),
                                     "load_duration": None,
                                     "done_reason": chunk.choices[0].finish_reason,
                                     "total_duration": None, 
                                     "eval_duration": (int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
-                                    "thinking": chunk.choices[0].reasoning or None,
+                                    "thinking": thinking,
                                     "context": None,
                                     "response": chunk.choices[0].text
             }
@@ -1213,6 +1215,10 @@ async def openai_chat_completions_proxy(request: Request):
         max_completion_tokens = payload.get("max_completion_tokens")
         tools = payload.get("tools")
 
+        if ":latest" in model:
+            model = model.split(":latest")
+            model = model[0]
+
         params = {
             "messages": messages, 
             "model": model,
@@ -1313,6 +1319,10 @@ async def openai_completions_proxy(request: Request):
         max_completion_tokens = payload.get("max_completion_tokens")
         suffix = payload.get("suffix")
 
+        if ":latest" in model:
+            model = model.split(":latest")
+            model = model[0]
+
         params = {
             "prompt": prompt, 
             "model": model,