From deca8e37adc72e27a8993718b2f508cf124e1d7f Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 17 Sep 2025 11:40:48 +0200 Subject: [PATCH] fixing model re-naming in /v1 endpoints and thinking in rechunk --- .gitignore | 3 +++ router.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5dac518..4bb65cd 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,6 @@ cython_debug/ # Logfile(s) *.log *.sqlite3 + +# Config +config.yaml \ No newline at end of file diff --git a/router.py b/router.py index 1c00373..2aa0886 100644 --- a/router.py +++ b/router.py @@ -291,13 +291,15 @@ class rechunk: return rechunk def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float): + with_thinking = chunk.choices[0] if chunk.choices[0] else None + thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None rechunk = { "model": chunk.model, "created_at": iso8601_ns(), "load_duration": None, "done_reason": chunk.choices[0].finish_reason, "total_duration": None, "eval_duration": (int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None), - "thinking": chunk.choices[0].reasoning or None, + "thinking": thinking, "context": None, "response": chunk.choices[0].text } @@ -1213,6 +1215,10 @@ async def openai_chat_completions_proxy(request: Request): max_completion_tokens = payload.get("max_completion_tokens") tools = payload.get("tools") + if ":latest" in model: + model = model.split(":latest") + model = model[0] + params = { "messages": messages, "model": model, @@ -1313,6 +1319,10 @@ async def openai_completions_proxy(request: Request): max_completion_tokens = payload.get("max_completion_tokens") suffix = payload.get("suffix") + if ":latest" in model: + model = model.split(":latest") + model = model[0] + params = { "prompt": prompt, "model": model,