From 1c3f9a9dc4b945bfed818e6b179daf57938d490d Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Mon, 24 Nov 2025 09:33:54 +0100 Subject: [PATCH] fix model naming so the usage counter is decremented correctly in /v1 endpoints --- router.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/router.py b/router.py index 46b6e7e..cc332f5 100644 --- a/router.py +++ b/router.py @@ -1647,8 +1647,8 @@ async def openai_chat_completions_proxy(request: Request): if prompt_tok != 0 or comp_tok != 0: if not is_ext_openai_endpoint(endpoint): if not ":" in model: - model = model+":latest" - await token_queue.put((endpoint, model, prompt_tok, comp_tok)) + local_model = model+":latest" + await token_queue.put((endpoint, local_model, prompt_tok, comp_tok)) yield b"data: [DONE]\n\n" else: prompt_tok = async_gen.usage.prompt_tokens or 0 @@ -1763,8 +1763,8 @@ async def openai_completions_proxy(request: Request): if prompt_tok != 0 or comp_tok != 0: if not is_ext_openai_endpoint(endpoint): if not ":" in model: - model = model+":latest" - await token_queue.put((endpoint, model, prompt_tok, comp_tok)) + local_model = model+":latest" + await token_queue.put((endpoint, local_model, prompt_tok, comp_tok)) # Final DONE event yield b"data: [DONE]\n\n" else: