From 47a39184ada4c080a9861982632c6a675e9c94df Mon Sep 17 00:00:00 2001
From: alpha-nerd-nomyo <alpha-nerd@nomyo.ai>
Date: Thu, 6 Nov 2025 14:27:34 +0100
Subject: [PATCH] add token usage counter for non-stream openai ollama endpoints

---
 router.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/router.py b/router.py
index a0ed5d5..f0ea29b 100644
--- a/router.py
+++ b/router.py
@@ -1446,6 +1446,9 @@ async def openai_chat_completions_proxy(request: Request):
                         yield f"data: {data}\n\n".encode("utf-8")
                 yield b"data: [DONE]\n\n"
             else:
+                prompt_tok = async_gen.usage.prompt_tokens or 0
+                comp_tok   = async_gen.usage.completion_tokens or 0
+                record_token_usage(endpoint, payload.get("model"), prompt_tok, comp_tok)
                 json_line = (
                     async_gen.model_dump_json()
                     if hasattr(async_gen, "model_dump_json")
@@ -1549,6 +1552,9 @@ async def openai_completions_proxy(request: Request):
                 # Final DONE event
                 yield b"data: [DONE]\n\n"
             else:
+                prompt_tok = async_gen.usage.prompt_tokens or 0
+                comp_tok   = async_gen.usage.completion_tokens or 0
+                record_token_usage(endpoint, payload.get("model"), prompt_tok, comp_tok)
                 json_line = (
                     async_gen.model_dump_json()
                     if hasattr(async_gen, "model_dump_json")