From 47a39184ada4c080a9861982632c6a675e9c94df Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Thu, 6 Nov 2025 14:27:34 +0100 Subject: [PATCH] token usage counter for non-stream openai ollama endpoints added --- router.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/router.py b/router.py index a0ed5d5..f0ea29b 100644 --- a/router.py +++ b/router.py @@ -1446,6 +1446,9 @@ async def openai_chat_completions_proxy(request: Request): yield f"data: {data}\n\n".encode("utf-8") yield b"data: [DONE]\n\n" else: + prompt_tok = async_gen.usage.prompt_tokens or 0 + comp_tok = async_gen.usage.completion_tokens or 0 + record_token_usage(endpoint, payload.get("model"), prompt_tok, comp_tok) json_line = ( async_gen.model_dump_json() if hasattr(async_gen, "model_dump_json") @@ -1549,6 +1552,9 @@ async def openai_completions_proxy(request: Request): # Final DONE event yield b"data: [DONE]\n\n" else: + prompt_tok = async_gen.usage.prompt_tokens or 0 + comp_tok = async_gen.usage.completion_tokens or 0 + record_token_usage(endpoint, payload.get("model"), prompt_tok, comp_tok) json_line = ( async_gen.model_dump_json() if hasattr(async_gen, "model_dump_json")