From fcfabbe9262336e1a6e2eb72b7ea30119f932e15 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Tue, 23 Sep 2025 13:08:17 +0200 Subject: [PATCH] mitigate division by zero caused by Google GenAI sending completion_tokens=0 in the first chunk --- router.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/router.py b/router.py index 94f84ca..2fa59ef 100644 --- a/router.py +++ b/router.py @@ -314,7 +314,7 @@ class rechunk: total_duration=int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else 0, load_duration=100000, prompt_eval_count=int(chunk.usage.prompt_tokens) if chunk.usage is not None else 0, - prompt_eval_duration=int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None else 0, + prompt_eval_duration=int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None and chunk.usage.completion_tokens != 0 else 0, eval_count=int(chunk.usage.completion_tokens) if chunk.usage is not None else 0, eval_duration=int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else 0, message=assistant_msg) @@ -331,7 +331,7 @@ class rechunk: total_duration=int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else 0, load_duration=10000, prompt_eval_count=int(chunk.usage.prompt_tokens) if chunk.usage is not None else 0, - prompt_eval_duration=int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None else 0, + prompt_eval_duration=int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None and chunk.usage.completion_tokens != 0 else 0, eval_count=int(chunk.usage.completion_tokens) if chunk.usage is not None else 0, 
eval_duration=int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else 0, response=chunk.choices[0].text or '', @@ -664,9 +664,11 @@ async def chat_proxy(request: Request): async_gen = await client.chat(model=model, messages=messages, tools=tools, stream=stream, think=think, format=_format, options=options, keep_alive=keep_alive) if stream == True: async for chunk in async_gen: + print(chunk) if is_openai_endpoint: chunk = rechunk.openai_chat_completion2ollama(chunk, stream, start_ts) # `chunk` can be a dict or a pydantic model – dump to JSON safely + print(chunk) if hasattr(chunk, "model_dump_json"): json_line = chunk.model_dump_json() else: