From a432a65396d17ab08fd91abeae1090dd5fe6d818 Mon Sep 17 00:00:00 2001 From: alpha nerd Date: Wed, 8 Apr 2026 13:01:56 +0200 Subject: [PATCH] fix: params is never defined in ollama native backend --- router.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/router.py b/router.py index c87c5ca..2f27d94 100644 --- a/router.py +++ b/router.py @@ -2110,7 +2110,11 @@ async def chat_proxy(request: Request): # Only cache when no max_tokens limit was set — otherwise # finish_reason=length might just mean max_tokens was hit, # not that the context window was exhausted. - _req_max_tok = params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict") + _req_max_tok = ( + params.get("max_tokens") or params.get("max_completion_tokens") or params.get("num_predict") + if use_openai else + (options.get("num_predict") if options else None) + ) if _dr == "length" and not _req_max_tok: _pt = getattr(chunk, "prompt_eval_count", 0) or 0 _ct = getattr(chunk, "eval_count", 0) or 0