feat: prefer "reasoning_content" over "reasoning" when extracting reasoning text

Updated reasoning extraction in router.py to check the "reasoning_content" attribute first and fall back to "reasoning", for both streaming deltas and non-streaming messages.
This commit is contained in:
Alpha Nerd 2026-02-08 11:29:47 +01:00
parent bd0d210b2a
commit 92cea1dead

View file

@ -997,11 +997,11 @@ class rechunk:
)
with_thinking = chunk.choices[0] if chunk.choices[0] else None
if stream == True:
thinking = getattr(with_thinking.delta, "reasoning", None) if with_thinking else None
thinking = (getattr(with_thinking.delta, "reasoning_content", None) or getattr(with_thinking.delta, "reasoning", None)) if with_thinking else None
role = chunk.choices[0].delta.role or "assistant"
content = chunk.choices[0].delta.content or ''
else:
thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None
thinking = (getattr(with_thinking.message, "reasoning_content", None) or getattr(with_thinking.message, "reasoning", None)) if with_thinking else None
role = chunk.choices[0].message.role or "assistant"
content = chunk.choices[0].message.content or ''
assistant_msg = ollama.Message(
@ -1211,7 +1211,7 @@ async def choose_endpoint(model: str) -> str:
# Then by total endpoint usage (ascending) to balance idle endpoints
endpoints_with_free_slot.sort(
key=lambda ep: (
-usage_counts.get(ep, {}).get(model, 0), # Primary: per-model usage (descending - prefer endpoints with connections)
#-usage_counts.get(ep, {}).get(model, 0), # Primary: per-model usage (descending - prefer endpoints with connections)
sum(usage_counts.get(ep, {}).values()) # Secondary: total endpoint usage (ascending - prefer idle endpoints)
)
)