Fix model re-naming in /v1 endpoints and `thinking` handling in rechunk

This commit is contained in:
Alpha Nerd 2025-09-17 11:40:48 +02:00
parent d85d120cc8
commit deca8e37ad
2 changed files with 14 additions and 1 deletion

3
.gitignore vendored
View file

@ -63,3 +63,6 @@ cython_debug/
# Logfile(s)
*.log
*.sqlite3
# Config
config.yaml

View file

@ -291,13 +291,15 @@ class rechunk:
return rechunk
def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float):
with_thinking = chunk.choices[0] if chunk.choices[0] else None
thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None
rechunk = { "model": chunk.model,
"created_at": iso8601_ns(),
"load_duration": None,
"done_reason": chunk.choices[0].finish_reason,
"total_duration": None,
"eval_duration": (int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
"thinking": chunk.choices[0].reasoning or None,
"thinking": thinking,
"context": None,
"response": chunk.choices[0].text
}
@ -1213,6 +1215,10 @@ async def openai_chat_completions_proxy(request: Request):
max_completion_tokens = payload.get("max_completion_tokens")
tools = payload.get("tools")
if ":latest" in model:
model = model.split(":latest")
model = model[0]
params = {
"messages": messages,
"model": model,
@ -1313,6 +1319,10 @@ async def openai_completions_proxy(request: Request):
max_completion_tokens = payload.get("max_completion_tokens")
suffix = payload.get("suffix")
if ":latest" in model:
model = model.split(":latest")
model = model[0]
params = {
"prompt": prompt,
"model": model,