formatting, condensing rechunk

This commit is contained in:
Alpha Nerd 2025-09-21 16:33:43 +02:00
parent 43d95fbf38
commit aeca77c1a1

View file

@ -277,48 +277,44 @@ def iso8601_ns():
class rechunk: class rechunk:
def openai_chat_completion2ollama(chunk: dict, stream: bool, start_ts: float):
    """Convert an OpenAI chat-completion chunk/response to an ``ollama.ChatResponse``.

    Args:
        chunk: OpenAI chat-completion object. Despite the ``dict`` annotation
            it is read through attribute access (``chunk.choices``,
            ``chunk.usage``, ``chunk.model``).
        stream: When truthy the message is read from ``choices[0].delta``
            (streaming chunk), otherwise from ``choices[0].message``.
        start_ts: ``time.perf_counter()`` reading taken when the request
            started; used to derive the duration fields.

    Returns:
        An ``ollama.ChatResponse``. Token counts and durations are only
        filled in when ``chunk.usage`` is present; otherwise they are ``None``.
    """
    # A usage-only stream chunk can arrive with an empty ``choices`` list;
    # indexing it unconditionally would raise IndexError.
    choice = chunk.choices[0] if chunk.choices else None
    if choice is None:
        role, content, finish_reason = "assistant", None, None
    else:
        part = choice.delta if stream else choice.message
        role = part.role or "assistant"
        content = part.content
        finish_reason = choice.finish_reason

    assistant_msg = ollama.Message(
        role=role,
        content=content,
        thinking=None,
        images=None,
        tool_name=None,
        tool_calls=None)

    usage = chunk.usage
    if usage is None:
        elapsed_ns = prompt_eval_ns = None
        eval_count = prompt_eval_count = None
    else:
        # Sample the clock once so eval_duration and total_duration agree.
        elapsed = (time.perf_counter() - start_ts) * 1_000_000_000
        elapsed_ns = int(elapsed)
        eval_count = usage.completion_tokens
        prompt_eval_count = usage.prompt_tokens
        if usage.completion_tokens:
            # Heuristic estimate kept from the original: scale the elapsed
            # time by the prompt/completion token ratio.
            prompt_eval_ns = int(
                elapsed * (usage.prompt_tokens / usage.completion_tokens / 100))
        else:
            # No completion tokens: the ratio is undefined (would divide by zero).
            prompt_eval_ns = None

    return ollama.ChatResponse(
        model=chunk.model,
        created_at=iso8601_ns(),
        done_reason=finish_reason,
        load_duration=100000,
        prompt_eval_duration=prompt_eval_ns,
        eval_count=eval_count,
        prompt_eval_count=prompt_eval_count,
        eval_duration=elapsed_ns,
        total_duration=elapsed_ns,
        message=assistant_msg)
def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float):
    """Convert an OpenAI text-completion chunk/response to an ``ollama.GenerateResponse``.

    Args:
        chunk: OpenAI completion object. Despite the ``dict`` annotation it
            is read through attribute access (``chunk.choices``,
            ``chunk.usage``, ``chunk.model``).
        stream: Accepted for signature parity with the chat converter; not
            used by this function.
        start_ts: ``time.perf_counter()`` reading taken when the request
            started; used to derive the duration fields.

    Returns:
        An ``ollama.GenerateResponse``. Durations are only filled in when
        ``chunk.usage`` is present; otherwise they are ``None``.
    """
    # A usage-only stream chunk can arrive with an empty ``choices`` list;
    # indexing it unconditionally would raise IndexError.
    choice = chunk.choices[0] if chunk.choices else None
    # ``reasoning`` is an optional attribute; absent on many backends.
    thinking = getattr(choice, "reasoning", None) if choice else None

    if chunk.usage is not None:
        # Nanoseconds, matching the chat converter; the previous ``* 1000``
        # produced milliseconds. The clock is sampled once so both fields agree.
        elapsed_ns = int((time.perf_counter() - start_ts) * 1_000_000_000)
    else:
        elapsed_ns = None

    return ollama.GenerateResponse(
        model=chunk.model,
        created_at=iso8601_ns(),
        load_duration=10000,
        done_reason=choice.finish_reason if choice is not None else None,
        # Left unset as in the original; deriving it from finish_reason
        # was disabled there.
        done=None,
        total_duration=elapsed_ns,
        eval_duration=elapsed_ns,
        thinking=thinking,
        response=choice.text if choice is not None else "")
def openai_embeddings2ollama(chunk: dict): def openai_embeddings2ollama(chunk: dict):
@ -326,18 +322,18 @@ class rechunk:
return rechunk return rechunk
def openai_embed2ollama(chunk: dict, model: str):
    """Convert an OpenAI embeddings response to an ``ollama.EmbedResponse``.

    Args:
        chunk: OpenAI embeddings response. Despite the ``dict`` annotation it
            is read through attribute access (``chunk.data[i].embedding``).
        model: Model name to report in the Ollama response.

    Returns:
        An ``ollama.EmbedResponse`` whose ``embeddings`` holds one vector per
        entry in ``chunk.data``, in the same order. Timing and count fields
        are not available from this source and are set to ``None``.
    """
    # Return every vector, not just the first: a batched request has one
    # ``data`` entry per input.
    vectors = [item.embedding for item in chunk.data]
    return ollama.EmbedResponse(
        model=model,
        created_at=iso8601_ns(),
        done=None,
        done_reason=None,
        total_duration=None,
        load_duration=None,
        prompt_eval_count=None,
        prompt_eval_duration=None,
        eval_count=None,
        eval_duration=None,
        embeddings=vectors)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# SSE Helpers # SSE Helpers