formatting, condensing rechunk
This commit is contained in:
parent
43d95fbf38
commit
aeca77c1a1
1 changed file with 44 additions and 48 deletions
92
router.py
92
router.py
|
|
@ -277,48 +277,44 @@ def iso8601_ns():
|
||||||
class rechunk:
|
class rechunk:
|
||||||
def openai_chat_completion2ollama(chunk: dict, stream: bool, start_ts: float):
|
def openai_chat_completion2ollama(chunk: dict, stream: bool, start_ts: float):
|
||||||
if stream == True:
|
if stream == True:
|
||||||
assistant_msg = ollama.Message(
|
role = chunk.choices[0].delta.role or "assistant"
|
||||||
role=chunk.choices[0].delta.role or "assistant",
|
content = chunk.choices[0].delta.content
|
||||||
content=chunk.choices[0].delta.content,
|
|
||||||
thinking=None,
|
|
||||||
images=None,
|
|
||||||
tool_name=None,
|
|
||||||
tool_calls=None
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
assistant_msg = ollama.Message(
|
role = chunk.choices[0].message.role or "assistant"
|
||||||
role=chunk.choices[0].message.role or "assistant",
|
content = chunk.choices[0].message.content
|
||||||
content=chunk.choices[0].message.content,
|
assistant_msg = ollama.Message(
|
||||||
thinking=None,
|
role=role,
|
||||||
images=None,
|
content=content,
|
||||||
tool_name=None,
|
thinking=None,
|
||||||
tool_calls=None
|
images=None,
|
||||||
)
|
tool_name=None,
|
||||||
rechunk = ollama.ChatResponse(model=chunk.model,
|
tool_calls=None)
|
||||||
created_at=iso8601_ns(),
|
rechunk = ollama.ChatResponse(
|
||||||
done_reason=chunk.choices[0].finish_reason,
|
model=chunk.model,
|
||||||
load_duration=100000,
|
created_at=iso8601_ns(),
|
||||||
prompt_eval_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None else None),
|
done_reason=chunk.choices[0].finish_reason,
|
||||||
eval_count= (chunk.usage.completion_tokens if chunk.usage is not None else None),
|
load_duration=100000,
|
||||||
prompt_eval_count=(chunk.usage.prompt_tokens if chunk.usage is not None else None),
|
prompt_eval_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None else None),
|
||||||
eval_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else None),
|
eval_count= (chunk.usage.completion_tokens if chunk.usage is not None else None),
|
||||||
total_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else None),
|
prompt_eval_count=(chunk.usage.prompt_tokens if chunk.usage is not None else None),
|
||||||
message=assistant_msg)
|
eval_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else None),
|
||||||
|
total_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else None),
|
||||||
|
message=assistant_msg)
|
||||||
return rechunk
|
return rechunk
|
||||||
|
|
||||||
def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float):
|
def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float):
|
||||||
with_thinking = chunk.choices[0] if chunk.choices[0] else None
|
with_thinking = chunk.choices[0] if chunk.choices[0] else None
|
||||||
thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None
|
thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None
|
||||||
rechunk = ollama.GenerateResponse(model=chunk.model,
|
rechunk = ollama.GenerateResponse(
|
||||||
created_at=iso8601_ns(),
|
model=chunk.model,
|
||||||
load_duration=10000,
|
created_at=iso8601_ns(),
|
||||||
done_reason=chunk.choices[0].finish_reason,
|
load_duration=10000,
|
||||||
done=None, #True if chunk.choices[0].finish_reason is not None else False,
|
done_reason=chunk.choices[0].finish_reason,
|
||||||
total_duration=(int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
|
done=None, #True if chunk.choices[0].finish_reason is not None else False,
|
||||||
eval_duration=(int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
|
total_duration=(int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
|
||||||
thinking=thinking,
|
eval_duration=(int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
|
||||||
response=chunk.choices[0].text
|
thinking=thinking,
|
||||||
)
|
response=chunk.choices[0].text)
|
||||||
return rechunk
|
return rechunk
|
||||||
|
|
||||||
def openai_embeddings2ollama(chunk: dict):
|
def openai_embeddings2ollama(chunk: dict):
|
||||||
|
|
@ -326,18 +322,18 @@ class rechunk:
|
||||||
return rechunk
|
return rechunk
|
||||||
|
|
||||||
def openai_embed2ollama(chunk: dict, model: str):
|
def openai_embed2ollama(chunk: dict, model: str):
|
||||||
rechunk = ollama.EmbedResponse(model=model,
|
rechunk = ollama.EmbedResponse(
|
||||||
created_at=iso8601_ns(),
|
model=model,
|
||||||
done=None,
|
created_at=iso8601_ns(),
|
||||||
done_reason=None,
|
done=None,
|
||||||
total_duration=None,
|
done_reason=None,
|
||||||
load_duration=None,
|
total_duration=None,
|
||||||
prompt_eval_count=None,
|
load_duration=None,
|
||||||
prompt_eval_duration=None,
|
prompt_eval_count=None,
|
||||||
eval_count=None,
|
prompt_eval_duration=None,
|
||||||
eval_duration=None,
|
eval_count=None,
|
||||||
embeddings=[chunk.data[0].embedding]
|
eval_duration=None,
|
||||||
)
|
embeddings=[chunk.data[0].embedding])
|
||||||
return rechunk
|
return rechunk
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# SSE Helpers
|
# SSE Helpers
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue