173 lines
9.1 KiB
Python
173 lines
9.1 KiB
Python
"""Unit tests for router.rechunk — OpenAI ↔ Ollama chunk shape conversion."""
|
|
import time
|
|
from types import SimpleNamespace
|
|
|
|
import ollama
|
|
|
|
import router
|
|
|
|
|
|
def _ns(**kw):
|
|
return SimpleNamespace(**kw)
|
|
|
|
|
|
def _stream_chunk(content="hi", role="assistant", finish_reason=None,
                  usage=None, model="m"):
    """Fake one streamed OpenAI chat chunk out of nested namespaces.

    The chunk holds exactly one choice. That choice carries a ``delta`` with
    the given ``content`` and ``role``; the delta's ``reasoning``,
    ``reasoning_content`` and ``tool_calls`` are always ``None``, as is the
    choice's ``logprobs``.
    """
    delta_fields = dict(
        content=content,
        role=role,
        reasoning=None,
        reasoning_content=None,
        tool_calls=None,
    )
    only_choice = _ns(
        delta=_ns(**delta_fields),
        finish_reason=finish_reason,
        logprobs=None,
    )
    return _ns(model=model, choices=[only_choice], usage=usage)
|
|
|
|
|
|
def _nonstream_chunk(content="hi", role="assistant", finish_reason="stop",
                     usage=None, model="m", tool_calls=None):
    """Fake a complete (non-streamed) OpenAI ChatCompletion out of namespaces.

    The completion holds exactly one choice. That choice carries a ``message``
    with the given ``content``, ``role`` and ``tool_calls``; the message's
    ``reasoning`` and ``reasoning_content`` are always ``None``, as is the
    choice's ``logprobs``.
    """
    message_fields = dict(
        content=content,
        role=role,
        reasoning=None,
        reasoning_content=None,
        tool_calls=tool_calls,
    )
    only_choice = _ns(
        message=_ns(**message_fields),
        finish_reason=finish_reason,
        logprobs=None,
    )
    return _ns(model=model, choices=[only_choice], usage=usage)
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# openai_chat_completion2ollama
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestChatCompletionToOllama:
    """Tests for ``router.rechunk.openai_chat_completion2ollama``.

    Each test passes a ``SimpleNamespace`` stand-in for an OpenAI chat object,
    the streaming flag, and a ``time.perf_counter()`` start timestamp, then
    asserts on fields of the returned response.
    """

    def test_streaming_content_chunk(self):
        """A streamed content delta becomes an unfinished assistant message."""
        chunk = _stream_chunk(content="hello", finish_reason=None, usage=None)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert isinstance(out, ollama.ChatResponse)
        assert out.message.role == "assistant"
        assert out.message.content == "hello"
        assert out.done is False  # usage is None → not done yet
        assert out.model == "m"

    def test_streaming_empty_content_defaults(self):
        """``None`` role/content in a delta fall back to "assistant" and ""."""
        # Some chunks have content=None — should coerce to empty string
        chunk = _stream_chunk(content=None, role=None)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.message.role == "assistant"  # role defaulted
        assert out.message.content == ""

    def test_final_usage_only_chunk_marks_done(self):
        """A chunk with no choices but with usage is reported as the final one."""
        usage = _ns(prompt_tokens=10, completion_tokens=5, total_tokens=15)
        chunk = _ns(model="m", choices=[], usage=usage)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.done is True
        assert out.done_reason == "stop"
        assert out.prompt_eval_count == 10
        assert out.eval_count == 5
        assert out.message.content == ""

    def test_nonstreaming_with_content(self):
        """A full completion maps content and token counts and is marked done."""
        usage = _ns(prompt_tokens=2, completion_tokens=3, total_tokens=5)
        chunk = _nonstream_chunk(content="response text", finish_reason="stop", usage=usage)
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.done is True
        assert out.message.content == "response text"
        assert out.prompt_eval_count == 2
        assert out.eval_count == 3

    def test_nonstreaming_tool_calls_converted(self):
        """Tool calls with JSON string arguments are parsed into dicts."""
        tc = _ns(function=_ns(name="get_weather", arguments='{"city": "Paris"}'))
        usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2)
        chunk = _nonstream_chunk(
            content="", finish_reason="tool_calls", usage=usage, tool_calls=[tc]
        )
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.message.tool_calls is not None
        assert len(out.message.tool_calls) == 1
        first = out.message.tool_calls[0]
        assert first.function.name == "get_weather"
        assert first.function.arguments == {"city": "Paris"}

    def test_nonstreaming_tool_calls_with_invalid_json_fall_back_to_empty(self):
        """Unparseable tool-call arguments are replaced by an empty dict."""
        tc = _ns(function=_ns(name="f", arguments="not-json"))
        usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2)
        chunk = _nonstream_chunk(content="", usage=usage, tool_calls=[tc])
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.message.tool_calls[0].function.arguments == {}

    def test_streaming_tool_calls_in_delta_are_skipped(self):
        """Streaming mode must not assemble tool calls (caller handles it)."""
        chunk = _stream_chunk(content="x", finish_reason=None)
        # _stream_chunk always builds the delta with tool_calls=None, so attach
        # a partial tool-call fragment by hand. Without it the assertion on
        # tool_calls below would hold trivially and the "ignore" path would
        # never be exercised.
        chunk.choices[0].delta.tool_calls = [
            _ns(
                index=0,
                id="call_0",
                type="function",
                function=_ns(name="get_weather", arguments='{"city": '),
            )
        ]
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.message.tool_calls is None
        # The text content of the same delta must still come through.
        assert out.message.content == "x"
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# openai_completion2ollama
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestCompletionToOllama:
    """Tests for ``router.rechunk.openai_completion2ollama`` in streaming mode."""

    @staticmethod
    def _convert(text, finish_reason, usage):
        """Wrap one text choice in a chunk for model "m" and convert it as a stream."""
        completion = _ns(
            model="m",
            choices=[_ns(text=text, finish_reason=finish_reason, reasoning=None)],
            usage=usage,
        )
        return router.rechunk.openai_completion2ollama(completion, True, time.perf_counter())

    def test_streaming_text_chunk(self):
        """A text chunk without usage yields an unfinished generate response."""
        result = self._convert("word", None, None)
        assert isinstance(result, ollama.GenerateResponse)
        assert result.response == "word"
        assert result.done is False

    def test_final_chunk_with_usage(self):
        """A chunk carrying usage is marked done and exposes its token counts."""
        token_usage = _ns(prompt_tokens=4, completion_tokens=6, total_tokens=10)
        result = self._convert("end", "stop", token_usage)
        assert result.done is True
        assert result.prompt_eval_count == 4
        assert result.eval_count == 6
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# embeddings / embed
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestEmbeddingConversions:
    """Tests for the two OpenAI → Ollama embedding converters on ``router.rechunk``."""

    def test_openai_embeddings2ollama(self):
        """The single vector in ``data`` is exposed as ``embedding``."""
        payload = _ns(data=[_ns(embedding=[0.1, 0.2, 0.3])])
        converted = router.rechunk.openai_embeddings2ollama(payload)
        assert isinstance(converted, ollama.EmbeddingsResponse)
        assert list(converted.embedding) == [0.1, 0.2, 0.3]

    def test_openai_embed2ollama(self):
        """The model name is passed through and the vector is the first embedding."""
        payload = _ns(data=[_ns(embedding=[0.5, 0.6])])
        converted = router.rechunk.openai_embed2ollama(payload, "my-embed-model")
        assert isinstance(converted, ollama.EmbedResponse)
        assert converted.model == "my-embed-model"
        first_vector = converted.embeddings[0]
        assert list(first_vector) == [0.5, 0.6]
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# extract_usage_from_llama_timings
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
class TestExtractUsageFromLlamaTimings:
    """Tests for ``router.rechunk.extract_usage_from_llama_timings``."""

    @staticmethod
    def _extract(**attrs):
        """Run the extractor on a namespace built from the given attributes."""
        return router.rechunk.extract_usage_from_llama_timings(_ns(**attrs))

    def test_none_when_no_timings_attr(self):
        """An object without a ``timings`` attribute gives ``None``."""
        assert self._extract() is None

    def test_prompt_plus_cache_sums(self):
        """Prompt count is ``prompt_n + cache_n``; completion is ``predicted_n``."""
        prompt_tokens, completion_tokens = self._extract(
            timings={"prompt_n": 1, "cache_n": 236, "predicted_n": 35}
        )
        assert prompt_tokens == 237
        assert completion_tokens == 35

    def test_missing_keys_default_to_zero(self):
        """Absent prompt/cache keys count as zero."""
        prompt_tokens, completion_tokens = self._extract(timings={"predicted_n": 12})
        assert prompt_tokens == 0
        assert completion_tokens == 12

    def test_null_values_treated_as_zero(self):
        """Keys present with ``None`` values count as zero."""
        prompt_tokens, completion_tokens = self._extract(
            timings={"prompt_n": None, "cache_n": None, "predicted_n": None}
        )
        assert prompt_tokens == 0
        assert completion_tokens == 0

    def test_non_dict_timings_returns_none(self):
        """A ``timings`` value that is not a dict gives ``None``."""
        assert self._extract(timings="not-a-dict") is None
|