# nomyo-router/test/test_unit_rechunk.py
#
# File-viewer metadata from the original listing: 173 lines, 9.1 KiB, Python

"""Unit tests for router.rechunk — OpenAI ↔ Ollama chunk shape conversion."""
import time
from types import SimpleNamespace
import ollama
import router
def _ns(**kw):
return SimpleNamespace(**kw)
def _stream_chunk(content="hi", role="assistant", finish_reason=None,
                  usage=None, model="m", tool_calls=None):
    """Build a SimpleNamespace mimicking a streaming OpenAI chunk.

    The returned object exposes ``model``, ``usage`` and a one-element
    ``choices`` list. The single choice carries ``finish_reason``,
    ``logprobs=None`` and a ``delta`` holding ``content``, ``role`` and
    ``tool_calls``; ``reasoning`` and ``reasoning_content`` on the delta are
    always ``None``.

    Args:
        content: Value stored on ``delta.content`` (may be ``None``).
        role: Value stored on ``delta.role`` (may be ``None``).
        finish_reason: Value stored on the choice's ``finish_reason``.
        usage: Value stored on the chunk's ``usage``.
        model: Value stored on the chunk's ``model``.
        tool_calls: Value stored on ``delta.tool_calls``. Defaults to ``None``
            so existing callers are unaffected; pass a list to build a delta
            that carries tool calls, mirroring ``_nonstream_chunk``.
    """
    delta = _ns(content=content, role=role, reasoning=None, reasoning_content=None,
                tool_calls=tool_calls)
    choice = _ns(delta=delta, finish_reason=finish_reason, logprobs=None)
    return _ns(model=model, choices=[choice], usage=usage)
def _nonstream_chunk(content="hi", role="assistant", finish_reason="stop",
                     usage=None, model="m", tool_calls=None):
    """Build a SimpleNamespace mimicking a non-streaming OpenAI ChatCompletion.

    The single entry in ``choices`` carries a ``message`` (rather than a
    ``delta``) holding ``content``, ``role`` and ``tool_calls``; ``reasoning``
    and ``reasoning_content`` on the message are always ``None``, as is the
    choice's ``logprobs``.
    """
    return _ns(
        model=model,
        choices=[
            _ns(
                message=_ns(
                    content=content,
                    role=role,
                    reasoning=None,
                    reasoning_content=None,
                    tool_calls=tool_calls,
                ),
                finish_reason=finish_reason,
                logprobs=None,
            ),
        ],
        usage=usage,
    )
# ──────────────────────────────────────────────────────────────────────────────
# openai_chat_completion2ollama
# ──────────────────────────────────────────────────────────────────────────────
class TestChatCompletionToOllama:
    """Checks for ``router.rechunk.openai_chat_completion2ollama``.

    Every test feeds a SimpleNamespace stand-in for an OpenAI chat payload and
    inspects the returned ``ollama.ChatResponse``. The second positional
    argument selects streaming (``True``) or non-streaming (``False``) input;
    the third is a ``time.perf_counter()`` start timestamp.
    """

    def test_streaming_content_chunk(self):
        """A streaming content chunk yields a not-yet-done ChatResponse."""
        chunk = _stream_chunk(content="hello", finish_reason=None, usage=None)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert isinstance(out, ollama.ChatResponse)
        assert out.message.role == "assistant"
        assert out.message.content == "hello"
        assert out.done is False  # usage is None → not done yet
        assert out.model == "m"

    def test_streaming_empty_content_defaults(self):
        """``None`` content and role are expected to fall back to defaults."""
        # Some chunks have content=None — should coerce to empty string
        chunk = _stream_chunk(content=None, role=None)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.message.role == "assistant"  # role defaulted
        assert out.message.content == ""

    def test_final_usage_only_chunk_marks_done(self):
        """A chunk with empty ``choices`` but usage set is the terminal chunk."""
        usage = _ns(prompt_tokens=10, completion_tokens=5, total_tokens=15)
        chunk = _ns(model="m", choices=[], usage=usage)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.done is True
        assert out.done_reason == "stop"
        assert out.prompt_eval_count == 10
        assert out.eval_count == 5
        assert out.message.content == ""

    def test_nonstreaming_with_content(self):
        """A non-streaming completion maps content and token counts."""
        usage = _ns(prompt_tokens=2, completion_tokens=3, total_tokens=5)
        chunk = _nonstream_chunk(content="response text", finish_reason="stop", usage=usage)
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.done is True
        assert out.message.content == "response text"
        assert out.prompt_eval_count == 2
        assert out.eval_count == 3

    def test_nonstreaming_tool_calls_converted(self):
        """Tool calls with JSON string arguments are parsed into dicts."""
        tc = _ns(function=_ns(name="get_weather", arguments='{"city": "Paris"}'))
        usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2)
        chunk = _nonstream_chunk(
            content="", finish_reason="tool_calls", usage=usage, tool_calls=[tc]
        )
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.message.tool_calls is not None
        assert len(out.message.tool_calls) == 1
        first = out.message.tool_calls[0]
        assert first.function.name == "get_weather"
        assert first.function.arguments == {"city": "Paris"}

    def test_nonstreaming_tool_calls_with_invalid_json_fall_back_to_empty(self):
        """Unparseable tool-call arguments are expected to become ``{}``."""
        tc = _ns(function=_ns(name="f", arguments="not-json"))
        usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2)
        chunk = _nonstream_chunk(content="", usage=usage, tool_calls=[tc])
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.message.tool_calls[0].function.arguments == {}

    def test_streaming_tool_calls_in_delta_are_skipped(self):
        """Streaming mode must not assemble tool calls (caller handles it)."""
        chunk = _stream_chunk(content="x", finish_reason=None)
        # Put a real tool call on the delta. Left at the helper's default of
        # None, the assertion below would hold trivially and the test would
        # not exercise the "delta carries tool_calls" case it is named for.
        chunk.choices[0].delta.tool_calls = [
            _ns(
                index=0,
                id="call_0",
                type="function",
                function=_ns(name="get_weather", arguments='{"city": "Paris"}'),
            )
        ]
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.message.tool_calls is None
        # The text part of the same delta is still passed through.
        assert out.message.content == "x"
# ──────────────────────────────────────────────────────────────────────────────
# openai_completion2ollama
# ──────────────────────────────────────────────────────────────────────────────
class TestCompletionToOllama:
    """Checks for ``router.rechunk.openai_completion2ollama``."""

    def test_streaming_text_chunk(self):
        """A text chunk without usage maps to a not-done GenerateResponse."""
        completion = _ns(
            model="m",
            choices=[_ns(text="word", finish_reason=None, reasoning=None)],
            usage=None,
        )
        started = time.perf_counter()
        result = router.rechunk.openai_completion2ollama(completion, True, started)
        assert isinstance(result, ollama.GenerateResponse)
        assert result.response == "word"
        assert result.done is False

    def test_final_chunk_with_usage(self):
        """A chunk carrying usage is reported as done with token counts."""
        token_usage = _ns(prompt_tokens=4, completion_tokens=6, total_tokens=10)
        final_choice = _ns(text="end", finish_reason="stop", reasoning=None)
        completion = _ns(model="m", choices=[final_choice], usage=token_usage)
        started = time.perf_counter()
        result = router.rechunk.openai_completion2ollama(completion, True, started)
        assert result.done is True
        assert result.prompt_eval_count == 4
        assert result.eval_count == 6
# ──────────────────────────────────────────────────────────────────────────────
# embeddings / embed
# ──────────────────────────────────────────────────────────────────────────────
class TestEmbeddingConversions:
    """Checks for the two embedding converters on ``router.rechunk``."""

    def test_openai_embeddings2ollama(self):
        """The first data entry's vector ends up on ``embedding``."""
        vector = [0.1, 0.2, 0.3]
        payload = _ns(data=[_ns(embedding=vector)])
        result = router.rechunk.openai_embeddings2ollama(payload)
        assert isinstance(result, ollama.EmbeddingsResponse)
        assert list(result.embedding) == [0.1, 0.2, 0.3]

    def test_openai_embed2ollama(self):
        """The model name is passed through and the vector is listed first."""
        payload = _ns(data=[_ns(embedding=[0.5, 0.6])])
        result = router.rechunk.openai_embed2ollama(payload, "my-embed-model")
        assert isinstance(result, ollama.EmbedResponse)
        assert result.model == "my-embed-model"
        first_vector = result.embeddings[0]
        assert list(first_vector) == [0.5, 0.6]
# ──────────────────────────────────────────────────────────────────────────────
# extract_usage_from_llama_timings
# ──────────────────────────────────────────────────────────────────────────────
class TestExtractUsageFromLlamaTimings:
    """Checks for ``router.rechunk.extract_usage_from_llama_timings``."""

    def test_none_when_no_timings_attr(self):
        """An object without a ``timings`` attribute gives ``None``."""
        result = router.rechunk.extract_usage_from_llama_timings(_ns())
        assert result is None

    def test_prompt_plus_cache_sums(self):
        """Prompt count is expected to be ``prompt_n`` plus ``cache_n``."""
        timings = {"prompt_n": 1, "cache_n": 236, "predicted_n": 35}
        prompt_tokens, completion_tokens = (
            router.rechunk.extract_usage_from_llama_timings(_ns(timings=timings))
        )
        assert prompt_tokens == 237
        assert completion_tokens == 35

    def test_missing_keys_default_to_zero(self):
        """Keys absent from ``timings`` are expected to count as zero."""
        timings = {"predicted_n": 12}
        prompt_tokens, completion_tokens = (
            router.rechunk.extract_usage_from_llama_timings(_ns(timings=timings))
        )
        assert prompt_tokens == 0
        assert completion_tokens == 12

    def test_null_values_treated_as_zero(self):
        """``None`` values in ``timings`` are expected to count as zero."""
        timings = {"prompt_n": None, "cache_n": None, "predicted_n": None}
        prompt_tokens, completion_tokens = (
            router.rechunk.extract_usage_from_llama_timings(_ns(timings=timings))
        )
        assert prompt_tokens == 0
        assert completion_tokens == 0

    def test_non_dict_timings_returns_none(self):
        """A ``timings`` value that is not a dict gives ``None``."""
        holder = _ns(timings="not-a-dict")
        result = router.rechunk.extract_usage_from_llama_timings(holder)
        assert result is None