nomyo-router/test/test_unit_rechunk.py

"""Unit tests for router.rechunk — OpenAI ↔ Ollama chunk shape conversion."""
import time
from types import SimpleNamespace

import ollama

import router


def _ns(**kw):
    """Wrap the given keyword arguments in a ``SimpleNamespace``."""
    fields = dict(kw)
    return SimpleNamespace(**fields)


def _stream_chunk(content="hi", role="assistant", finish_reason=None,
                  usage=None, model="m"):
    """Return a stand-in for one chunk of a streaming OpenAI chat response.

    The result exposes ``model``, ``usage`` and a one-element ``choices``
    list whose entry carries a ``delta`` with the given content and role.
    """
    delta_fields = {
        "content": content,
        "role": role,
        "reasoning": None,
        "reasoning_content": None,
        "tool_calls": None,
    }
    only_choice = _ns(
        delta=_ns(**delta_fields),
        finish_reason=finish_reason,
        logprobs=None,
    )
    return _ns(model=model, choices=[only_choice], usage=usage)


def _nonstream_chunk(content="hi", role="assistant", finish_reason="stop",
                     usage=None, model="m", tool_calls=None):
    """Return a stand-in for a complete (non-streaming) OpenAI ChatCompletion.

    The result exposes ``model``, ``usage`` and a one-element ``choices``
    list whose entry carries a ``message`` with the given content, role and
    tool calls.
    """
    message_fields = {
        "content": content,
        "role": role,
        "reasoning": None,
        "reasoning_content": None,
        "tool_calls": tool_calls,
    }
    only_choice = _ns(
        message=_ns(**message_fields),
        finish_reason=finish_reason,
        logprobs=None,
    )
    return _ns(model=model, choices=[only_choice], usage=usage)


# ──────────────────────────────────────────────────────────────────────────────
# openai_chat_completion2ollama
# ──────────────────────────────────────────────────────────────────────────────

class TestChatCompletionToOllama:
    """Tests for ``router.rechunk.openai_chat_completion2ollama``.

    Each test hands the converter a SimpleNamespace stand-in for an OpenAI
    chat payload (a streaming chunk or a full completion) and checks fields
    of the returned ``ollama.ChatResponse``.
    """

    def test_streaming_content_chunk(self):
        """A streamed content delta without usage yields an unfinished reply."""
        chunk = _stream_chunk(content="hello", finish_reason=None, usage=None)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert isinstance(out, ollama.ChatResponse)
        assert out.message.role == "assistant"
        assert out.message.content == "hello"
        assert out.done is False           # usage is None → not done yet
        assert out.model == "m"

    def test_streaming_empty_content_defaults(self):
        """Missing role/content in a delta fall back to "assistant" and ""."""
        # Some chunks have content=None — should coerce to empty string
        chunk = _stream_chunk(content=None, role=None)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.message.role == "assistant"   # role defaulted
        assert out.message.content == ""

    def test_final_usage_only_chunk_marks_done(self):
        """A chunk with no choices but with usage is reported as done."""
        usage = _ns(prompt_tokens=10, completion_tokens=5, total_tokens=15)
        chunk = _ns(model="m", choices=[], usage=usage)
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.done is True
        assert out.done_reason == "stop"
        assert out.prompt_eval_count == 10
        assert out.eval_count == 5
        assert out.message.content == ""

    def test_nonstreaming_with_content(self):
        """A full completion maps content and token counts and is done."""
        usage = _ns(prompt_tokens=2, completion_tokens=3, total_tokens=5)
        chunk = _nonstream_chunk(content="response text", finish_reason="stop", usage=usage)
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.done is True
        assert out.message.content == "response text"
        assert out.prompt_eval_count == 2
        assert out.eval_count == 3

    def test_nonstreaming_tool_calls_converted(self):
        """Tool calls with JSON string arguments are parsed into dicts."""
        tc = _ns(function=_ns(name="get_weather", arguments='{"city": "Paris"}'))
        usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2)
        chunk = _nonstream_chunk(
            content="", finish_reason="tool_calls", usage=usage, tool_calls=[tc]
        )
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.message.tool_calls is not None
        assert len(out.message.tool_calls) == 1
        first = out.message.tool_calls[0]
        assert first.function.name == "get_weather"
        assert first.function.arguments == {"city": "Paris"}

    def test_nonstreaming_tool_calls_with_invalid_json_fall_back_to_empty(self):
        """Unparseable tool-call arguments are replaced by an empty dict."""
        tc = _ns(function=_ns(name="f", arguments="not-json"))
        usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2)
        chunk = _nonstream_chunk(content="", usage=usage, tool_calls=[tc])
        out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter())
        assert out.message.tool_calls[0].function.arguments == {}

    def test_streaming_tool_calls_in_delta_are_skipped(self):
        """Streaming mode must not assemble tool calls (caller handles it)."""
        chunk = _stream_chunk(content="x", finish_reason=None)
        # _stream_chunk always builds the delta with tool_calls=None, so attach
        # a tool-call fragment by hand. Without it the assertion below would
        # pass trivially and never exercise the "ignore tool calls" path.
        chunk.choices[0].delta.tool_calls = [
            _ns(
                index=0,
                id="call_0",
                type="function",
                function=_ns(name="get_weather", arguments='{"city": "Paris"}'),
            ),
        ]
        out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter())
        assert out.message.tool_calls is None


# ──────────────────────────────────────────────────────────────────────────────
# openai_completion2ollama
# ──────────────────────────────────────────────────────────────────────────────

class TestCompletionToOllama:
    """Tests for ``router.rechunk.openai_completion2ollama``."""

    @staticmethod
    def _convert_streaming(chunk):
        """Run the converter in streaming mode with a fresh start timestamp."""
        return router.rechunk.openai_completion2ollama(chunk, True, time.perf_counter())

    def test_streaming_text_chunk(self):
        """A text chunk without usage becomes an unfinished GenerateResponse."""
        partial = _ns(
            model="m",
            choices=[_ns(text="word", finish_reason=None, reasoning=None)],
            usage=None,
        )
        result = self._convert_streaming(partial)
        assert isinstance(result, ollama.GenerateResponse)
        assert result.response == "word"
        assert result.done is False

    def test_final_chunk_with_usage(self):
        """A chunk carrying usage is marked done and exposes token counts."""
        token_usage = _ns(prompt_tokens=4, completion_tokens=6, total_tokens=10)
        last = _ns(
            model="m",
            choices=[_ns(text="end", finish_reason="stop", reasoning=None)],
            usage=token_usage,
        )
        result = self._convert_streaming(last)
        assert result.done is True
        assert result.prompt_eval_count == 4
        assert result.eval_count == 6


# ──────────────────────────────────────────────────────────────────────────────
# embeddings / embed
# ──────────────────────────────────────────────────────────────────────────────

class TestEmbeddingConversions:
    """Tests for the two embedding converters on ``router.rechunk``."""

    def test_openai_embeddings2ollama(self):
        """The single vector ends up in ``EmbeddingsResponse.embedding``."""
        expected = [0.1, 0.2, 0.3]
        payload = _ns(data=[_ns(embedding=list(expected))])
        result = router.rechunk.openai_embeddings2ollama(payload)
        assert isinstance(result, ollama.EmbeddingsResponse)
        assert list(result.embedding) == expected

    def test_openai_embed2ollama(self):
        """The model name is passed through and the vector is the first row."""
        expected = [0.5, 0.6]
        payload = _ns(data=[_ns(embedding=list(expected))])
        result = router.rechunk.openai_embed2ollama(payload, "my-embed-model")
        assert isinstance(result, ollama.EmbedResponse)
        assert result.model == "my-embed-model"
        first_row = result.embeddings[0]
        assert list(first_row) == expected


# ──────────────────────────────────────────────────────────────────────────────
# extract_usage_from_llama_timings
# ──────────────────────────────────────────────────────────────────────────────

class TestExtractUsageFromLlamaTimings:
    """Tests for ``router.rechunk.extract_usage_from_llama_timings``."""

    @staticmethod
    def _extract(source):
        """Call the function under test on ``source`` and return its result."""
        return router.rechunk.extract_usage_from_llama_timings(source)

    def test_none_when_no_timings_attr(self):
        """An object without a ``timings`` attribute gives ``None``."""
        bare = _ns()
        assert self._extract(bare) is None

    def test_prompt_plus_cache_sums(self):
        """Prompt count is ``prompt_n + cache_n``; completion is ``predicted_n``."""
        timings = {"prompt_n": 1, "cache_n": 236, "predicted_n": 35}
        prompt_tokens, completion_tokens = self._extract(_ns(timings=timings))
        assert prompt_tokens == 237
        assert completion_tokens == 35

    def test_missing_keys_default_to_zero(self):
        """Absent prompt/cache keys count as zero."""
        timings = {"predicted_n": 12}
        prompt_tokens, completion_tokens = self._extract(_ns(timings=timings))
        assert prompt_tokens == 0
        assert completion_tokens == 12

    def test_null_values_treated_as_zero(self):
        """``None`` values count as zero."""
        timings = dict.fromkeys(("prompt_n", "cache_n", "predicted_n"))
        prompt_tokens, completion_tokens = self._extract(_ns(timings=timings))
        assert prompt_tokens == 0
        assert completion_tokens == 0

    def test_non_dict_timings_returns_none(self):
        """A ``timings`` attribute that is not a dict gives ``None``."""
        malformed = _ns(timings="not-a-dict")
        assert self._extract(malformed) is None