"""Unit tests for router.rechunk — OpenAI ↔ Ollama chunk shape conversion.""" import time from types import SimpleNamespace import ollama import router def _ns(**kw): return SimpleNamespace(**kw) def _stream_chunk(content="hi", role="assistant", finish_reason=None, usage=None, model="m"): """Build a SimpleNamespace mimicking a streaming OpenAI chunk.""" delta = _ns(content=content, role=role, reasoning=None, reasoning_content=None, tool_calls=None) choice = _ns(delta=delta, finish_reason=finish_reason, logprobs=None) return _ns(model=model, choices=[choice], usage=usage) def _nonstream_chunk(content="hi", role="assistant", finish_reason="stop", usage=None, model="m", tool_calls=None): """Build a SimpleNamespace mimicking a non-streaming OpenAI ChatCompletion.""" message = _ns(content=content, role=role, reasoning=None, reasoning_content=None, tool_calls=tool_calls) choice = _ns(message=message, finish_reason=finish_reason, logprobs=None) return _ns(model=model, choices=[choice], usage=usage) # ────────────────────────────────────────────────────────────────────────────── # openai_chat_completion2ollama # ────────────────────────────────────────────────────────────────────────────── class TestChatCompletionToOllama: def test_streaming_content_chunk(self): chunk = _stream_chunk(content="hello", finish_reason=None, usage=None) out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter()) assert isinstance(out, ollama.ChatResponse) assert out.message.role == "assistant" assert out.message.content == "hello" assert out.done is False # usage is None → not done yet assert out.model == "m" def test_streaming_empty_content_defaults(self): # Some chunks have content=None — should coerce to empty string chunk = _stream_chunk(content=None, role=None) out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter()) assert out.message.role == "assistant" # role defaulted assert out.message.content == "" def test_final_usage_only_chunk_marks_done(self): usage = _ns(prompt_tokens=10, completion_tokens=5, total_tokens=15) chunk = _ns(model="m", choices=[], usage=usage) out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter()) assert out.done is True assert out.done_reason == "stop" assert out.prompt_eval_count == 10 assert out.eval_count == 5 assert out.message.content == "" def test_nonstreaming_with_content(self): usage = _ns(prompt_tokens=2, completion_tokens=3, total_tokens=5) chunk = _nonstream_chunk(content="response text", finish_reason="stop", usage=usage) out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter()) assert out.done is True assert out.message.content == "response text" assert out.prompt_eval_count == 2 assert out.eval_count == 3 def test_nonstreaming_tool_calls_converted(self): """Tool calls with JSON string arguments are parsed into dicts.""" tc = _ns(function=_ns(name="get_weather", arguments='{"city": "Paris"}')) usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2) chunk = _nonstream_chunk( content="", finish_reason="tool_calls", usage=usage, tool_calls=[tc] ) out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter()) assert out.message.tool_calls is not None assert len(out.message.tool_calls) == 1 first = out.message.tool_calls[0] assert first.function.name == "get_weather" assert first.function.arguments == {"city": "Paris"} def test_nonstreaming_tool_calls_with_invalid_json_fall_back_to_empty(self): tc = _ns(function=_ns(name="f", arguments="not-json")) usage = _ns(prompt_tokens=1, completion_tokens=1, total_tokens=2) chunk = _nonstream_chunk(content="", usage=usage, tool_calls=[tc]) out = router.rechunk.openai_chat_completion2ollama(chunk, False, time.perf_counter()) assert out.message.tool_calls[0].function.arguments == {} def test_streaming_tool_calls_in_delta_are_skipped(self): """Streaming mode must not assemble tool calls (caller handles it).""" chunk = _stream_chunk(content="x", finish_reason=None) # Even if a chunk somehow carried tool_calls in the delta, streaming # mode should ignore them. out = router.rechunk.openai_chat_completion2ollama(chunk, True, time.perf_counter()) assert out.message.tool_calls is None # ────────────────────────────────────────────────────────────────────────────── # openai_completion2ollama # ────────────────────────────────────────────────────────────────────────────── class TestCompletionToOllama: def test_streaming_text_chunk(self): choice = _ns(text="word", finish_reason=None, reasoning=None) chunk = _ns(model="m", choices=[choice], usage=None) out = router.rechunk.openai_completion2ollama(chunk, True, time.perf_counter()) assert isinstance(out, ollama.GenerateResponse) assert out.response == "word" assert out.done is False def test_final_chunk_with_usage(self): usage = _ns(prompt_tokens=4, completion_tokens=6, total_tokens=10) choice = _ns(text="end", finish_reason="stop", reasoning=None) chunk = _ns(model="m", choices=[choice], usage=usage) out = router.rechunk.openai_completion2ollama(chunk, True, time.perf_counter()) assert out.done is True assert out.prompt_eval_count == 4 assert out.eval_count == 6 # ────────────────────────────────────────────────────────────────────────────── # embeddings / embed # ────────────────────────────────────────────────────────────────────────────── class TestEmbeddingConversions: def test_openai_embeddings2ollama(self): chunk = _ns(data=[_ns(embedding=[0.1, 0.2, 0.3])]) out = router.rechunk.openai_embeddings2ollama(chunk) assert isinstance(out, ollama.EmbeddingsResponse) assert list(out.embedding) == [0.1, 0.2, 0.3] def test_openai_embed2ollama(self): chunk = _ns(data=[_ns(embedding=[0.5, 0.6])]) out = router.rechunk.openai_embed2ollama(chunk, "my-embed-model") assert isinstance(out, ollama.EmbedResponse) assert out.model == "my-embed-model" assert list(out.embeddings[0]) == [0.5, 0.6] # ────────────────────────────────────────────────────────────────────────────── # extract_usage_from_llama_timings # ────────────────────────────────────────────────────────────────────────────── class TestExtractUsageFromLlamaTimings: def test_none_when_no_timings_attr(self): obj = _ns() assert router.rechunk.extract_usage_from_llama_timings(obj) is None def test_prompt_plus_cache_sums(self): obj = _ns(timings={"prompt_n": 1, "cache_n": 236, "predicted_n": 35}) prompt, completion = router.rechunk.extract_usage_from_llama_timings(obj) assert prompt == 237 assert completion == 35 def test_missing_keys_default_to_zero(self): obj = _ns(timings={"predicted_n": 12}) prompt, completion = router.rechunk.extract_usage_from_llama_timings(obj) assert prompt == 0 assert completion == 12 def test_null_values_treated_as_zero(self): obj = _ns(timings={"prompt_n": None, "cache_n": None, "predicted_n": None}) prompt, completion = router.rechunk.extract_usage_from_llama_timings(obj) assert prompt == 0 assert completion == 0 def test_non_dict_timings_returns_none(self): obj = _ns(timings="not-a-dict") assert router.rechunk.extract_usage_from_llama_timings(obj) is None