nomyo-router/test/test_responses.py

460 lines
24 KiB
Python

"""Tests for the OpenAI Responses API support (api/responses.py + requests/responses.py).
Covers the pure translation layer, the translated (Ollama-style) and native
(external-OpenAI) backend paths, conversation storage / chaining, background mode,
and the retrieve / delete / cancel routes.
"""
import asyncio
from contextlib import ExitStack, contextmanager
from types import SimpleNamespace as NS
from unittest.mock import AsyncMock, MagicMock, patch
import orjson
import pytest
import router
from api import responses as api_responses
from requests import responses as rt
# ──────────────────────────────────────────────────────────────────────────────
# Pure translation unit tests (no app / no I/O)
# ──────────────────────────────────────────────────────────────────────────────
class TestTranslationInputToMessages:
    """Responses-API ``input`` / ``instructions`` -> chat ``messages`` translation."""

    def test_string_input(self):
        """A bare string input turns into a single user message."""
        expected = [{"role": "user", "content": "hello"}]
        assert rt.responses_input_to_messages("hello") == expected

    def test_instructions_become_system(self):
        """``instructions`` is emitted as a system message ahead of the user turn."""
        translated = rt.responses_input_to_messages("hi", instructions="be brief")
        assert translated[0] == {"role": "system", "content": "be brief"}
        assert translated[1] == {"role": "user", "content": "hi"}

    def test_item_list_text_and_image(self):
        """Multi-part content is mapped to chat-style ``text`` / ``image_url`` parts."""
        text_part = {"type": "input_text", "text": "describe"}
        image_part = {"type": "input_image", "image_url": "http://x/y.png"}
        message_item = {
            "type": "message",
            "role": "user",
            "content": [text_part, image_part],
        }
        translated = rt.responses_input_to_messages([message_item])
        assert translated[0]["role"] == "user"
        assert translated[0]["content"] == [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "http://x/y.png"}},
        ]

    def test_single_text_part_collapses_to_string(self):
        """A content list holding one text part is flattened to a plain string."""
        only_part = {"type": "input_text", "text": "yo"}
        message_item = {"type": "message", "role": "user", "content": [only_part]}
        translated = rt.responses_input_to_messages([message_item])
        assert translated[0]["content"] == "yo"

    def test_function_call_roundtrip(self):
        """A call item plus its output item become assistant tool_call + tool messages."""
        call_item = {
            "type": "function_call",
            "call_id": "c1",
            "name": "get",
            "arguments": "{\"x\":1}",
        }
        output_item = {"type": "function_call_output", "call_id": "c1", "output": "42"}
        translated = rt.responses_input_to_messages([call_item, output_item])
        assert translated[0]["role"] == "assistant"
        assert translated[0]["tool_calls"][0]["id"] == "c1"
        assert translated[0]["tool_calls"][0]["function"]["name"] == "get"
        assert translated[1] == {"role": "tool", "tool_call_id": "c1", "content": "42"}
class TestTranslationResponseDirection:
    """Chat-completion shaped data -> Responses-API shaped data (plus reverse helpers)."""

    def test_chat_message_to_output_items_text(self):
        """Assistant text yields exactly one ``message`` item with an ``output_text`` part."""
        assistant_msg = {"role": "assistant", "content": "hi there"}
        output = rt.chat_message_to_output_items(assistant_msg)
        assert len(output) == 1
        assert output[0]["type"] == "message"
        expected_part = {"type": "output_text", "text": "hi there", "annotations": []}
        assert output[0]["content"][0] == expected_part

    def test_chat_message_to_output_items_tool_call(self):
        """A chat tool call is surfaced as a ``function_call`` output item."""
        tool_call = {"id": "c9", "function": {"name": "f", "arguments": "{}"}}
        assistant_msg = {"role": "assistant", "content": None, "tool_calls": [tool_call]}
        output = rt.chat_message_to_output_items(assistant_msg)
        assert output[0]["type"] == "function_call"
        assert output[0]["call_id"] == "c9"
        assert output[0]["name"] == "f"

    def test_usage_mapping(self):
        """Chat token counters are renamed and a total is reported."""
        chat_usage = {"prompt_tokens": 7, "completion_tokens": 3}
        mapped = rt.usage_chat_to_responses(chat_usage)
        assert mapped == {"input_tokens": 7, "output_tokens": 3, "total_tokens": 10}

    def test_build_response_object_output_text(self):
        """The built response exposes ``output_text`` and reports ``completed``."""
        output = rt.chat_message_to_output_items({"role": "assistant", "content": "abc"})
        response = rt.build_response_object(
            response_id="resp_1", model="m", output_items=output)
        assert response["object"] == "response"
        assert response["output_text"] == "abc"
        assert response["status"] == "completed"

    def test_tools_responses_to_chat(self):
        """Flat Responses tool definitions are nested under a ``function`` key."""
        schema = {"type": "object"}
        responses_tools = [
            {"type": "function", "name": "f", "description": "d", "parameters": schema},
        ]
        expected = [{
            "type": "function",
            "function": {"name": "f", "description": "d", "parameters": {"type": "object"}},
        }]
        assert rt.tools_responses_to_chat(responses_tools) == expected

    def test_messages_to_responses_input(self):
        """System text is split off as instructions; other turns become typed items."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "yo"},
        ]
        instr, items = rt.messages_to_responses_input(history)
        assert instr == "sys"
        user_item = {"role": "user", "content": [{"type": "input_text", "text": "hi"}]}
        assistant_item = {"role": "assistant",
                          "content": [{"type": "output_text", "text": "yo"}]}
        assert items[0] == user_item
        assert items[1] == assistant_item
# ──────────────────────────────────────────────────────────────────────────────
# Fakes for backend generators
# ──────────────────────────────────────────────────────────────────────────────
def _fake_completion(content="hello world", usage=(3, 5)):
    """Build a stand-in for a non-streaming chat completion object.

    The result exposes ``choices[0].message`` and ``usage``; both are mocks
    whose ``model_dump()`` returns plain dicts. ``usage`` is a
    ``(prompt_tokens, completion_tokens)`` pair and the reported total is the
    sum of its elements.
    """
    message = MagicMock()
    message.model_dump.return_value = {"role": "assistant", "content": content}

    usage_mock = MagicMock()
    usage_mock.model_dump.return_value = {
        "prompt_tokens": usage[0],
        "completion_tokens": usage[1],
        "total_tokens": sum(usage),
    }

    only_choice = NS(message=message)
    return NS(choices=[only_choice], usage=usage_mock)
def _chunk(content=None, tool_calls=None):
    """Build one streaming chunk: a single choice carrying a delta, no usage."""
    delta = NS(content=content, tool_calls=tool_calls)
    choice = NS(delta=delta, finish_reason=None)
    return NS(choices=[choice], usage=None)
def _usage_chunk(p, c):
    """Build a usage-only streaming chunk: no choices, just prompt/completion counts."""
    token_counts = NS(prompt_tokens=p, completion_tokens=c)
    return NS(choices=[], usage=token_counts)
def _text_chunks():
    """Return an async generator: text deltas "Hel" and "lo", then a usage chunk (3, 5)."""
    async def _stream():
        for piece in ("Hel", "lo"):
            yield _chunk(content=piece)
        yield _usage_chunk(3, 5)

    return _stream()
def _toolcall_chunks():
    """Return an async generator streaming one tool call split over two deltas.

    The first delta carries the call id, the function name and the start of the
    JSON arguments; the second carries only the remaining argument text. A
    usage chunk (4, 2) closes the stream.
    """
    opening = NS(index=0, id="call_1", function=NS(name="lookup", arguments='{"q":'))
    continuation = NS(index=0, id=None, function=NS(name=None, arguments='"hi"}'))

    async def _stream():
        for fragment in (opening, continuation):
            yield _chunk(tool_calls=[fragment])
        yield _usage_chunk(4, 2)

    return _stream()
class _FakeEvent:
    """Minimal event stand-in: wraps a dict and hands it back from ``model_dump()``."""

    def __init__(self, data):
        # Stored as-is; no copy is made.
        self._data = data

    def model_dump(self):
        """Return the wrapped payload (the same object that was passed in)."""
        return self._data
def _native_event_stream():
    """Return an async generator of fake native events: created -> text delta -> completed."""
    async def _stream():
        created = {
            "type": "response.created",
            "response": {"id": "resp_openai", "status": "in_progress", "output": []},
        }
        text_delta = {
            "type": "response.output_text.delta",
            "item_id": "msg_1",
            "output_index": 0,
            "delta": "hi",
        }
        completed = {
            "type": "response.completed",
            "response": {
                "id": "resp_openai",
                "status": "completed",
                "output": [{"type": "message", "role": "assistant",
                            "content": [{"type": "output_text", "text": "hi"}]}],
                "usage": {"input_tokens": 2, "output_tokens": 1, "total_tokens": 3},
            },
        }
        for payload in (created, text_delta, completed):
            yield _FakeEvent(payload)

    return _stream()
def _sse_events(text):
    """Split an SSE body into a list of ``(event_type, data_dict)`` tuples.

    Parsing follows the event-stream framing rules closely enough for tests:

    * ``\\r\\n`` and bare ``\\r`` line endings are normalised to ``\\n`` first, so
      a CRLF-delimited stream is split into frames instead of being treated
      as one giant frame.
    * A field is ``name:value`` with one optional space after the colon.
    * Lines starting with ``:`` are comments (e.g. keep-alives) and ignored.
    * Several ``data`` lines in one frame are joined with ``\\n`` before the
      JSON is decoded.

    Frames that carry neither an ``event`` nor a ``data`` field are skipped.
    ``event_type`` is ``None`` when the frame has no ``event`` field and
    ``data_dict`` is ``None`` when it has no ``data`` field.
    """
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    out = []
    for frame in normalized.strip().split("\n\n"):
        etype = None
        data_lines = []
        for line in frame.split("\n"):
            if not line or line.startswith(":"):
                continue  # blank padding or SSE comment
            field, _, value = line.partition(":")
            if value.startswith(" "):
                value = value[1:]  # only a single leading space is stripped
            if field == "event":
                etype = value
            elif field == "data":
                data_lines.append(value)
        if etype is None and not data_lines:
            continue  # nothing but comments / unknown fields
        data = orjson.loads("\n".join(data_lines)) if data_lines else None
        out.append((etype, data))
    return out
@contextmanager
def _enter(*cms):
    """Enter every context manager in *cms* in order; leave them all on exit.

    Exists so that a tuple of patchers can be star-unpacked into a single
    ``with`` statement. Nothing is bound by ``as``.
    """
    stack = ExitStack()
    with stack:
        for manager in cms:
            stack.enter_context(manager)
        yield
def _patch_backend(native=False, endpoint="http://ollama:11434"):
    """Return the (not yet entered) patchers that isolate the route from real backends.

    In order, they replace on ``api_responses``:
    ``choose_endpoint`` (resolves to ``(endpoint, "test-model:latest")``),
    ``decrement_usage`` (no-op async mock), ``is_ext_openai_endpoint``
    (returns *native*), ``_make_openai_client`` (returns a ``MagicMock``) and
    ``get_llm_cache`` (returns ``None``).
    """
    chosen = AsyncMock(return_value=(endpoint, "test-model:latest"))
    patchers = [
        patch.object(api_responses, "choose_endpoint", chosen),
        patch.object(api_responses, "decrement_usage", AsyncMock()),
        patch.object(api_responses, "is_ext_openai_endpoint", return_value=native),
        patch.object(api_responses, "_make_openai_client", return_value=MagicMock()),
        patch.object(api_responses, "get_llm_cache", return_value=None),
    ]
    return tuple(patchers)
# ──────────────────────────────────────────────────────────────────────────────
# Translated path (Ollama-style backend)
# ──────────────────────────────────────────────────────────────────────────────
class TestTranslatedPath:
    """POST /v1/responses with ``is_ext_openai_endpoint`` patched to return False.

    ``create_chat_with_retries`` is replaced by a mock in every test, so the
    route runs against canned chat-completion data.
    """

    async def test_nonstream(self, client):
        """A non-streaming request returns a complete response object with usage."""
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries",
                                 AsyncMock(return_value=_fake_completion("hello world")))):
            resp = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "hi", "store": False})
        assert resp.status_code == 200
        body = resp.json()
        assert body["object"] == "response"
        assert body["output_text"] == "hello world"
        assert body["usage"] == {"input_tokens": 3, "output_tokens": 5, "total_tokens": 8}
        assert body["id"].startswith("resp_")

    async def test_stream_event_sequence(self, client):
        """Streaming text produces created -> deltas -> completed SSE events."""
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries",
                                 AsyncMock(return_value=_text_chunks()))):
            resp = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "hi",
                      "stream": True, "store": False})
        assert resp.status_code == 200
        assert resp.headers["content-type"].startswith("text/event-stream")
        events = _sse_events(resp.content.decode())
        types = [e[0] for e in events]
        assert types[0] == "response.created"
        assert "response.output_text.delta" in types
        assert types[-1] == "response.completed"
        # concatenated deltas reconstruct the content
        deltas = "".join(d["delta"] for t, d in events if t == "response.output_text.delta")
        assert deltas == "Hello"
        # completed event carries usage
        completed = [d for t, d in events if t == "response.completed"][0]
        assert completed["response"]["usage"]["input_tokens"] == 3

    async def test_stream_tool_calls(self, client):
        """Streamed tool-call fragments are re-emitted as argument deltas and assembled."""
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries",
                                 AsyncMock(return_value=_toolcall_chunks()))):
            resp = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "lookup hi",
                      "stream": True, "store": False})
        # Check the status first (as the sibling tests do) so a backend error
        # fails here rather than as an IndexError/KeyError further down.
        assert resp.status_code == 200
        events = _sse_events(resp.content.decode())
        types = [e[0] for e in events]
        assert "response.function_call_arguments.delta" in types
        assert "response.function_call_arguments.done" in types
        args = "".join(d["delta"] for t, d in events
                       if t == "response.function_call_arguments.delta")
        assert args == '{"q":"hi"}'
        completed = [d for t, d in events if t == "response.completed"][0]
        fc = [i for i in completed["response"]["output"] if i["type"] == "function_call"][0]
        assert fc["name"] == "lookup"
        assert fc["arguments"] == '{"q":"hi"}'
# ──────────────────────────────────────────────────────────────────────────────
# Native path (external OpenAI backend)
# ──────────────────────────────────────────────────────────────────────────────
class TestNativePath:
    """POST /v1/responses with ``is_ext_openai_endpoint`` patched to return True.

    The OpenAI client is a mock whose ``responses.create`` returns canned
    data, so the tests can inspect both what the router sent upstream and
    what it returned to the caller.
    """

    @staticmethod
    def _native_patches(oclient):
        """Patchers that route the request to *oclient* as an external-OpenAI endpoint."""
        return (
            patch.object(api_responses, "choose_endpoint",
                         AsyncMock(return_value=("https://api.openai.com/v1", "gpt"))),
            patch.object(api_responses, "decrement_usage", AsyncMock()),
            patch.object(api_responses, "is_ext_openai_endpoint", return_value=True),
            patch.object(api_responses, "_make_openai_client", return_value=oclient),
            patch.object(api_responses, "get_llm_cache", return_value=None),
        )

    async def test_nonstream_passthrough_rewrites_id(self, client):
        """The upstream body is passed through, but under a router-issued id."""
        oclient = MagicMock()
        resp_obj = MagicMock()
        resp_obj.model_dump.return_value = {
            "id": "resp_openai", "status": "completed",
            "output": [{"type": "message", "role": "assistant",
                        "content": [{"type": "output_text", "text": "native hi"}]}],
            "usage": {"input_tokens": 2, "output_tokens": 3, "total_tokens": 5}}
        oclient.responses.create = AsyncMock(return_value=resp_obj)
        with _enter(*self._native_patches(oclient)):
            resp = await client.post(
                "/v1/responses",
                json={"model": "gpt", "input": "hi", "store": False})
        # Check the status before reading the body so an error response fails
        # with a clear message instead of a KeyError.
        assert resp.status_code == 200
        body = resp.json()
        assert body["output_text"] == "native hi"
        assert body["id"].startswith("resp_") and body["id"] != "resp_openai"
        # native call must not delegate state upstream
        assert oclient.responses.create.call_args.kwargs["store"] is False

    async def test_stream_passthrough(self, client):
        """Streamed upstream events are forwarded with the response id rewritten."""
        oclient = MagicMock()
        oclient.responses.create = AsyncMock(return_value=_native_event_stream())
        with _enter(*self._native_patches(oclient)):
            resp = await client.post(
                "/v1/responses",
                json={"model": "gpt", "input": "hi",
                      "stream": True, "store": False})
        assert resp.status_code == 200
        events = _sse_events(resp.content.decode())
        # the completed event's response id is rewritten to the router id
        completed = [d for t, d in events if t == "response.completed"][0]
        assert completed["response"]["id"].startswith("resp_")
        assert completed["response"]["id"] != "resp_openai"
# ──────────────────────────────────────────────────────────────────────────────
# Storage + chaining + retrieve/delete
# ──────────────────────────────────────────────────────────────────────────────
class TestStorageAndChaining:
    """Stored responses: retrieval, ``previous_response_id`` chaining and deletion."""

    async def test_store_and_retrieve(self, client):
        """A response created with ``store=True`` can be fetched back by id."""
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries",
                                 AsyncMock(return_value=_fake_completion("remembered")))):
            created = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "hi", "store": True})
        # Fail on the status rather than on a missing "id" key.
        assert created.status_code == 200
        rid = created.json()["id"]
        got = await client.get(f"/v1/responses/{rid}")
        assert got.status_code == 200
        assert got.json()["output_text"] == "remembered"

    async def test_previous_response_id_rehydrates_history(self, client):
        """A follow-up request replays the stored turn to the backend."""
        # First turn
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries",
                                 AsyncMock(return_value=_fake_completion("turn-one")))):
            first = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "first?", "store": True})
        assert first.status_code == 200
        rid = first.json()["id"]
        # Second turn references the first — capture the messages sent to the backend
        capture = AsyncMock(return_value=_fake_completion("turn-two"))
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries", capture)):
            second = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "second?",
                      "previous_response_id": rid, "store": True})
        assert second.status_code == 200
        # call_args is None when the mock was never invoked; say so explicitly
        # instead of letting the next line raise AttributeError.
        assert capture.call_args is not None, "backend was never called"
        sent_messages = capture.call_args.args[1]["messages"]
        contents = [m.get("content") for m in sent_messages]
        assert "first?" in contents       # prior user turn replayed
        assert "turn-one" in contents     # prior assistant turn replayed
        assert "second?" in contents      # current turn appended

    async def test_delete(self, client):
        """Deleting a stored response reports success and makes it unretrievable."""
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries",
                                 AsyncMock(return_value=_fake_completion("bye")))):
            created = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "hi", "store": True})
        assert created.status_code == 200
        rid = created.json()["id"]
        deleted = await client.delete(f"/v1/responses/{rid}")
        assert deleted.status_code == 200
        assert deleted.json()["deleted"] is True
        assert (await client.get(f"/v1/responses/{rid}")).status_code == 404

    async def test_retrieve_missing_404(self, client):
        """Fetching an id that was never stored yields 404."""
        assert (await client.get("/v1/responses/resp_missing")).status_code == 404
# ──────────────────────────────────────────────────────────────────────────────
# Background mode
# ──────────────────────────────────────────────────────────────────────────────
class TestBackgroundMode:
    """``background: true`` requests and the orphan clean-up helper on the DB."""

    async def test_background_requires_store(self, client):
        """Background mode combined with ``store=False`` is rejected with 400."""
        payload = {"model": "test-model", "input": "hi",
                   "background": True, "store": False}
        resp = await client.post("/v1/responses", json=payload)
        assert resp.status_code == 400

    async def test_background_lifecycle(self, client):
        """A background request starts ``queued`` and is polled until ``completed``."""
        backend = AsyncMock(return_value=_fake_completion("bg-done"))
        payload = {"model": "test-model", "input": "hi",
                   "background": True, "store": True}
        with _enter(*_patch_backend(native=False),
                    patch.object(api_responses, "create_chat_with_retries", backend)):
            created = await client.post("/v1/responses", json=payload)
            assert created.status_code == 200
            assert created.json()["status"] == "queued"
            rid = created.json()["id"]
            # poll until terminal (at most 100 polls, 10 ms apart); the patches
            # stay active for the whole polling loop
            status = None
            attempts_left = 100
            while attempts_left > 0:
                attempts_left -= 1
                await asyncio.sleep(0.01)
                got = await client.get(f"/v1/responses/{rid}")
                status = got.json()["status"]
                if status in ("completed", "failed", "cancelled"):
                    break
        assert status == "completed"
        assert got.json()["output_text"] == "bg-done"

    async def test_fail_orphaned_responses(self, client):
        """An ``in_progress`` row is flipped to ``failed`` by ``fail_orphaned_responses``."""
        store = router.db
        await store.store_response(
            "resp_orphan", previous_response_id=None, model="m",
            status="in_progress", created_at=0, input_messages=[])
        failed_count = await store.fail_orphaned_responses()
        assert failed_count >= 1
        orphan = await store.get_response("resp_orphan")
        assert orphan["status"] == "failed"
# ──────────────────────────────────────────────────────────────────────────────
# Cache parity
# ──────────────────────────────────────────────────────────────────────────────
class _FakeCache:
    """Cache double: always answers with the preset bytes and records each lookup."""

    def __init__(self, response_bytes):
        # One (route, model, messages) tuple is appended per get_chat() call.
        self.calls = []
        self._resp = response_bytes

    async def get_chat(self, route, model, messages):
        """Record the lookup arguments and return the preset response."""
        lookup = (route, model, messages)
        self.calls.append(lookup)
        return self._resp
class TestCacheParity:
    """Cache hits are served without touching the backend, as JSON or as SSE."""

    @staticmethod
    def _cached_response_bytes():
        """Serialised response object whose text is "from-cache"."""
        output = rt.chat_message_to_output_items(
            {"role": "assistant", "content": "from-cache"})
        response = rt.build_response_object(
            response_id="resp_cached", model="test-model", output_items=output)
        return orjson.dumps(response)

    async def test_cache_hit_served_as_response(self, client):
        """A non-streaming cache hit returns the cached response object."""
        fake = _FakeCache(self._cached_response_bytes())
        cache_patch = patch.object(api_responses, "get_llm_cache", return_value=fake)
        # choose_endpoint raises if the route tries to reach a backend
        backend_guard = patch.object(
            api_responses, "choose_endpoint",
            AsyncMock(side_effect=AssertionError("backend must not be reached")))
        with cache_patch, backend_guard:
            resp = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "ping",
                      "store": False, "nomyo": {"cache": True}})
        assert resp.status_code == 200
        assert resp.json()["output_text"] == "from-cache"
        assert fake.calls and fake.calls[0][0] == "openai_responses"

    async def test_cache_hit_served_as_sse(self, client):
        """A streaming cache hit is replayed as SSE with the cached text in the deltas."""
        fake = _FakeCache(self._cached_response_bytes())
        cache_patch = patch.object(api_responses, "get_llm_cache", return_value=fake)
        backend_guard = patch.object(
            api_responses, "choose_endpoint",
            AsyncMock(side_effect=AssertionError("backend must not be reached")))
        with cache_patch, backend_guard:
            resp = await client.post(
                "/v1/responses",
                json={"model": "test-model", "input": "ping",
                      "stream": True, "store": False,
                      "nomyo": {"cache": True}})
        assert resp.headers["content-type"].startswith("text/event-stream")
        events = _sse_events(resp.content.decode())
        text_deltas = [d["delta"] for t, d in events
                       if t == "response.output_text.delta"]
        assert "".join(text_deltas) == "from-cache"