feat: transparent openai responses api integration
This commit is contained in:
parent
e7407b86b3
commit
b28f175b61
7 changed files with 1674 additions and 86 deletions
460
test/test_responses.py
Normal file
460
test/test_responses.py
Normal file
|
|
@ -0,0 +1,460 @@
|
|||
"""Tests for the OpenAI Responses API support (api/responses.py + requests/responses.py).
|
||||
|
||||
Covers the pure translation layer, the translated (Ollama-style) and native
|
||||
(external-OpenAI) backend paths, conversation storage / chaining, background mode,
|
||||
and the retrieve / delete / cancel routes.
|
||||
"""
|
||||
import asyncio
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from types import SimpleNamespace as NS
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import orjson
|
||||
import pytest
|
||||
|
||||
import router
|
||||
from api import responses as api_responses
|
||||
from requests import responses as rt
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Pure translation unit tests (no app / no I/O)
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestTranslationInputToMessages:
    """Checks on ``rt.responses_input_to_messages`` (Responses input -> chat messages)."""

    def test_string_input(self):
        """A bare string input yields exactly one user message."""
        expected = [{"role": "user", "content": "hello"}]
        assert rt.responses_input_to_messages("hello") == expected

    def test_instructions_become_system(self):
        """``instructions`` appears as a system message before the user message."""
        translated = rt.responses_input_to_messages("hi", instructions="be brief")
        system_msg = translated[0]
        user_msg = translated[1]
        assert system_msg == {"role": "system", "content": "be brief"}
        assert user_msg == {"role": "user", "content": "hi"}

    def test_item_list_text_and_image(self):
        """Mixed ``input_text`` / ``input_image`` parts map to chat ``text`` / ``image_url`` parts."""
        text_part = {"type": "input_text", "text": "describe"}
        image_part = {"type": "input_image", "image_url": "http://x/y.png"}
        payload = [{
            "type": "message",
            "role": "user",
            "content": [text_part, image_part],
        }]
        first = rt.responses_input_to_messages(payload)[0]
        assert first["role"] == "user"
        assert first["content"] == [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "http://x/y.png"}},
        ]

    def test_single_text_part_collapses_to_string(self):
        """A message holding a single text part is flattened to a plain string."""
        payload = [{
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "yo"}],
        }]
        translated = rt.responses_input_to_messages(payload)
        assert translated[0]["content"] == "yo"

    def test_function_call_roundtrip(self):
        """A ``function_call`` + ``function_call_output`` pair becomes assistant + tool messages."""
        call_item = {"type": "function_call", "call_id": "c1",
                     "name": "get", "arguments": '{"x":1}'}
        output_item = {"type": "function_call_output", "call_id": "c1", "output": "42"}
        translated = rt.responses_input_to_messages([call_item, output_item])
        assistant_msg = translated[0]
        assert assistant_msg["role"] == "assistant"
        tool_call = assistant_msg["tool_calls"][0]
        assert tool_call["id"] == "c1"
        assert tool_call["function"]["name"] == "get"
        assert translated[1] == {"role": "tool", "tool_call_id": "c1", "content": "42"}
|
||||
|
||||
|
||||
class TestTranslationResponseDirection:
    """Checks on the helpers that map chat-style data onto Responses-API structures."""

    def test_chat_message_to_output_items_text(self):
        """A text-only assistant message produces one ``message`` item with an ``output_text`` part."""
        assistant = {"role": "assistant", "content": "hi there"}
        output = rt.chat_message_to_output_items(assistant)
        assert len(output) == 1
        only_item = output[0]
        assert only_item["type"] == "message"
        assert only_item["content"][0] == {
            "type": "output_text", "text": "hi there", "annotations": [],
        }

    def test_chat_message_to_output_items_tool_call(self):
        """A tool call on the assistant message is surfaced as a ``function_call`` item."""
        assistant = {
            "role": "assistant",
            "content": None,
            "tool_calls": [{"id": "c9", "function": {"name": "f", "arguments": "{}"}}],
        }
        first = rt.chat_message_to_output_items(assistant)[0]
        assert first["type"] == "function_call"
        assert first["call_id"] == "c9"
        assert first["name"] == "f"

    def test_usage_mapping(self):
        """Prompt/completion token counts are renamed and a total is included."""
        mapped = rt.usage_chat_to_responses({"prompt_tokens": 7, "completion_tokens": 3})
        assert mapped == {"input_tokens": 7, "output_tokens": 3, "total_tokens": 10}

    def test_build_response_object_output_text(self):
        """The built response exposes ``object``, ``output_text`` and a completed status."""
        output = rt.chat_message_to_output_items({"role": "assistant", "content": "abc"})
        response = rt.build_response_object(
            response_id="resp_1", model="m", output_items=output)
        assert response["object"] == "response"
        assert response["output_text"] == "abc"
        assert response["status"] == "completed"

    def test_tools_responses_to_chat(self):
        """A flat Responses tool definition is nested under ``function`` for chat."""
        schema = {"type": "object"}
        responses_tools = [{"type": "function", "name": "f",
                            "description": "d", "parameters": schema}]
        expected = [{
            "type": "function",
            "function": {"name": "f", "description": "d",
                         "parameters": {"type": "object"}},
        }]
        assert rt.tools_responses_to_chat(responses_tools) == expected

    def test_messages_to_responses_input(self):
        """System content is returned as instructions; other turns become typed input items."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "yo"},
        ]
        instructions, input_items = rt.messages_to_responses_input(history)
        assert instructions == "sys"
        assert input_items[0] == {
            "role": "user", "content": [{"type": "input_text", "text": "hi"}]}
        assert input_items[1] == {
            "role": "assistant", "content": [{"type": "output_text", "text": "yo"}]}
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Fakes for backend generators
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _fake_completion(content="hello world", usage=(3, 5)):
    """Build a stand-in for a non-streaming chat completion result.

    Args:
        content: Text placed in the assistant message returned by
            ``choices[0].message.model_dump()``.
        usage: ``(prompt_tokens, completion_tokens)`` pair; the reported
            ``total_tokens`` is the sum of the pair.

    Returns:
        A namespace with ``choices`` (one entry carrying a mocked ``message``)
        and a mocked ``usage``; both mocks answer ``model_dump()``.
    """
    message_dump = {"role": "assistant", "content": content}
    usage_dump = {
        "prompt_tokens": usage[0],
        "completion_tokens": usage[1],
        "total_tokens": sum(usage),
    }
    # Dotted keyword names configure the return value of the child mock.
    message = MagicMock(**{"model_dump.return_value": message_dump})
    usage_mock = MagicMock(**{"model_dump.return_value": usage_dump})
    return NS(choices=[NS(message=message)], usage=usage_mock)
|
||||
|
||||
|
||||
def _chunk(content=None, tool_calls=None):
    """Build one fake streaming chunk with a single choice.

    Args:
        content: Value for the choice's ``delta.content``.
        tool_calls: Value for the choice's ``delta.tool_calls``.

    Returns:
        A namespace with one choice (``finish_reason`` is ``None``) and
        ``usage`` set to ``None``.
    """
    delta = NS(content=content, tool_calls=tool_calls)
    choice = NS(delta=delta, finish_reason=None)
    return NS(choices=[choice], usage=None)
|
||||
|
||||
|
||||
def _usage_chunk(p, c):
    """Build a fake streaming chunk that carries only usage numbers.

    Args:
        p: Prompt token count.
        c: Completion token count.

    Returns:
        A namespace with an empty ``choices`` list and a ``usage`` namespace
        exposing ``prompt_tokens`` and ``completion_tokens``.
    """
    usage = NS(prompt_tokens=p, completion_tokens=c)
    return NS(choices=[], usage=usage)
|
||||
|
||||
|
||||
def _text_chunks():
    """Return an async generator imitating a streamed text completion.

    The stream yields two content chunks ("Hel", then "lo") followed by a
    usage-only chunk reporting 3 prompt and 5 completion tokens.
    """
    async def _stream():
        for piece in ("Hel", "lo"):
            yield _chunk(content=piece)
        yield _usage_chunk(3, 5)

    return _stream()
|
||||
|
||||
|
||||
def _toolcall_chunks():
    """Return an async generator imitating a streamed tool call.

    The arguments of a single call (index 0, id ``call_1``, name ``lookup``)
    are split over two chunks; a usage-only chunk (4 prompt, 2 completion
    tokens) closes the stream.
    """
    fragments = (
        # First fragment carries the id and name plus the opening of the arguments.
        NS(index=0, id="call_1", function=NS(name="lookup", arguments='{"q":')),
        # Continuation fragment: same index, only the remaining argument text.
        NS(index=0, id=None, function=NS(name=None, arguments='"hi"}')),
    )

    async def _stream():
        for fragment in fragments:
            yield _chunk(tool_calls=[fragment])
        yield _usage_chunk(4, 2)

    return _stream()
|
||||
|
||||
|
||||
class _FakeEvent:
    """Minimal event stand-in whose ``model_dump()`` returns a preset payload."""

    def __init__(self, data):
        # Kept by reference; model_dump() hands back this same object.
        self._payload = data

    def model_dump(self):
        """Return the payload given at construction time."""
        return self._payload
|
||||
|
||||
|
||||
def _native_event_stream():
    """Return an async generator of fake native Responses stream events.

    Three ``_FakeEvent`` objects are yielded in order: ``response.created``,
    one ``response.output_text.delta`` and ``response.completed``. The upstream
    response id in the payloads is ``resp_openai``.
    """
    completed_response = {
        "id": "resp_openai", "status": "completed",
        "output": [{"type": "message", "role": "assistant",
                    "content": [{"type": "output_text", "text": "hi"}]}],
        "usage": {"input_tokens": 2, "output_tokens": 1, "total_tokens": 3},
    }
    payloads = (
        {"type": "response.created",
         "response": {"id": "resp_openai", "status": "in_progress", "output": []}},
        {"type": "response.output_text.delta",
         "item_id": "msg_1", "output_index": 0, "delta": "hi"},
        {"type": "response.completed", "response": completed_response},
    )

    async def _stream():
        for payload in payloads:
            yield _FakeEvent(payload)

    return _stream()
|
||||
|
||||
|
||||
def _sse_events(text):
    """Split an SSE body into a list of ``(event_type, data_dict)`` tuples.

    Args:
        text: The decoded ``text/event-stream`` body.

    Returns:
        One tuple per non-blank frame, in stream order. ``event_type`` is the
        value of the frame's ``event: `` line (``None`` if absent) and
        ``data_dict`` is the JSON-decoded ``data: `` payload (``None`` if the
        frame has no data line).

    Raises:
        orjson.JSONDecodeError: If a frame's data payload is not valid JSON.
    """
    # SSE permits CRLF, LF or CR line endings; normalise so that frames are
    # always separated by a blank line made of two "\n".
    normalised = text.replace("\r\n", "\n").replace("\r", "\n")
    out = []
    for frame in normalised.strip().split("\n\n"):
        if not frame.strip():
            continue
        etype = None
        data_lines = []
        # Split on "\n" only: str.splitlines() also breaks on U+2028/U+2029,
        # VT and FF, which may appear unescaped inside a JSON string and would
        # truncate the payload.
        for line in frame.split("\n"):
            if line.startswith("event: "):
                etype = line[len("event: "):]
            elif line.startswith("data: "):
                data_lines.append(line[len("data: "):])
        # Several data lines in one frame form a single payload joined by "\n".
        data = orjson.loads("\n".join(data_lines)) if data_lines else None
        out.append((etype, data))
    return out
|
||||
|
||||
|
||||
@contextmanager
def _enter(*cms):
    """Enter every given context manager, in order, as one combined context.

    Accepts any number of managers, so an unpacked tuple of patchers can be
    mixed with extra ones. They are exited in reverse order when the block ends.
    """
    stack = ExitStack()
    with stack:
        for manager in cms:
            stack.enter_context(manager)
        yield
|
||||
|
||||
|
||||
def _patch_backend(native=False, endpoint="http://ollama:11434"):
    """Build the patchers that stub endpoint selection and client construction.

    Args:
        native: Value that the patched ``is_ext_openai_endpoint`` returns.
        endpoint: Endpoint URL that the patched ``choose_endpoint`` returns,
            paired with the model name ``test-model:latest``.

    Returns:
        A tuple of five un-entered ``patch.object`` context managers targeting
        ``api_responses``: ``choose_endpoint``, ``decrement_usage``,
        ``is_ext_openai_endpoint``, ``_make_openai_client`` and
        ``get_llm_cache`` (which returns ``None``).
    """
    selection = AsyncMock(return_value=(endpoint, "test-model:latest"))
    patchers = [
        patch.object(api_responses, "choose_endpoint", selection),
        patch.object(api_responses, "decrement_usage", AsyncMock()),
        patch.object(api_responses, "is_ext_openai_endpoint", return_value=native),
        patch.object(api_responses, "_make_openai_client", return_value=MagicMock()),
        patch.object(api_responses, "get_llm_cache", return_value=None),
    ]
    return tuple(patchers)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Translated path (Ollama-style backend)
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestTranslatedPath:
    """``POST /v1/responses`` with ``is_ext_openai_endpoint`` patched to return False."""

    @staticmethod
    def _backend(chat_result):
        """Patch the backend so ``create_chat_with_retries`` resolves to *chat_result*."""
        return _enter(
            *_patch_backend(native=False),
            patch.object(api_responses, "create_chat_with_retries",
                         AsyncMock(return_value=chat_result)),
        )

    async def test_nonstream(self, client):
        """A non-streaming call returns a response object with text, usage and a ``resp_`` id."""
        request = {"model": "test-model", "input": "hi", "store": False}
        with self._backend(_fake_completion("hello world")):
            resp = await client.post("/v1/responses", json=request)
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["object"] == "response"
        assert payload["output_text"] == "hello world"
        assert payload["usage"] == {"input_tokens": 3, "output_tokens": 5, "total_tokens": 8}
        assert payload["id"].startswith("resp_")

    async def test_stream_event_sequence(self, client):
        """Streaming emits created -> text deltas -> completed, with usage on the final event."""
        request = {"model": "test-model", "input": "hi",
                   "stream": True, "store": False}
        with self._backend(_text_chunks()):
            resp = await client.post("/v1/responses", json=request)
        assert resp.status_code == 200
        assert resp.headers["content-type"].startswith("text/event-stream")

        events = _sse_events(resp.content.decode())
        types = [etype for etype, _ in events]
        assert types[0] == "response.created"
        assert "response.output_text.delta" in types
        assert types[-1] == "response.completed"

        # concatenated deltas reconstruct the content
        text_parts = [d["delta"] for t, d in events if t == "response.output_text.delta"]
        assert "".join(text_parts) == "Hello"

        # completed event carries usage
        completed = [d for t, d in events if t == "response.completed"][0]
        assert completed["response"]["usage"]["input_tokens"] == 3

    async def test_stream_tool_calls(self, client):
        """Streamed tool-call fragments surface as argument delta/done events and a final item."""
        request = {"model": "test-model", "input": "lookup hi",
                   "stream": True, "store": False}
        with self._backend(_toolcall_chunks()):
            resp = await client.post("/v1/responses", json=request)

        events = _sse_events(resp.content.decode())
        types = [etype for etype, _ in events]
        assert "response.function_call_arguments.delta" in types
        assert "response.function_call_arguments.done" in types

        arg_parts = [d["delta"] for t, d in events
                     if t == "response.function_call_arguments.delta"]
        assert "".join(arg_parts) == '{"q":"hi"}'

        completed = [d for t, d in events if t == "response.completed"][0]
        function_calls = [item for item in completed["response"]["output"]
                          if item["type"] == "function_call"]
        call = function_calls[0]
        assert call["name"] == "lookup"
        assert call["arguments"] == '{"q":"hi"}'
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Native path (external OpenAI backend)
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestNativePath:
    """``POST /v1/responses`` with ``is_ext_openai_endpoint`` patched to return True."""

    @staticmethod
    def _native_backend(oclient):
        """Patch endpoint selection to an OpenAI URL and hand out *oclient* as the client."""
        return _enter(
            patch.object(api_responses, "choose_endpoint",
                         AsyncMock(return_value=("https://api.openai.com/v1", "gpt"))),
            patch.object(api_responses, "decrement_usage", AsyncMock()),
            patch.object(api_responses, "is_ext_openai_endpoint", return_value=True),
            patch.object(api_responses, "_make_openai_client", return_value=oclient),
            patch.object(api_responses, "get_llm_cache", return_value=None),
        )

    async def test_nonstream_passthrough_rewrites_id(self, client):
        """The upstream body is passed through, but its id is replaced and ``store`` is False upstream."""
        upstream = MagicMock()
        upstream.model_dump.return_value = {
            "id": "resp_openai", "status": "completed",
            "output": [{"type": "message", "role": "assistant",
                        "content": [{"type": "output_text", "text": "native hi"}]}],
            "usage": {"input_tokens": 2, "output_tokens": 3, "total_tokens": 5},
        }
        oclient = MagicMock()
        oclient.responses.create = AsyncMock(return_value=upstream)

        request = {"model": "gpt", "input": "hi", "store": False}
        with self._native_backend(oclient):
            resp = await client.post("/v1/responses", json=request)

        payload = resp.json()
        assert payload["output_text"] == "native hi"
        assert payload["id"].startswith("resp_") and payload["id"] != "resp_openai"
        # native call must not delegate state upstream
        assert oclient.responses.create.call_args.kwargs["store"] is False

    async def test_stream_passthrough(self, client):
        """Streamed native events are forwarded with the response id rewritten."""
        oclient = MagicMock()
        oclient.responses.create = AsyncMock(return_value=_native_event_stream())

        request = {"model": "gpt", "input": "hi",
                   "stream": True, "store": False}
        with self._native_backend(oclient):
            resp = await client.post("/v1/responses", json=request)

        events = _sse_events(resp.content.decode())
        # the completed event's response id is rewritten to the router id
        completed = [d for t, d in events if t == "response.completed"][0]
        final_id = completed["response"]["id"]
        assert final_id.startswith("resp_")
        assert final_id != "resp_openai"
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Storage + chaining + retrieve/delete
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestStorageAndChaining:
    """Stored responses: retrieval, ``previous_response_id`` chaining and deletion."""

    @staticmethod
    def _backend(chat_mock):
        """Patch the translated backend, using *chat_mock* as ``create_chat_with_retries``."""
        return _enter(
            *_patch_backend(native=False),
            patch.object(api_responses, "create_chat_with_retries", chat_mock),
        )

    async def test_store_and_retrieve(self, client):
        """A response created with ``store: True`` can be fetched by its id."""
        request = {"model": "test-model", "input": "hi", "store": True}
        with self._backend(AsyncMock(return_value=_fake_completion("remembered"))):
            created = await client.post("/v1/responses", json=request)
        response_id = created.json()["id"]

        fetched = await client.get(f"/v1/responses/{response_id}")
        assert fetched.status_code == 200
        assert fetched.json()["output_text"] == "remembered"

    async def test_previous_response_id_rehydrates_history(self, client):
        """A chained request replays the earlier user and assistant turns to the backend."""
        # First turn
        first_request = {"model": "test-model", "input": "first?", "store": True}
        with self._backend(AsyncMock(return_value=_fake_completion("turn-one"))):
            first = await client.post("/v1/responses", json=first_request)
        response_id = first.json()["id"]

        # Second turn references the first — capture the messages sent to the backend
        capture = AsyncMock(return_value=_fake_completion("turn-two"))
        second_request = {"model": "test-model", "input": "second?",
                          "previous_response_id": response_id, "store": True}
        with self._backend(capture):
            await client.post("/v1/responses", json=second_request)

        sent_messages = capture.call_args.args[1]["messages"]
        contents = [message.get("content") for message in sent_messages]
        assert "first?" in contents    # prior user turn replayed
        assert "turn-one" in contents  # prior assistant turn replayed
        assert "second?" in contents   # current turn appended

    async def test_delete(self, client):
        """Deleting a stored response reports success and later lookups return 404."""
        request = {"model": "test-model", "input": "hi", "store": True}
        with self._backend(AsyncMock(return_value=_fake_completion("bye"))):
            created = await client.post("/v1/responses", json=request)
        response_id = created.json()["id"]

        deleted = await client.delete(f"/v1/responses/{response_id}")
        assert deleted.status_code == 200
        assert deleted.json()["deleted"] is True

        after = await client.get(f"/v1/responses/{response_id}")
        assert after.status_code == 404

    async def test_retrieve_missing_404(self, client):
        """Fetching an id that was never stored returns 404."""
        missing = await client.get("/v1/responses/resp_missing")
        assert missing.status_code == 404
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Background mode
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestBackgroundMode:
    """Background (``background: True``) requests and orphaned-response cleanup."""

    async def test_background_requires_store(self, client):
        """``background: True`` combined with ``store: False`` is rejected with 400."""
        request = {"model": "test-model", "input": "hi",
                   "background": True, "store": False}
        resp = await client.post("/v1/responses", json=request)
        assert resp.status_code == 400

    async def test_background_lifecycle(self, client):
        """A background request starts as ``queued`` and is polled until it completes."""
        terminal_states = ("completed", "failed", "cancelled")
        request = {"model": "test-model", "input": "hi",
                   "background": True, "store": True}
        chat_patch = patch.object(api_responses, "create_chat_with_retries",
                                  AsyncMock(return_value=_fake_completion("bg-done")))
        # Polling stays inside the patched context so the stubs are still
        # installed for as long as the request is being processed.
        with _enter(*_patch_backend(native=False), chat_patch):
            created = await client.post("/v1/responses", json=request)
            assert created.status_code == 200
            assert created.json()["status"] == "queued"
            response_id = created.json()["id"]

            # poll until terminal
            status = None
            for _ in range(100):
                await asyncio.sleep(0.01)
                got = await client.get(f"/v1/responses/{response_id}")
                status = got.json()["status"]
                if status in terminal_states:
                    break
            assert status == "completed"
            assert got.json()["output_text"] == "bg-done"

    async def test_fail_orphaned_responses(self, client):
        """A stored ``in_progress`` row is marked ``failed`` by ``fail_orphaned_responses``."""
        database = router.db
        await database.store_response(
            "resp_orphan", previous_response_id=None, model="m",
            status="in_progress", created_at=0, input_messages=[])
        failed_count = await database.fail_orphaned_responses()
        assert failed_count >= 1
        stored = await database.get_response("resp_orphan")
        assert stored["status"] == "failed"
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Cache parity
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class _FakeCache:
    """Cache stand-in that always answers ``get_chat`` with the same preset bytes."""

    def __init__(self, response_bytes):
        self._canned = response_bytes
        # One (route, model, messages) tuple per get_chat() call, in call order.
        self.calls = []

    async def get_chat(self, route, model, messages):
        """Record the lookup arguments and return the preset response."""
        lookup = (route, model, messages)
        self.calls.append(lookup)
        return self._canned
|
||||
|
||||
|
||||
class TestCacheParity:
    """Cache hits on ``/v1/responses`` for both JSON and SSE callers."""

    @staticmethod
    def _cached_bytes():
        """Serialize a response object whose assistant text is ``from-cache``."""
        assistant = {"role": "assistant", "content": "from-cache"}
        response = rt.build_response_object(
            response_id="resp_cached", model="test-model",
            output_items=rt.chat_message_to_output_items(assistant))
        return orjson.dumps(response)

    @staticmethod
    def _cache_only(fake):
        """Install *fake* as the cache and make any endpoint selection raise."""
        unreachable = AsyncMock(side_effect=AssertionError("backend must not be reached"))
        return _enter(
            patch.object(api_responses, "get_llm_cache", return_value=fake),
            patch.object(api_responses, "choose_endpoint", unreachable),
        )

    async def test_cache_hit_served_as_response(self, client):
        """A cache hit is returned as a JSON response and looked up under ``openai_responses``."""
        fake = _FakeCache(self._cached_bytes())
        request = {"model": "test-model", "input": "ping",
                   "store": False, "nomyo": {"cache": True}}
        with self._cache_only(fake):
            resp = await client.post("/v1/responses", json=request)
        assert resp.status_code == 200
        assert resp.json()["output_text"] == "from-cache"
        assert fake.calls and fake.calls[0][0] == "openai_responses"

    async def test_cache_hit_served_as_sse(self, client):
        """With ``stream: True`` a cache hit is replayed as SSE text deltas."""
        fake = _FakeCache(self._cached_bytes())
        request = {"model": "test-model", "input": "ping",
                   "stream": True, "store": False,
                   "nomyo": {"cache": True}}
        with self._cache_only(fake):
            resp = await client.post("/v1/responses", json=request)
        assert resp.headers["content-type"].startswith("text/event-stream")
        events = _sse_events(resp.content.decode())
        text_parts = [d["delta"] for t, d in events if t == "response.output_text.delta"]
        assert "".join(text_parts) == "from-cache"
|
||||
Loading…
Add table
Add a link
Reference in a new issue