"""Translation between the OpenAI **Responses API** and **Chat Completions**.

The router speaks Chat Completions to every backend (Ollama, llama-server,
external OpenAI). To expose ``/v1/responses`` transparently on top of that, this
module converts in both directions:

* request: Responses ``input`` / ``instructions`` / ``tools`` → chat ``messages`` / ``tools``
* response: chat ``choices[0].message`` → Responses ``output`` items
* stream: chat completion deltas → Responses typed SSE events

Pure functions / a stream-translator class — no I/O, mirroring the style of
``requests/messages.py``. The native passthrough path (external OpenAI) does not
use this module; it forwards the SDK's Responses objects directly.
"""
|
|
import secrets
|
|
import time
|
|
|
|
import orjson
|
|
|
|
from requests.messages import _accumulate_openai_tc_delta
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Request direction: Responses → Chat Completions
# ---------------------------------------------------------------------------
|
|
def _responses_content_to_chat(content):
|
|
"""Convert a Responses message ``content`` into Chat Completions content.
|
|
|
|
Collapses a single text part to a plain string (what most backends expect);
|
|
keeps a multimodal list otherwise.
|
|
"""
|
|
if content is None or isinstance(content, str):
|
|
return content
|
|
if not isinstance(content, list):
|
|
return str(content)
|
|
parts = []
|
|
for p in content:
|
|
if not isinstance(p, dict):
|
|
parts.append({"type": "text", "text": str(p)})
|
|
continue
|
|
ptype = p.get("type")
|
|
if ptype in ("input_text", "output_text", "text"):
|
|
parts.append({"type": "text", "text": p.get("text", "")})
|
|
elif ptype in ("input_image", "image_url"):
|
|
url = p.get("image_url")
|
|
if isinstance(url, dict):
|
|
url = url.get("url")
|
|
if url:
|
|
parts.append({"type": "image_url", "image_url": {"url": url}})
|
|
# input_file / refusal / reasoning parts have no chat equivalent → skip
|
|
if len(parts) == 1 and parts[0].get("type") == "text":
|
|
return parts[0]["text"]
|
|
return parts
|
|
|
|
|
|
def _input_item_to_message(item):
|
|
"""Convert a single Responses ``input`` item to a chat message (or None)."""
|
|
if isinstance(item, str):
|
|
return {"role": "user", "content": item}
|
|
if not isinstance(item, dict):
|
|
return None
|
|
|
|
itype = item.get("type")
|
|
|
|
if itype == "function_call":
|
|
return {
|
|
"role": "assistant",
|
|
"content": None,
|
|
"tool_calls": [{
|
|
"id": item.get("call_id") or item.get("id"),
|
|
"type": "function",
|
|
"function": {
|
|
"name": item.get("name"),
|
|
"arguments": item.get("arguments", ""),
|
|
},
|
|
}],
|
|
}
|
|
|
|
if itype == "function_call_output":
|
|
output = item.get("output", "")
|
|
if not isinstance(output, str):
|
|
output = orjson.dumps(output).decode("utf-8")
|
|
return {
|
|
"role": "tool",
|
|
"tool_call_id": item.get("call_id") or item.get("id"),
|
|
"content": output,
|
|
}
|
|
|
|
if itype in ("reasoning",):
|
|
# No Chat Completions equivalent — drop.
|
|
return None
|
|
|
|
# "message" item or a bare {role, content} chat-style item
|
|
role = item.get("role")
|
|
if role is None:
|
|
return None
|
|
return {"role": role, "content": _responses_content_to_chat(item.get("content"))}
|
|
|
|
|
|
def responses_input_to_messages(input_data, instructions=None):
    """Build a Chat Completions ``messages`` list from Responses ``input``.

    ``instructions`` (when truthy) becomes a leading system message; a string
    ``input`` becomes a single user message; a list ``input`` is mapped
    item-by-item through ``_input_item_to_message``. Any other ``input`` type
    (including ``None``) contributes no messages.

    Consecutive ``function_call`` items are folded into ONE assistant message
    carrying several ``tool_calls``. The Responses API represents parallel
    tool calls as separate adjacent items, whereas Chat Completions expects a
    single assistant message followed by the matching ``tool`` messages.
    Items that are dropped during conversion (e.g. ``reasoning``) do not
    interrupt such a run.

    Args:
        input_data: Responses ``input`` — ``None``, a string, or a list of items.
        instructions: optional system prompt text.

    Returns:
        A new list of chat message dicts.
    """
    messages = []
    if instructions:
        messages.append({"role": "system", "content": instructions})

    if isinstance(input_data, str):
        messages.append({"role": "user", "content": input_data})
        return messages
    if not isinstance(input_data, list):
        return messages

    # True while the last appended message is an assistant message that was
    # produced from function_call items (and may therefore absorb more calls).
    previous_was_call = False
    for item in input_data:
        msg = _input_item_to_message(item)
        if msg is None:
            # Dropped items leave neighbouring messages adjacent in chat space.
            continue
        is_call = isinstance(item, dict) and item.get("type") == "function_call"
        if is_call and previous_was_call:
            # msg is freshly built by the converter, so extending the previous
            # message's list never aliases caller-owned data.
            messages[-1]["tool_calls"].extend(msg["tool_calls"])
        else:
            messages.append(msg)
        previous_was_call = is_call
    return messages
|
|
|
|
|
|
def _chat_content_to_responses_parts(content, assistant=False):
|
|
"""Convert chat message content → Responses content parts."""
|
|
text_type = "output_text" if assistant else "input_text"
|
|
if content is None:
|
|
return []
|
|
if isinstance(content, str):
|
|
return [{"type": text_type, "text": content}]
|
|
parts = []
|
|
for p in content if isinstance(content, list) else []:
|
|
if not isinstance(p, dict):
|
|
parts.append({"type": text_type, "text": str(p)})
|
|
elif p.get("type") == "text":
|
|
parts.append({"type": text_type, "text": p.get("text", "")})
|
|
elif p.get("type") == "image_url":
|
|
url = (p.get("image_url") or {}).get("url")
|
|
if url:
|
|
parts.append({"type": "input_image", "image_url": url})
|
|
return parts
|
|
|
|
|
|
def messages_to_responses_input(messages):
    """Convert chat messages → ``(instructions, Responses input items)``.

    Used for the native passthrough path: history that the router has resolved in
    chat-message space is re-expressed as Responses ``input``.

    Mapping rules:

    * ``system`` messages never become items; their text is collected and
      joined with blank lines into ``instructions``. String content is used
      as-is, list content contributes its text parts, ``None`` contributes
      nothing, and any other value is JSON-encoded.
    * ``tool`` messages become ``function_call_output`` items (non-string
      content is JSON-encoded).
    * ``assistant`` messages with ``tool_calls`` become an optional assistant
      text item followed by one ``function_call`` item per call. The text is
      emitted first — the same order ``chat_message_to_output_items`` uses —
      so converting the items back keeps each tool result directly after the
      message that carries its call.
    * every other message becomes a ``{role, content}`` item.

    Args:
        messages: iterable of chat message dicts.

    Returns:
        ``(instructions, items)`` where ``instructions`` is a string or
        ``None`` when no system text was found, and ``items`` is a list.
    """
    instructions_parts = []
    items = []
    for m in messages:
        role = m.get("role")
        content = m.get("content")

        if role == "system":
            if isinstance(content, str):
                instructions_parts.append(content)
            elif isinstance(content, list):
                # Chat allows system content as a list of text parts; use the
                # text itself instead of a JSON dump of the part objects.
                for part in content:
                    if isinstance(part, str):
                        instructions_parts.append(part)
                    elif isinstance(part, dict) and part.get("type") == "text":
                        instructions_parts.append(str(part.get("text") or ""))
            elif content is not None:
                # None is skipped so it does not become the literal "null".
                instructions_parts.append(orjson.dumps(content).decode("utf-8"))
            continue

        if role == "tool":
            out = content
            if not isinstance(out, str):
                out = orjson.dumps(out).decode("utf-8")
            items.append({"type": "function_call_output",
                          "call_id": m.get("tool_call_id"), "output": out})
            continue

        if role == "assistant" and m.get("tool_calls"):
            if content:
                items.append({"role": "assistant",
                              "content": _chat_content_to_responses_parts(content, assistant=True)})
            for tc in m["tool_calls"]:
                fn = tc.get("function") or {}
                items.append({"type": "function_call", "call_id": tc.get("id"),
                              "name": fn.get("name"), "arguments": fn.get("arguments", "")})
            continue

        items.append({"role": role,
                      "content": _chat_content_to_responses_parts(
                          content, assistant=(role == "assistant"))})

    instructions = "\n\n".join(p for p in instructions_parts if p) or None
    return instructions, items
|
|
|
|
|
|
def responses_object_to_sse(resp):
    """Render a *finished* Responses object as a valid SSE event stream.

    Used to serve cache/store hits to streaming clients without a backend call.

    The replay has the same shape as the live translation in this module:
    ``response.created`` → ``response.in_progress`` → per-item events →
    ``response.completed``. Each ``response.output_item.added`` event carries
    an *empty* shell (message ``content`` = ``[]``, function-call
    ``arguments`` = ``""``); the payload then arrives through a single delta
    followed by the matching ``done`` event. Shipping the full payload in the
    ``added`` event would make clients that accumulate deltas see it twice.

    Args:
        resp: a complete Responses object as a dict; it is not mutated.

    Returns:
        The whole event stream as one ``bytes`` value.
    """
    seq = -1

    def ev(etype, payload):
        # Every event gets the next sequence number, starting at 0.
        nonlocal seq
        seq += 1
        body = {"type": etype, "sequence_number": seq, **payload}
        return f"event: {etype}\ndata: {orjson.dumps(body).decode('utf-8')}\n\n".encode("utf-8")

    parts_out = []
    in_progress = {**resp, "status": "in_progress", "output": [], "output_text": ""}
    parts_out.append(ev("response.created", {"response": in_progress}))
    parts_out.append(ev("response.in_progress", {"response": in_progress}))

    for oi, item in enumerate(resp.get("output") or []):
        itype = item.get("type")
        iid = item.get("id")

        opening = {**item, "status": "in_progress"}
        if itype == "message":
            opening["content"] = []
        elif itype == "function_call":
            opening["arguments"] = ""
        parts_out.append(ev("response.output_item.added",
                            {"output_index": oi, "item": opening}))

        if itype == "message":
            for ci, part in enumerate(item.get("content") or []):
                # Only output_text parts are replayed incrementally; other
                # part types appear solely in the final output_item.done.
                if part.get("type") != "output_text":
                    continue
                text = part.get("text", "")
                where = {"item_id": iid, "output_index": oi, "content_index": ci}
                parts_out.append(ev("response.content_part.added", {
                    **where,
                    "part": {"type": "output_text", "text": "", "annotations": []}}))
                parts_out.append(ev("response.output_text.delta", {**where, "delta": text}))
                parts_out.append(ev("response.output_text.done", {**where, "text": text}))
                parts_out.append(ev("response.content_part.done", {**where, "part": part}))
        elif itype == "function_call":
            arguments = item.get("arguments") or ""
            if arguments:
                parts_out.append(ev("response.function_call_arguments.delta", {
                    "item_id": iid, "output_index": oi, "delta": arguments}))
            parts_out.append(ev("response.function_call_arguments.done", {
                "item_id": iid, "output_index": oi, "arguments": arguments}))

        parts_out.append(ev("response.output_item.done", {"output_index": oi, "item": item}))

    parts_out.append(ev("response.completed", {"response": resp}))
    return b"".join(parts_out)
|
|
|
|
|
|
def tools_responses_to_chat(tools):
    """Re-nest flat Responses function tools into Chat Completions form.

    A dict tool with ``type == "function"`` and no ``function`` key has its
    ``name`` / ``description`` / ``parameters`` / ``strict`` fields (those
    that are present) moved under a ``function`` object. Every other entry is
    passed through unchanged. Returns ``None`` when ``tools`` is empty or
    ``None``.
    """
    if not tools:
        return None

    moved_fields = ("name", "description", "parameters", "strict")

    def nest(tool):
        is_flat_function = (
            isinstance(tool, dict)
            and tool.get("type") == "function"
            and "function" not in tool
        )
        if not is_flat_function:
            return tool
        definition = {key: tool[key] for key in moved_fields if key in tool}
        return {"type": "function", "function": definition}

    return [nest(tool) for tool in tools]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Response direction: Chat Completions → Responses
# ---------------------------------------------------------------------------
|
|
def _new_id(prefix):
|
|
return f"{prefix}_{secrets.token_hex(16)}"
|
|
|
|
|
|
def chat_message_to_output_items(message):
    """Convert an assistant chat message (dict) into Responses output items.

    Truthy ``content`` becomes one completed ``message`` item holding a single
    ``output_text`` part; each entry of ``tool_calls`` becomes one completed
    ``function_call`` item. Text comes first, then the calls in their
    original order.

    When a tool call has no ``id``, a fresh ``call_…`` id is generated — the
    same fallback the streaming translator in this module applies — so the
    caller always gets a ``call_id`` it can answer with a
    ``function_call_output``. A missing or null ``function`` object or
    ``arguments`` value is treated as empty.

    Args:
        message: chat assistant message dict.

    Returns:
        A list of Responses output item dicts (possibly empty).
    """
    items = []
    content = message.get("content")
    if content:
        items.append({
            "type": "message",
            "id": _new_id("msg"),
            "status": "completed",
            "role": "assistant",
            "content": [{"type": "output_text", "text": content, "annotations": []}],
        })
    for tc in message.get("tool_calls") or []:
        fn = tc.get("function") or {}
        arguments = fn.get("arguments")
        if arguments is None:
            arguments = ""
        items.append({
            "type": "function_call",
            "id": _new_id("fc"),
            "call_id": tc.get("id") or _new_id("call"),
            "name": fn.get("name"),
            "arguments": arguments,
            "status": "completed",
        })
    return items
|
|
|
|
|
|
def usage_chat_to_responses(usage):
    """Rename chat usage counters to their Responses equivalents.

    ``prompt_tokens`` → ``input_tokens`` and ``completion_tokens`` →
    ``output_tokens`` (missing or falsy values count as 0). ``total_tokens``
    is taken from the input when truthy, otherwise it is the sum of the two
    counters. Returns ``None`` for empty or missing usage.
    """
    if not usage:
        return None
    input_tokens = usage.get("prompt_tokens") or 0
    output_tokens = usage.get("completion_tokens") or 0
    total_tokens = usage.get("total_tokens")
    if not total_tokens:
        total_tokens = input_tokens + output_tokens
    return dict(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=total_tokens,
    )
|
|
|
|
|
|
def output_items_to_text(output_items):
    """Join the text of every ``output_text`` part found in ``message`` items.

    Items of other types and parts of other types are ignored; a missing
    ``text`` counts as the empty string. ``None`` or an empty list yields
    ``""``.
    """
    return "".join(
        part.get("text", "")
        for entry in (output_items or [])
        if entry.get("type") == "message"
        for part in (entry.get("content") or [])
        if part.get("type") == "output_text"
    )
|
|
|
|
|
|
def build_response_object(
    *,
    response_id,
    model,
    output_items=None,
    usage=None,
    status="completed",
    created_at=None,
    previous_response_id=None,
    instructions=None,
    error=None,
    metadata=None,
):
    """Assemble a full ``object:"response"`` body for a non-streaming reply.

    Defaults applied here: ``output_items`` → empty list, ``created_at`` →
    the current Unix time in whole seconds, ``metadata`` → empty dict.
    ``output_text`` is derived from ``output_items``. ``usage`` is converted
    with ``usage_chat_to_responses`` only when it is non-empty and does not
    already contain ``input_tokens``; otherwise it is passed through as given.
    """
    items = output_items or []
    timestamp = created_at or int(time.time())
    text = output_items_to_text(items)

    # Usage that already has Responses-style keys (or is empty) is left alone.
    if usage and "input_tokens" not in usage:
        response_usage = usage_chat_to_responses(usage)
    else:
        response_usage = usage

    body = {"id": response_id, "object": "response"}
    body["created_at"] = timestamp
    body["status"] = status
    body["model"] = model
    body["output"] = items
    body["output_text"] = text
    body["instructions"] = instructions
    body["previous_response_id"] = previous_response_id
    body["usage"] = response_usage
    body["error"] = error
    body["metadata"] = metadata or {}
    return body
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Streaming direction: Chat Completions deltas → Responses typed SSE events
# ---------------------------------------------------------------------------
|
|
class ChatToResponsesStream:
    """Translate a Chat Completions streaming generator into Responses events.

    Usage::

        translator = ChatToResponsesStream(response_id, model, created_at)
        async for sse_bytes in translator.events(chat_async_gen):
            yield sse_bytes
        # translator.output_items / translator.usage now populated for storage

    Emits the ordered event family
    ``response.created`` → ``response.in_progress`` →
    (``response.output_item.added`` → ``response.content_part.added`` →
    ``response.output_text.delta``* → ``response.output_text.done`` →
    ``response.content_part.done`` → ``response.output_item.done``) and/or
    function-call item events → ``response.completed`` (carrying usage).

    All ``*.done`` events are emitted only after the chat stream is exhausted:
    first those of the text message (if any text arrived), then those of each
    function call.
    """

    def __init__(self, response_id, model, created_at=None,
                 previous_response_id=None, instructions=None, metadata=None):
        """Store the response envelope fields and reset per-stream state.

        ``created_at`` defaults to the current Unix time in whole seconds and
        ``metadata`` to an empty dict.
        """
        self.response_id = response_id
        self.model = model
        self.created_at = created_at or int(time.time())
        self.previous_response_id = previous_response_id
        self.instructions = instructions
        self.metadata = metadata or {}
        # Incremented before each event, so the first event is numbered 0.
        self.seq = -1
        # Final output items; filled in at the end of events().
        self.output_items = []
        # Last usage seen on the chat stream, in chat key names; None until seen.
        self.usage = None

    def _snapshot(self, status, output=None):
        """Build a response object for this stream with the given status.

        ``output`` defaults to an empty list; the usage recorded so far
        (possibly ``None``) is passed along.
        """
        return build_response_object(
            response_id=self.response_id,
            model=self.model,
            output_items=output if output is not None else [],
            usage=self.usage,
            status=status,
            created_at=self.created_at,
            previous_response_id=self.previous_response_id,
            instructions=self.instructions,
            metadata=self.metadata,
        )

    def _event(self, etype, payload):
        """Encode one SSE event as bytes, stamping the next sequence number.

        The JSON body contains ``type``, ``sequence_number`` and the entries
        of ``payload`` (a payload key with the same name would override).
        """
        self.seq += 1
        body = {"type": etype, "sequence_number": self.seq, **payload}
        return f"event: {etype}\ndata: {orjson.dumps(body).decode('utf-8')}\n\n".encode("utf-8")

    async def events(self, async_gen):
        """Yield Responses SSE events (bytes) for the chunks of ``async_gen``.

        Chunks are read via attribute access (``usage``, ``choices``,
        ``choices[0].delta`` with ``content`` / ``tool_calls``) — presumably
        OpenAI SDK chat-completion chunk objects; confirm against the caller.
        Only the first choice of each chunk is used. After the generator is
        exhausted, ``self.output_items`` holds the final items ordered by
        output index and ``self.usage`` the last usage seen.
        """
        yield self._event("response.created", {"response": self._snapshot("in_progress")})
        yield self._event("response.in_progress", {"response": self._snapshot("in_progress")})

        # Output indices are handed out in order of first appearance.
        next_oi = 0
        # text message state
        msg_item_id = None
        msg_oi = None
        text_parts = []
        # function-call state, keyed by chat tool_call index
        tc_state = {}  # idx -> {oi, item_id, call_id, name, args}

        async for chunk in async_gen:
            usage = getattr(chunk, "usage", None)
            if usage is not None:
                # Kept in chat key names; only these two counters are carried.
                self.usage = {
                    "prompt_tokens": usage.prompt_tokens or 0,
                    "completion_tokens": usage.completion_tokens or 0,
                }
            choices = getattr(chunk, "choices", None)
            if not choices:
                # Chunks without choices contribute nothing beyond usage.
                continue
            delta = choices[0].delta

            content_piece = getattr(delta, "content", None)
            if content_piece:
                if msg_item_id is None:
                    # First text: open the message item and its single text part.
                    msg_item_id = _new_id("msg")
                    msg_oi = next_oi
                    next_oi += 1
                    item = {
                        "id": msg_item_id, "type": "message", "status": "in_progress",
                        "role": "assistant", "content": [],
                    }
                    yield self._event("response.output_item.added",
                                      {"output_index": msg_oi, "item": item})
                    yield self._event("response.content_part.added", {
                        "item_id": msg_item_id, "output_index": msg_oi, "content_index": 0,
                        "part": {"type": "output_text", "text": "", "annotations": []},
                    })
                text_parts.append(content_piece)
                yield self._event("response.output_text.delta", {
                    "item_id": msg_item_id, "output_index": msg_oi, "content_index": 0,
                    "delta": content_piece,
                })

            for tc in getattr(delta, "tool_calls", None) or []:
                idx = tc.index
                fn = getattr(tc, "function", None)
                if idx not in tc_state:
                    # First fragment for this index: open a function_call item.
                    item_id = _new_id("fc")
                    state = {
                        "oi": next_oi, "item_id": item_id,
                        # A call id is generated when the fragment carries none.
                        "call_id": getattr(tc, "id", None) or _new_id("call"),
                        "name": (fn.name if fn else None), "args": "",
                    }
                    next_oi += 1
                    tc_state[idx] = state
                    yield self._event("response.output_item.added", {
                        "output_index": state["oi"],
                        "item": {
                            "id": item_id, "type": "function_call", "status": "in_progress",
                            "call_id": state["call_id"], "name": state["name"], "arguments": "",
                        },
                    })
                else:
                    # Later fragments may supply or replace the id and name.
                    state = tc_state[idx]
                    if getattr(tc, "id", None):
                        state["call_id"] = tc.id
                    if fn and fn.name:
                        state["name"] = fn.name
                if fn and fn.arguments:
                    # Argument text is accumulated and forwarded fragment by fragment.
                    state["args"] += fn.arguments
                    yield self._event("response.function_call_arguments.delta", {
                        "item_id": state["item_id"], "output_index": state["oi"],
                        "delta": fn.arguments,
                    })

        # finalize message item
        if msg_item_id is not None:
            full_text = "".join(text_parts)
            yield self._event("response.output_text.done", {
                "item_id": msg_item_id, "output_index": msg_oi, "content_index": 0,
                "text": full_text,
            })
            done_part = {"type": "output_text", "text": full_text, "annotations": []}
            yield self._event("response.content_part.done", {
                "item_id": msg_item_id, "output_index": msg_oi, "content_index": 0,
                "part": done_part,
            })
            msg_item = {
                "id": msg_item_id, "type": "message", "status": "completed",
                "role": "assistant", "content": [done_part],
            }
            yield self._event("response.output_item.done",
                              {"output_index": msg_oi, "item": msg_item})

        # finalize function-call items (in output-index order)
        # Dict insertion order equals output-index order because each index
        # is assigned at the moment its entry is inserted.
        tc_items = {}
        for idx, state in tc_state.items():
            yield self._event("response.function_call_arguments.done", {
                "item_id": state["item_id"], "output_index": state["oi"],
                "arguments": state["args"],
            })
            fc_item = {
                "id": state["item_id"], "type": "function_call", "status": "completed",
                "call_id": state["call_id"], "name": state["name"], "arguments": state["args"],
            }
            tc_items[state["oi"]] = fc_item
            yield self._event("response.output_item.done",
                              {"output_index": state["oi"], "item": fc_item})

        # assemble final output items ordered by output index
        ordered = []
        if msg_item_id is not None:
            ordered.append((msg_oi, msg_item))
        ordered.extend(tc_items.items())
        self.output_items = [item for _, item in sorted(ordered, key=lambda kv: kv[0])]

        # The closing snapshot carries the final items and the recorded usage.
        yield self._event("response.completed",
                          {"response": self._snapshot("completed", self.output_items)})
|