Fix HandlerType import and apply Black formatting

- Import HandlerType from pytest_httpserver.httpserver rather than from the top-level pytest_httpserver package
- Apply Black formatting to all new test files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Adil Hafeez 2026-02-18 23:47:12 +00:00
parent 3a6a672c9d
commit aeef0c33a8
4 changed files with 256 additions and 83 deletions

View file

@ -6,7 +6,8 @@ The gateway container routes to host.docker.internal:51001 where the mock server
import json
import pytest
from pytest_httpserver import HTTPServer, HandlerType
from pytest_httpserver import HTTPServer
from pytest_httpserver.httpserver import HandlerType
from werkzeug.wrappers import Response
@ -19,7 +20,10 @@ def httpserver_listen_address():
# OpenAI Chat Completions helpers
# ---------------------------------------------------------------------------
def make_openai_chat_response(content="Hello from mock!", model="gpt-5-mini-2025-08-07", tool_calls=None):
def make_openai_chat_response(
content="Hello from mock!", model="gpt-5-mini-2025-08-07", tool_calls=None
):
message = {"role": "assistant", "content": content}
finish_reason = "stop"
if tool_calls:
@ -61,7 +65,9 @@ def make_openai_chat_stream(content="Hello from mock!", model="gpt-5-mini-2025-0
return "".join(lines)
def make_openai_tool_call_stream(model="gpt-5-mini-2025-08-07", tool_name="echo_tool", tool_args='{"text":"hello"}'):
def make_openai_tool_call_stream(
model="gpt-5-mini-2025-08-07", tool_name="echo_tool", tool_args='{"text":"hello"}'
):
lines = []
# Role chunk
lines.append(
@ -92,7 +98,10 @@ def make_openai_tool_call_stream(model="gpt-5-mini-2025-08-07", tool_name="echo_
# Anthropic Messages helpers
# ---------------------------------------------------------------------------
def make_anthropic_response(content="Hello from mock!", model="claude-sonnet-4-20250514"):
def make_anthropic_response(
content="Hello from mock!", model="claude-sonnet-4-20250514"
):
return {
"id": "msg-mock-123",
"type": "message",
@ -108,12 +117,21 @@ def make_anthropic_response(content="Hello from mock!", model="claude-sonnet-4-2
def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-20250514"):
lines = []
msg = {
"id": "msg-mock-123", "type": "message", "role": "assistant",
"model": model, "content": [], "stop_reason": None,
"stop_sequence": None, "usage": {"input_tokens": 10, "output_tokens": 0},
"id": "msg-mock-123",
"type": "message",
"role": "assistant",
"model": model,
"content": [],
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": 10, "output_tokens": 0},
}
lines.append(f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n")
lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n')
lines.append(
f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n"
)
lines.append(
f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n'
)
words = content.split(" ")
for i, word in enumerate(words):
@ -124,7 +142,9 @@ def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-202
f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"text_delta","text":"{escaped}"}}}}\n\n'
)
lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n')
lines.append(
f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n'
)
lines.append(
f'event: message_delta\ndata: {{"type":"message_delta","delta":{{"stop_reason":"end_turn","stop_sequence":null}},"usage":{{"output_tokens":5}}}}\n\n'
)
@ -132,33 +152,52 @@ def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-202
return "".join(lines)
def make_anthropic_thinking_stream(content="The answer is 4.", thinking="Let me think... 2+2=4", model="claude-sonnet-4-20250514"):
def make_anthropic_thinking_stream(
content="The answer is 4.",
thinking="Let me think... 2+2=4",
model="claude-sonnet-4-20250514",
):
lines = []
msg = {
"id": "msg-mock-think", "type": "message", "role": "assistant",
"model": model, "content": [], "stop_reason": None,
"stop_sequence": None, "usage": {"input_tokens": 10, "output_tokens": 0},
"id": "msg-mock-think",
"type": "message",
"role": "assistant",
"model": model,
"content": [],
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": 10, "output_tokens": 0},
}
lines.append(f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n")
lines.append(
f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n"
)
# Thinking block
lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"thinking","thinking":""}}}}\n\n')
lines.append(
f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"thinking","thinking":""}}}}\n\n'
)
for word in thinking.split(" "):
escaped = json.dumps(word)[1:-1]
lines.append(
f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"thinking_delta","thinking":"{escaped} "}}}}\n\n'
)
lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n')
lines.append(
f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n'
)
# Text block
lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":1,"content_block":{{"type":"text","text":""}}}}\n\n')
lines.append(
f'event: content_block_start\ndata: {{"type":"content_block_start","index":1,"content_block":{{"type":"text","text":""}}}}\n\n'
)
for i, word in enumerate(content.split(" ")):
prefix = " " if i > 0 else ""
escaped = json.dumps(f"{prefix}{word}")[1:-1]
lines.append(
f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":1,"delta":{{"type":"text_delta","text":"{escaped}"}}}}\n\n'
)
lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":1}}\n\n')
lines.append(
f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":1}}\n\n'
)
lines.append(
f'event: message_delta\ndata: {{"type":"message_delta","delta":{{"stop_reason":"end_turn","stop_sequence":null}},"usage":{{"output_tokens":20}}}}\n\n'
@ -171,7 +210,12 @@ def make_anthropic_thinking_stream(content="The answer is 4.", thinking="Let me
# OpenAI Responses API helpers
# ---------------------------------------------------------------------------
def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-2025-08-07", response_id="resp-mock-123"):
def make_responses_api_response(
content="Hello from mock!",
model="gpt-5-mini-2025-08-07",
response_id="resp-mock-123",
):
return {
"id": response_id,
"object": "response",
@ -183,7 +227,9 @@ def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-20
"id": "msg_mock_123",
"status": "completed",
"role": "assistant",
"content": [{"type": "output_text", "text": content, "annotations": []}],
"content": [
{"type": "output_text", "text": content, "annotations": []}
],
}
],
"status": "completed",
@ -191,10 +237,23 @@ def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-20
}
def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025-08-07", response_id="resp-mock-123"):
def make_responses_api_stream(
content="Hello from mock!",
model="gpt-5-mini-2025-08-07",
response_id="resp-mock-123",
):
lines = []
resp_base = {"id": response_id, "object": "response", "created_at": 1234567890, "model": model, "output": [], "status": "in_progress"}
lines.append(f"event: response.created\ndata: {json.dumps({'type': 'response.created', 'response': resp_base})}\n\n")
resp_base = {
"id": response_id,
"object": "response",
"created_at": 1234567890,
"model": model,
"output": [],
"status": "in_progress",
}
lines.append(
f"event: response.created\ndata: {json.dumps({'type': 'response.created', 'response': resp_base})}\n\n"
)
lines.append(
f'event: response.output_item.added\ndata: {{"type":"response.output_item.added","output_index":0,'
f'"item":{{"type":"message","id":"msg_mock_123","status":"in_progress","role":"assistant","content":[]}}}}\n\n'
@ -219,7 +278,9 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
)
final_item = {
"type": "message", "id": "msg_mock_123", "status": "completed",
"type": "message",
"id": "msg_mock_123",
"status": "completed",
"role": "assistant",
"content": [{"type": "output_text", "text": content, "annotations": []}],
}
@ -227,8 +288,12 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
f"event: response.output_item.done\ndata: {json.dumps({'type': 'response.output_item.done', 'output_index': 0, 'item': final_item})}\n\n"
)
final_resp = dict(resp_base, output=[final_item], status="completed",
usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15})
final_resp = dict(
resp_base,
output=[final_item],
status="completed",
usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
)
lines.append(
f"event: response.completed\ndata: {json.dumps({'type': 'response.completed', 'response': final_resp})}\n\n"
)
@ -239,7 +304,10 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
# Mock server setup helpers
# ---------------------------------------------------------------------------
def setup_openai_chat_mock(httpserver: HTTPServer, content="Hello from mock!", tool_calls=None):
def setup_openai_chat_mock(
httpserver: HTTPServer, content="Hello from mock!", tool_calls=None
):
"""Register a permanent handler for /v1/chat/completions on the mock server.
Returns a list that will be populated with captured request bodies.
"""
@ -253,26 +321,35 @@ def setup_openai_chat_mock(httpserver: HTTPServer, content="Hello from mock!", t
if tool_calls and not is_stream:
return Response(
json.dumps(make_openai_chat_response(model=model, tool_calls=tool_calls)),
status=200, content_type="application/json",
json.dumps(
make_openai_chat_response(model=model, tool_calls=tool_calls)
),
status=200,
content_type="application/json",
)
if is_stream:
return Response(
make_openai_chat_stream(content=content, model=model),
status=200, content_type="text/event-stream",
status=200,
content_type="text/event-stream",
)
return Response(
json.dumps(make_openai_chat_response(content=content, model=model)),
status=200, content_type="application/json",
status=200,
content_type="application/json",
)
httpserver.expect_request(
"/v1/chat/completions", method="POST", handler_type=HandlerType.PERMANENT,
"/v1/chat/completions",
method="POST",
handler_type=HandlerType.PERMANENT,
).respond_with_handler(handler)
return captured
def setup_anthropic_mock(httpserver: HTTPServer, content="Hello from mock!", thinking=False):
def setup_anthropic_mock(
httpserver: HTTPServer, content="Hello from mock!", thinking=False
):
"""Register a permanent handler for /v1/messages on the mock server.
Returns a list that will be populated with captured request bodies.
"""
@ -287,20 +364,25 @@ def setup_anthropic_mock(httpserver: HTTPServer, content="Hello from mock!", thi
if thinking and is_stream:
return Response(
make_anthropic_thinking_stream(model=model),
status=200, content_type="text/event-stream",
status=200,
content_type="text/event-stream",
)
if is_stream:
return Response(
make_anthropic_stream(content=content, model=model),
status=200, content_type="text/event-stream",
status=200,
content_type="text/event-stream",
)
return Response(
json.dumps(make_anthropic_response(content=content, model=model)),
status=200, content_type="application/json",
status=200,
content_type="application/json",
)
httpserver.expect_request(
"/v1/messages", method="POST", handler_type=HandlerType.PERMANENT,
"/v1/messages",
method="POST",
handler_type=HandlerType.PERMANENT,
).respond_with_handler(handler)
return captured
@ -322,25 +404,45 @@ def setup_responses_api_mock(httpserver: HTTPServer, content="Hello from mock!")
if is_stream:
return Response(
make_responses_api_stream(content=content, model=model, response_id=response_id),
status=200, content_type="text/event-stream",
make_responses_api_stream(
content=content, model=model, response_id=response_id
),
status=200,
content_type="text/event-stream",
)
return Response(
json.dumps(make_responses_api_response(content=content, model=model, response_id=response_id)),
status=200, content_type="application/json",
json.dumps(
make_responses_api_response(
content=content, model=model, response_id=response_id
)
),
status=200,
content_type="application/json",
)
httpserver.expect_request(
"/v1/responses", method="POST", handler_type=HandlerType.PERMANENT,
"/v1/responses",
method="POST",
handler_type=HandlerType.PERMANENT,
).respond_with_handler(handler)
return captured
def setup_error_mock(httpserver: HTTPServer, path="/v1/chat/completions", status=400, body=None):
def setup_error_mock(
httpserver: HTTPServer, path="/v1/chat/completions", status=400, body=None
):
"""Register a handler that returns an error response."""
error_body = body or json.dumps({
"error": {"message": "Bad Request", "type": "invalid_request_error", "code": "bad_request"}
})
httpserver.expect_request(path, method="POST").respond_with_data(
error_body, status=status, content_type="application/json",
error_body = body or json.dumps(
{
"error": {
"message": "Bad Request",
"type": "invalid_request_error",
"code": "bad_request",
}
}
)
httpserver.expect_request(path, method="POST").respond_with_data(
error_body,
status=status,
content_type="application/json",
)

View file

@ -131,7 +131,9 @@ def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):
def test_openai_client_with_claude_model(httpserver: HTTPServer):
"""OpenAI client → Claude model → gateway routes to Anthropic upstream → transforms response to OpenAI format"""
captured = setup_anthropic_mock(httpserver, content="Hello from Claude via OpenAI client!")
captured = setup_anthropic_mock(
httpserver, content="Hello from Claude via OpenAI client!"
)
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
completion = client.chat.completions.create(
@ -140,7 +142,9 @@ def test_openai_client_with_claude_model(httpserver: HTTPServer):
messages=[{"role": "user", "content": "Hello"}],
)
assert completion.choices[0].message.content == "Hello from Claude via OpenAI client!"
assert (
completion.choices[0].message.content == "Hello from Claude via OpenAI client!"
)
assert len(captured) == 1
assert captured[0]["model"] == "claude-sonnet-4-20250514"
@ -167,7 +171,9 @@ def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
def test_anthropic_client_with_openai_model(httpserver: HTTPServer):
"""Anthropic client → OpenAI model (gpt-4o-mini) → OpenAI upstream → transforms response to Anthropic format"""
captured = setup_openai_chat_mock(httpserver, content="Hello from GPT via Anthropic!")
captured = setup_openai_chat_mock(
httpserver, content="Hello from GPT via Anthropic!"
)
client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
message = client.messages.create(
@ -257,7 +263,10 @@ def test_assistant_message_with_null_content_and_tool_calls(httpserver: HTTPServ
{
"id": "call_test123",
"type": "function",
"function": {"name": "get_weather", "arguments": '{"city": "Seattle"}'},
"function": {
"name": "get_weather",
"arguments": '{"city": "Seattle"}',
},
}
],
},
@ -329,7 +338,9 @@ def test_anthropic_thinking_mode_streaming(httpserver: HTTPServer):
messages=[{"role": "user", "content": "What is 2+2?"}],
) as stream:
for event in stream:
if event.type == "content_block_start" and getattr(event, "content_block", None):
if event.type == "content_block_start" and getattr(
event, "content_block", None
):
if getattr(event.content_block, "type", None) == "thinking":
thinking_block_started = True
if event.type == "content_block_delta" and getattr(event, "delta", None):

View file

@ -58,9 +58,13 @@ def test_responses_api_streaming_passthrough(httpserver: HTTPServer):
text_chunks = []
final_message = None
for event in stream:
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
event, "delta", None
):
text_chunks.append(event.delta)
if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
if getattr(event, "type", None) == "response.completed" and getattr(
event, "response", None
):
final_message = event.response
full_content = "".join(text_chunks)
@ -72,7 +76,9 @@ def test_responses_api_with_tools_passthrough(httpserver: HTTPServer):
"""Responses API with tools for OpenAI model"""
setup_responses_api_mock(httpserver, content="Tool response")
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
client = openai.OpenAI(
api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
)
tools = [
{
"type": "function",
@ -100,7 +106,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
"""Responses API streaming with tools for OpenAI model"""
setup_responses_api_mock(httpserver, content="Streamed tool response")
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
client = openai.OpenAI(
api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
)
tools = [
{
"type": "function",
@ -127,7 +135,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
etype = getattr(event, "type", None)
if etype == "response.output_text.delta" and getattr(event, "delta", None):
text_chunks.append(event.delta)
if etype == "response.function_call_arguments.delta" and getattr(event, "delta", None):
if etype == "response.function_call_arguments.delta" and getattr(
event, "delta", None
):
tool_calls.append(event.delta)
assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
@ -140,7 +150,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
def test_responses_api_non_streaming_upstream_anthropic(httpserver: HTTPServer):
"""Responses API with Anthropic model → translated to /v1/chat/completions"""
captured = setup_openai_chat_mock(httpserver, content="Hello from Claude via Responses!")
captured = setup_openai_chat_mock(
httpserver, content="Hello from Claude via Responses!"
)
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
resp = client.responses.create(
@ -165,7 +177,9 @@ def test_responses_api_streaming_upstream_anthropic(httpserver: HTTPServer):
text_chunks = []
for event in stream:
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
event, "delta", None
):
text_chunks.append(event.delta)
assert len(text_chunks) > 0, "Should have received streaming text deltas"
@ -202,7 +216,9 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS
"""Responses API streaming with tools routed to Anthropic"""
setup_openai_chat_mock(httpserver, content="Streamed tool via Claude")
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
client = openai.OpenAI(
api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
)
tools = [
{
"type": "function",
@ -229,7 +245,9 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS
etype = getattr(event, "type", None)
if etype == "response.output_text.delta" and getattr(event, "delta", None):
text_chunks.append(event.delta)
if etype == "response.function_call_arguments.delta" and getattr(event, "delta", None):
if etype == "response.function_call_arguments.delta" and getattr(
event, "delta", None
):
tool_calls.append(event.delta)
assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
@ -254,7 +272,9 @@ def test_responses_api_mixed_content_types(httpserver: HTTPServer):
},
{
"role": "user",
"content": [{"type": "input_text", "text": "What is the weather in Seattle"}],
"content": [
{"type": "input_text", "text": "What is the weather in Seattle"}
],
},
],
)
@ -278,7 +298,9 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
# For non-OpenAI models, Responses API translates to /v1/chat/completions
# But for OpenAI models, it uses /v1/responses directly
# The state management is handled by brightstaff regardless of upstream
captured = setup_openai_chat_mock(httpserver, content="I remember your name is Alice!")
captured = setup_openai_chat_mock(
httpserver, content="I remember your name is Alice!"
)
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
@ -306,7 +328,9 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
second_request = captured[1]
messages = second_request.get("messages", [])
# Should have messages from both turns (user + assistant from turn 1, plus user from turn 2)
assert len(messages) >= 3, f"Expected >= 3 messages in second turn, got {len(messages)}: {messages}"
assert (
len(messages) >= 3
), f"Expected >= 3 messages in second turn, got {len(messages)}: {messages}"
def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer):
@ -325,9 +349,13 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
text_chunks_1 = []
response_id_1 = None
for event in stream1:
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
event, "delta", None
):
text_chunks_1.append(event.delta)
if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
if getattr(event, "type", None) == "response.completed" and getattr(
event, "response", None
):
response_id_1 = event.response.id
assert response_id_1 is not None
@ -344,9 +372,13 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
text_chunks_2 = []
response_id_2 = None
for event in stream2:
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
event, "delta", None
):
text_chunks_2.append(event.delta)
if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
if getattr(event, "type", None) == "response.completed" and getattr(
event, "response", None
):
response_id_2 = event.response.id
assert response_id_2 is not None
@ -357,4 +389,6 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
assert len(captured) == 2
second_request = captured[1]
messages = second_request.get("messages", [])
assert len(messages) >= 3, f"Expected >= 3 messages in second turn, got {len(messages)}"
assert (
len(messages) >= 3
), f"Expected >= 3 messages in second turn, got {len(messages)}"

View file

@ -17,7 +17,8 @@ import anthropic
import pytest
import logging
from pytest_httpserver import HTTPServer, HandlerType
from pytest_httpserver import HTTPServer
from pytest_httpserver.httpserver import HandlerType
from werkzeug.wrappers import Response
from conftest import (
@ -39,7 +40,9 @@ LLM_GATEWAY_BASE = "http://localhost:12000"
def test_openai_chat_streaming_basic(httpserver: HTTPServer):
"""Basic OpenAI streaming: verify chunks arrive in order and reassemble correctly"""
setup_openai_chat_mock(httpserver, content="The quick brown fox jumps over the lazy dog")
setup_openai_chat_mock(
httpserver, content="The quick brown fox jumps over the lazy dog"
)
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
stream = client.chat.completions.create(
@ -66,12 +69,17 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
body = json.loads(request.data)
model = body.get("model", "gpt-5-mini-2025-08-07")
return Response(
make_openai_tool_call_stream(model=model, tool_name="echo_tool", tool_args='{"text":"hello"}'),
status=200, content_type="text/event-stream",
make_openai_tool_call_stream(
model=model, tool_name="echo_tool", tool_args='{"text":"hello"}'
),
status=200,
content_type="text/event-stream",
)
httpserver.expect_request(
"/v1/chat/completions", method="POST", handler_type=HandlerType.PERMANENT,
"/v1/chat/completions",
method="POST",
handler_type=HandlerType.PERMANENT,
).respond_with_handler(handler)
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
@ -85,7 +93,11 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
"function": {
"name": "echo_tool",
"description": "Echo input",
"parameters": {"type": "object", "properties": {"text": {"type": "string"}}, "required": ["text"]},
"parameters": {
"type": "object",
"properties": {"text": {"type": "string"}},
"required": ["text"],
},
},
}
],
@ -97,14 +109,18 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
if chunk.choices and chunk.choices[0].delta.tool_calls:
for tc in chunk.choices[0].delta.tool_calls:
while len(tool_calls) <= tc.index:
tool_calls.append({"id": "", "function": {"name": "", "arguments": ""}})
tool_calls.append(
{"id": "", "function": {"name": "", "arguments": ""}}
)
if tc.id:
tool_calls[tc.index]["id"] = tc.id
if tc.function:
if tc.function.name:
tool_calls[tc.index]["function"]["name"] = tc.function.name
if tc.function.arguments:
tool_calls[tc.index]["function"]["arguments"] += tc.function.arguments
tool_calls[tc.index]["function"][
"arguments"
] += tc.function.arguments
assert len(tool_calls) > 0, "Should have received tool calls"
assert tool_calls[0]["function"]["name"] == "echo_tool"
@ -142,7 +158,11 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):
client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
events_seen = {"thinking_start": False, "thinking_delta": False, "text_delta": False}
events_seen = {
"thinking_start": False,
"thinking_delta": False,
"text_delta": False,
}
with client.messages.stream(
model="claude-sonnet-4-20250514",
@ -151,7 +171,9 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):
messages=[{"role": "user", "content": "What is 2+2?"}],
) as stream:
for event in stream:
if event.type == "content_block_start" and getattr(event, "content_block", None):
if event.type == "content_block_start" and getattr(
event, "content_block", None
):
if getattr(event.content_block, "type", None) == "thinking":
events_seen["thinking_start"] = True
if event.type == "content_block_delta" and getattr(event, "delta", None):
@ -255,7 +277,11 @@ def test_responses_api_streaming_translated_upstream(httpserver: HTTPServer):
text_chunks = []
for event in stream:
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
event, "delta", None
):
text_chunks.append(event.delta)
assert len(text_chunks) > 0, "Should have received text delta events from translated stream"
assert (
len(text_chunks) > 0
), "Should have received text delta events from translated stream"