mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Fix HandlerType import and apply Black formatting
- Import HandlerType from pytest_httpserver.httpserver (not top-level) - Apply Black formatting to all new test files Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3a6a672c9d
commit
aeef0c33a8
4 changed files with 256 additions and 83 deletions
|
|
@ -6,7 +6,8 @@ The gateway container routes to host.docker.internal:51001 where the mock server
|
|||
|
||||
import json
|
||||
import pytest
|
||||
from pytest_httpserver import HTTPServer, HandlerType
|
||||
from pytest_httpserver import HTTPServer
|
||||
from pytest_httpserver.httpserver import HandlerType
|
||||
from werkzeug.wrappers import Response
|
||||
|
||||
|
||||
|
|
@ -19,7 +20,10 @@ def httpserver_listen_address():
|
|||
# OpenAI Chat Completions helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_openai_chat_response(content="Hello from mock!", model="gpt-5-mini-2025-08-07", tool_calls=None):
|
||||
|
||||
def make_openai_chat_response(
|
||||
content="Hello from mock!", model="gpt-5-mini-2025-08-07", tool_calls=None
|
||||
):
|
||||
message = {"role": "assistant", "content": content}
|
||||
finish_reason = "stop"
|
||||
if tool_calls:
|
||||
|
|
@ -61,7 +65,9 @@ def make_openai_chat_stream(content="Hello from mock!", model="gpt-5-mini-2025-0
|
|||
return "".join(lines)
|
||||
|
||||
|
||||
def make_openai_tool_call_stream(model="gpt-5-mini-2025-08-07", tool_name="echo_tool", tool_args='{"text":"hello"}'):
|
||||
def make_openai_tool_call_stream(
|
||||
model="gpt-5-mini-2025-08-07", tool_name="echo_tool", tool_args='{"text":"hello"}'
|
||||
):
|
||||
lines = []
|
||||
# Role chunk
|
||||
lines.append(
|
||||
|
|
@ -92,7 +98,10 @@ def make_openai_tool_call_stream(model="gpt-5-mini-2025-08-07", tool_name="echo_
|
|||
# Anthropic Messages helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_anthropic_response(content="Hello from mock!", model="claude-sonnet-4-20250514"):
|
||||
|
||||
def make_anthropic_response(
|
||||
content="Hello from mock!", model="claude-sonnet-4-20250514"
|
||||
):
|
||||
return {
|
||||
"id": "msg-mock-123",
|
||||
"type": "message",
|
||||
|
|
@ -108,12 +117,21 @@ def make_anthropic_response(content="Hello from mock!", model="claude-sonnet-4-2
|
|||
def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-20250514"):
|
||||
lines = []
|
||||
msg = {
|
||||
"id": "msg-mock-123", "type": "message", "role": "assistant",
|
||||
"model": model, "content": [], "stop_reason": None,
|
||||
"stop_sequence": None, "usage": {"input_tokens": 10, "output_tokens": 0},
|
||||
"id": "msg-mock-123",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"model": model,
|
||||
"content": [],
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 10, "output_tokens": 0},
|
||||
}
|
||||
lines.append(f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n")
|
||||
lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n')
|
||||
lines.append(
|
||||
f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n"
|
||||
)
|
||||
lines.append(
|
||||
f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n'
|
||||
)
|
||||
|
||||
words = content.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
|
|
@ -124,7 +142,9 @@ def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-202
|
|||
f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"text_delta","text":"{escaped}"}}}}\n\n'
|
||||
)
|
||||
|
||||
lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n')
|
||||
lines.append(
|
||||
f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n'
|
||||
)
|
||||
lines.append(
|
||||
f'event: message_delta\ndata: {{"type":"message_delta","delta":{{"stop_reason":"end_turn","stop_sequence":null}},"usage":{{"output_tokens":5}}}}\n\n'
|
||||
)
|
||||
|
|
@ -132,33 +152,52 @@ def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-202
|
|||
return "".join(lines)
|
||||
|
||||
|
||||
def make_anthropic_thinking_stream(content="The answer is 4.", thinking="Let me think... 2+2=4", model="claude-sonnet-4-20250514"):
|
||||
def make_anthropic_thinking_stream(
|
||||
content="The answer is 4.",
|
||||
thinking="Let me think... 2+2=4",
|
||||
model="claude-sonnet-4-20250514",
|
||||
):
|
||||
lines = []
|
||||
msg = {
|
||||
"id": "msg-mock-think", "type": "message", "role": "assistant",
|
||||
"model": model, "content": [], "stop_reason": None,
|
||||
"stop_sequence": None, "usage": {"input_tokens": 10, "output_tokens": 0},
|
||||
"id": "msg-mock-think",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"model": model,
|
||||
"content": [],
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 10, "output_tokens": 0},
|
||||
}
|
||||
lines.append(f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n")
|
||||
lines.append(
|
||||
f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n"
|
||||
)
|
||||
|
||||
# Thinking block
|
||||
lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"thinking","thinking":""}}}}\n\n')
|
||||
lines.append(
|
||||
f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"thinking","thinking":""}}}}\n\n'
|
||||
)
|
||||
for word in thinking.split(" "):
|
||||
escaped = json.dumps(word)[1:-1]
|
||||
lines.append(
|
||||
f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"thinking_delta","thinking":"{escaped} "}}}}\n\n'
|
||||
)
|
||||
lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n')
|
||||
lines.append(
|
||||
f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n'
|
||||
)
|
||||
|
||||
# Text block
|
||||
lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":1,"content_block":{{"type":"text","text":""}}}}\n\n')
|
||||
lines.append(
|
||||
f'event: content_block_start\ndata: {{"type":"content_block_start","index":1,"content_block":{{"type":"text","text":""}}}}\n\n'
|
||||
)
|
||||
for i, word in enumerate(content.split(" ")):
|
||||
prefix = " " if i > 0 else ""
|
||||
escaped = json.dumps(f"{prefix}{word}")[1:-1]
|
||||
lines.append(
|
||||
f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":1,"delta":{{"type":"text_delta","text":"{escaped}"}}}}\n\n'
|
||||
)
|
||||
lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":1}}\n\n')
|
||||
lines.append(
|
||||
f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":1}}\n\n'
|
||||
)
|
||||
|
||||
lines.append(
|
||||
f'event: message_delta\ndata: {{"type":"message_delta","delta":{{"stop_reason":"end_turn","stop_sequence":null}},"usage":{{"output_tokens":20}}}}\n\n'
|
||||
|
|
@ -171,7 +210,12 @@ def make_anthropic_thinking_stream(content="The answer is 4.", thinking="Let me
|
|||
# OpenAI Responses API helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-2025-08-07", response_id="resp-mock-123"):
|
||||
|
||||
def make_responses_api_response(
|
||||
content="Hello from mock!",
|
||||
model="gpt-5-mini-2025-08-07",
|
||||
response_id="resp-mock-123",
|
||||
):
|
||||
return {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
|
|
@ -183,7 +227,9 @@ def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-20
|
|||
"id": "msg_mock_123",
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": content, "annotations": []}],
|
||||
"content": [
|
||||
{"type": "output_text", "text": content, "annotations": []}
|
||||
],
|
||||
}
|
||||
],
|
||||
"status": "completed",
|
||||
|
|
@ -191,10 +237,23 @@ def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-20
|
|||
}
|
||||
|
||||
|
||||
def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025-08-07", response_id="resp-mock-123"):
|
||||
def make_responses_api_stream(
|
||||
content="Hello from mock!",
|
||||
model="gpt-5-mini-2025-08-07",
|
||||
response_id="resp-mock-123",
|
||||
):
|
||||
lines = []
|
||||
resp_base = {"id": response_id, "object": "response", "created_at": 1234567890, "model": model, "output": [], "status": "in_progress"}
|
||||
lines.append(f"event: response.created\ndata: {json.dumps({'type': 'response.created', 'response': resp_base})}\n\n")
|
||||
resp_base = {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"created_at": 1234567890,
|
||||
"model": model,
|
||||
"output": [],
|
||||
"status": "in_progress",
|
||||
}
|
||||
lines.append(
|
||||
f"event: response.created\ndata: {json.dumps({'type': 'response.created', 'response': resp_base})}\n\n"
|
||||
)
|
||||
lines.append(
|
||||
f'event: response.output_item.added\ndata: {{"type":"response.output_item.added","output_index":0,'
|
||||
f'"item":{{"type":"message","id":"msg_mock_123","status":"in_progress","role":"assistant","content":[]}}}}\n\n'
|
||||
|
|
@ -219,7 +278,9 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
|
|||
)
|
||||
|
||||
final_item = {
|
||||
"type": "message", "id": "msg_mock_123", "status": "completed",
|
||||
"type": "message",
|
||||
"id": "msg_mock_123",
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": content, "annotations": []}],
|
||||
}
|
||||
|
|
@ -227,8 +288,12 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
|
|||
f"event: response.output_item.done\ndata: {json.dumps({'type': 'response.output_item.done', 'output_index': 0, 'item': final_item})}\n\n"
|
||||
)
|
||||
|
||||
final_resp = dict(resp_base, output=[final_item], status="completed",
|
||||
usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15})
|
||||
final_resp = dict(
|
||||
resp_base,
|
||||
output=[final_item],
|
||||
status="completed",
|
||||
usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
|
||||
)
|
||||
lines.append(
|
||||
f"event: response.completed\ndata: {json.dumps({'type': 'response.completed', 'response': final_resp})}\n\n"
|
||||
)
|
||||
|
|
@ -239,7 +304,10 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
|
|||
# Mock server setup helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def setup_openai_chat_mock(httpserver: HTTPServer, content="Hello from mock!", tool_calls=None):
|
||||
|
||||
def setup_openai_chat_mock(
|
||||
httpserver: HTTPServer, content="Hello from mock!", tool_calls=None
|
||||
):
|
||||
"""Register a permanent handler for /v1/chat/completions on the mock server.
|
||||
Returns a list that will be populated with captured request bodies.
|
||||
"""
|
||||
|
|
@ -253,26 +321,35 @@ def setup_openai_chat_mock(httpserver: HTTPServer, content="Hello from mock!", t
|
|||
|
||||
if tool_calls and not is_stream:
|
||||
return Response(
|
||||
json.dumps(make_openai_chat_response(model=model, tool_calls=tool_calls)),
|
||||
status=200, content_type="application/json",
|
||||
json.dumps(
|
||||
make_openai_chat_response(model=model, tool_calls=tool_calls)
|
||||
),
|
||||
status=200,
|
||||
content_type="application/json",
|
||||
)
|
||||
if is_stream:
|
||||
return Response(
|
||||
make_openai_chat_stream(content=content, model=model),
|
||||
status=200, content_type="text/event-stream",
|
||||
status=200,
|
||||
content_type="text/event-stream",
|
||||
)
|
||||
return Response(
|
||||
json.dumps(make_openai_chat_response(content=content, model=model)),
|
||||
status=200, content_type="application/json",
|
||||
status=200,
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
httpserver.expect_request(
|
||||
"/v1/chat/completions", method="POST", handler_type=HandlerType.PERMANENT,
|
||||
"/v1/chat/completions",
|
||||
method="POST",
|
||||
handler_type=HandlerType.PERMANENT,
|
||||
).respond_with_handler(handler)
|
||||
return captured
|
||||
|
||||
|
||||
def setup_anthropic_mock(httpserver: HTTPServer, content="Hello from mock!", thinking=False):
|
||||
def setup_anthropic_mock(
|
||||
httpserver: HTTPServer, content="Hello from mock!", thinking=False
|
||||
):
|
||||
"""Register a permanent handler for /v1/messages on the mock server.
|
||||
Returns a list that will be populated with captured request bodies.
|
||||
"""
|
||||
|
|
@ -287,20 +364,25 @@ def setup_anthropic_mock(httpserver: HTTPServer, content="Hello from mock!", thi
|
|||
if thinking and is_stream:
|
||||
return Response(
|
||||
make_anthropic_thinking_stream(model=model),
|
||||
status=200, content_type="text/event-stream",
|
||||
status=200,
|
||||
content_type="text/event-stream",
|
||||
)
|
||||
if is_stream:
|
||||
return Response(
|
||||
make_anthropic_stream(content=content, model=model),
|
||||
status=200, content_type="text/event-stream",
|
||||
status=200,
|
||||
content_type="text/event-stream",
|
||||
)
|
||||
return Response(
|
||||
json.dumps(make_anthropic_response(content=content, model=model)),
|
||||
status=200, content_type="application/json",
|
||||
status=200,
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
httpserver.expect_request(
|
||||
"/v1/messages", method="POST", handler_type=HandlerType.PERMANENT,
|
||||
"/v1/messages",
|
||||
method="POST",
|
||||
handler_type=HandlerType.PERMANENT,
|
||||
).respond_with_handler(handler)
|
||||
return captured
|
||||
|
||||
|
|
@ -322,25 +404,45 @@ def setup_responses_api_mock(httpserver: HTTPServer, content="Hello from mock!")
|
|||
|
||||
if is_stream:
|
||||
return Response(
|
||||
make_responses_api_stream(content=content, model=model, response_id=response_id),
|
||||
status=200, content_type="text/event-stream",
|
||||
make_responses_api_stream(
|
||||
content=content, model=model, response_id=response_id
|
||||
),
|
||||
status=200,
|
||||
content_type="text/event-stream",
|
||||
)
|
||||
return Response(
|
||||
json.dumps(make_responses_api_response(content=content, model=model, response_id=response_id)),
|
||||
status=200, content_type="application/json",
|
||||
json.dumps(
|
||||
make_responses_api_response(
|
||||
content=content, model=model, response_id=response_id
|
||||
)
|
||||
),
|
||||
status=200,
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
httpserver.expect_request(
|
||||
"/v1/responses", method="POST", handler_type=HandlerType.PERMANENT,
|
||||
"/v1/responses",
|
||||
method="POST",
|
||||
handler_type=HandlerType.PERMANENT,
|
||||
).respond_with_handler(handler)
|
||||
return captured
|
||||
|
||||
|
||||
def setup_error_mock(httpserver: HTTPServer, path="/v1/chat/completions", status=400, body=None):
|
||||
def setup_error_mock(
|
||||
httpserver: HTTPServer, path="/v1/chat/completions", status=400, body=None
|
||||
):
|
||||
"""Register a handler that returns an error response."""
|
||||
error_body = body or json.dumps({
|
||||
"error": {"message": "Bad Request", "type": "invalid_request_error", "code": "bad_request"}
|
||||
})
|
||||
httpserver.expect_request(path, method="POST").respond_with_data(
|
||||
error_body, status=status, content_type="application/json",
|
||||
error_body = body or json.dumps(
|
||||
{
|
||||
"error": {
|
||||
"message": "Bad Request",
|
||||
"type": "invalid_request_error",
|
||||
"code": "bad_request",
|
||||
}
|
||||
}
|
||||
)
|
||||
httpserver.expect_request(path, method="POST").respond_with_data(
|
||||
error_body,
|
||||
status=status,
|
||||
content_type="application/json",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -131,7 +131,9 @@ def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):
|
|||
|
||||
def test_openai_client_with_claude_model(httpserver: HTTPServer):
|
||||
"""OpenAI client → Claude model → gateway routes to Anthropic upstream → transforms response to OpenAI format"""
|
||||
captured = setup_anthropic_mock(httpserver, content="Hello from Claude via OpenAI client!")
|
||||
captured = setup_anthropic_mock(
|
||||
httpserver, content="Hello from Claude via OpenAI client!"
|
||||
)
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
|
||||
completion = client.chat.completions.create(
|
||||
|
|
@ -140,7 +142,9 @@ def test_openai_client_with_claude_model(httpserver: HTTPServer):
|
|||
messages=[{"role": "user", "content": "Hello"}],
|
||||
)
|
||||
|
||||
assert completion.choices[0].message.content == "Hello from Claude via OpenAI client!"
|
||||
assert (
|
||||
completion.choices[0].message.content == "Hello from Claude via OpenAI client!"
|
||||
)
|
||||
assert len(captured) == 1
|
||||
assert captured[0]["model"] == "claude-sonnet-4-20250514"
|
||||
|
||||
|
|
@ -167,7 +171,9 @@ def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
|
|||
|
||||
def test_anthropic_client_with_openai_model(httpserver: HTTPServer):
|
||||
"""Anthropic client → OpenAI model (gpt-4o-mini) → OpenAI upstream → transforms response to Anthropic format"""
|
||||
captured = setup_openai_chat_mock(httpserver, content="Hello from GPT via Anthropic!")
|
||||
captured = setup_openai_chat_mock(
|
||||
httpserver, content="Hello from GPT via Anthropic!"
|
||||
)
|
||||
|
||||
client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
|
||||
message = client.messages.create(
|
||||
|
|
@ -257,7 +263,10 @@ def test_assistant_message_with_null_content_and_tool_calls(httpserver: HTTPServ
|
|||
{
|
||||
"id": "call_test123",
|
||||
"type": "function",
|
||||
"function": {"name": "get_weather", "arguments": '{"city": "Seattle"}'},
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"arguments": '{"city": "Seattle"}',
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
|
|
@ -329,7 +338,9 @@ def test_anthropic_thinking_mode_streaming(httpserver: HTTPServer):
|
|||
messages=[{"role": "user", "content": "What is 2+2?"}],
|
||||
) as stream:
|
||||
for event in stream:
|
||||
if event.type == "content_block_start" and getattr(event, "content_block", None):
|
||||
if event.type == "content_block_start" and getattr(
|
||||
event, "content_block", None
|
||||
):
|
||||
if getattr(event.content_block, "type", None) == "thinking":
|
||||
thinking_block_started = True
|
||||
if event.type == "content_block_delta" and getattr(event, "delta", None):
|
||||
|
|
|
|||
|
|
@ -58,9 +58,13 @@ def test_responses_api_streaming_passthrough(httpserver: HTTPServer):
|
|||
text_chunks = []
|
||||
final_message = None
|
||||
for event in stream:
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
text_chunks.append(event.delta)
|
||||
if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
|
||||
if getattr(event, "type", None) == "response.completed" and getattr(
|
||||
event, "response", None
|
||||
):
|
||||
final_message = event.response
|
||||
|
||||
full_content = "".join(text_chunks)
|
||||
|
|
@ -72,7 +76,9 @@ def test_responses_api_with_tools_passthrough(httpserver: HTTPServer):
|
|||
"""Responses API with tools for OpenAI model"""
|
||||
setup_responses_api_mock(httpserver, content="Tool response")
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
|
||||
client = openai.OpenAI(
|
||||
api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
|
||||
)
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
|
|
@ -100,7 +106,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
|
|||
"""Responses API streaming with tools for OpenAI model"""
|
||||
setup_responses_api_mock(httpserver, content="Streamed tool response")
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
|
||||
client = openai.OpenAI(
|
||||
api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
|
||||
)
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
|
|
@ -127,7 +135,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
|
|||
etype = getattr(event, "type", None)
|
||||
if etype == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
text_chunks.append(event.delta)
|
||||
if etype == "response.function_call_arguments.delta" and getattr(event, "delta", None):
|
||||
if etype == "response.function_call_arguments.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
tool_calls.append(event.delta)
|
||||
|
||||
assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
|
||||
|
|
@ -140,7 +150,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
|
|||
|
||||
def test_responses_api_non_streaming_upstream_anthropic(httpserver: HTTPServer):
|
||||
"""Responses API with Anthropic model → translated to /v1/chat/completions"""
|
||||
captured = setup_openai_chat_mock(httpserver, content="Hello from Claude via Responses!")
|
||||
captured = setup_openai_chat_mock(
|
||||
httpserver, content="Hello from Claude via Responses!"
|
||||
)
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
|
||||
resp = client.responses.create(
|
||||
|
|
@ -165,7 +177,9 @@ def test_responses_api_streaming_upstream_anthropic(httpserver: HTTPServer):
|
|||
|
||||
text_chunks = []
|
||||
for event in stream:
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
text_chunks.append(event.delta)
|
||||
|
||||
assert len(text_chunks) > 0, "Should have received streaming text deltas"
|
||||
|
|
@ -202,7 +216,9 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS
|
|||
"""Responses API streaming with tools routed to Anthropic"""
|
||||
setup_openai_chat_mock(httpserver, content="Streamed tool via Claude")
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
|
||||
client = openai.OpenAI(
|
||||
api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
|
||||
)
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
|
|
@ -229,7 +245,9 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS
|
|||
etype = getattr(event, "type", None)
|
||||
if etype == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
text_chunks.append(event.delta)
|
||||
if etype == "response.function_call_arguments.delta" and getattr(event, "delta", None):
|
||||
if etype == "response.function_call_arguments.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
tool_calls.append(event.delta)
|
||||
|
||||
assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
|
||||
|
|
@ -254,7 +272,9 @@ def test_responses_api_mixed_content_types(httpserver: HTTPServer):
|
|||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "input_text", "text": "What is the weather in Seattle"}],
|
||||
"content": [
|
||||
{"type": "input_text", "text": "What is the weather in Seattle"}
|
||||
],
|
||||
},
|
||||
],
|
||||
)
|
||||
|
|
@ -278,7 +298,9 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
|
|||
# For non-OpenAI models, Responses API translates to /v1/chat/completions
|
||||
# But for OpenAI models, it uses /v1/responses directly
|
||||
# The state management is handled by brightstaff regardless of upstream
|
||||
captured = setup_openai_chat_mock(httpserver, content="I remember your name is Alice!")
|
||||
captured = setup_openai_chat_mock(
|
||||
httpserver, content="I remember your name is Alice!"
|
||||
)
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
|
||||
|
||||
|
|
@ -306,7 +328,9 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
|
|||
second_request = captured[1]
|
||||
messages = second_request.get("messages", [])
|
||||
# Should have messages from both turns (user + assistant from turn 1, plus user from turn 2)
|
||||
assert len(messages) >= 3, f"Expected >= 3 messages in second turn, got {len(messages)}: {messages}"
|
||||
assert (
|
||||
len(messages) >= 3
|
||||
), f"Expected >= 3 messages in second turn, got {len(messages)}: {messages}"
|
||||
|
||||
|
||||
def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer):
|
||||
|
|
@ -325,9 +349,13 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
|
|||
text_chunks_1 = []
|
||||
response_id_1 = None
|
||||
for event in stream1:
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
text_chunks_1.append(event.delta)
|
||||
if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
|
||||
if getattr(event, "type", None) == "response.completed" and getattr(
|
||||
event, "response", None
|
||||
):
|
||||
response_id_1 = event.response.id
|
||||
|
||||
assert response_id_1 is not None
|
||||
|
|
@ -344,9 +372,13 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
|
|||
text_chunks_2 = []
|
||||
response_id_2 = None
|
||||
for event in stream2:
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
text_chunks_2.append(event.delta)
|
||||
if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
|
||||
if getattr(event, "type", None) == "response.completed" and getattr(
|
||||
event, "response", None
|
||||
):
|
||||
response_id_2 = event.response.id
|
||||
|
||||
assert response_id_2 is not None
|
||||
|
|
@ -357,4 +389,6 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
|
|||
assert len(captured) == 2
|
||||
second_request = captured[1]
|
||||
messages = second_request.get("messages", [])
|
||||
assert len(messages) >= 3, f"Expected >= 3 messages in second turn, got {len(messages)}"
|
||||
assert (
|
||||
len(messages) >= 3
|
||||
), f"Expected >= 3 messages in second turn, got {len(messages)}"
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@ import anthropic
|
|||
import pytest
|
||||
import logging
|
||||
|
||||
from pytest_httpserver import HTTPServer, HandlerType
|
||||
from pytest_httpserver import HTTPServer
|
||||
from pytest_httpserver.httpserver import HandlerType
|
||||
from werkzeug.wrappers import Response
|
||||
|
||||
from conftest import (
|
||||
|
|
@ -39,7 +40,9 @@ LLM_GATEWAY_BASE = "http://localhost:12000"
|
|||
|
||||
def test_openai_chat_streaming_basic(httpserver: HTTPServer):
|
||||
"""Basic OpenAI streaming: verify chunks arrive in order and reassemble correctly"""
|
||||
setup_openai_chat_mock(httpserver, content="The quick brown fox jumps over the lazy dog")
|
||||
setup_openai_chat_mock(
|
||||
httpserver, content="The quick brown fox jumps over the lazy dog"
|
||||
)
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
|
||||
stream = client.chat.completions.create(
|
||||
|
|
@ -66,12 +69,17 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
|
|||
body = json.loads(request.data)
|
||||
model = body.get("model", "gpt-5-mini-2025-08-07")
|
||||
return Response(
|
||||
make_openai_tool_call_stream(model=model, tool_name="echo_tool", tool_args='{"text":"hello"}'),
|
||||
status=200, content_type="text/event-stream",
|
||||
make_openai_tool_call_stream(
|
||||
model=model, tool_name="echo_tool", tool_args='{"text":"hello"}'
|
||||
),
|
||||
status=200,
|
||||
content_type="text/event-stream",
|
||||
)
|
||||
|
||||
httpserver.expect_request(
|
||||
"/v1/chat/completions", method="POST", handler_type=HandlerType.PERMANENT,
|
||||
"/v1/chat/completions",
|
||||
method="POST",
|
||||
handler_type=HandlerType.PERMANENT,
|
||||
).respond_with_handler(handler)
|
||||
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
|
||||
|
|
@ -85,7 +93,11 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
|
|||
"function": {
|
||||
"name": "echo_tool",
|
||||
"description": "Echo input",
|
||||
"parameters": {"type": "object", "properties": {"text": {"type": "string"}}, "required": ["text"]},
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"text": {"type": "string"}},
|
||||
"required": ["text"],
|
||||
},
|
||||
},
|
||||
}
|
||||
],
|
||||
|
|
@ -97,14 +109,18 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
|
|||
if chunk.choices and chunk.choices[0].delta.tool_calls:
|
||||
for tc in chunk.choices[0].delta.tool_calls:
|
||||
while len(tool_calls) <= tc.index:
|
||||
tool_calls.append({"id": "", "function": {"name": "", "arguments": ""}})
|
||||
tool_calls.append(
|
||||
{"id": "", "function": {"name": "", "arguments": ""}}
|
||||
)
|
||||
if tc.id:
|
||||
tool_calls[tc.index]["id"] = tc.id
|
||||
if tc.function:
|
||||
if tc.function.name:
|
||||
tool_calls[tc.index]["function"]["name"] = tc.function.name
|
||||
if tc.function.arguments:
|
||||
tool_calls[tc.index]["function"]["arguments"] += tc.function.arguments
|
||||
tool_calls[tc.index]["function"][
|
||||
"arguments"
|
||||
] += tc.function.arguments
|
||||
|
||||
assert len(tool_calls) > 0, "Should have received tool calls"
|
||||
assert tool_calls[0]["function"]["name"] == "echo_tool"
|
||||
|
|
@ -142,7 +158,11 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):
|
|||
|
||||
client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
|
||||
|
||||
events_seen = {"thinking_start": False, "thinking_delta": False, "text_delta": False}
|
||||
events_seen = {
|
||||
"thinking_start": False,
|
||||
"thinking_delta": False,
|
||||
"text_delta": False,
|
||||
}
|
||||
|
||||
with client.messages.stream(
|
||||
model="claude-sonnet-4-20250514",
|
||||
|
|
@ -151,7 +171,9 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):
|
|||
messages=[{"role": "user", "content": "What is 2+2?"}],
|
||||
) as stream:
|
||||
for event in stream:
|
||||
if event.type == "content_block_start" and getattr(event, "content_block", None):
|
||||
if event.type == "content_block_start" and getattr(
|
||||
event, "content_block", None
|
||||
):
|
||||
if getattr(event.content_block, "type", None) == "thinking":
|
||||
events_seen["thinking_start"] = True
|
||||
if event.type == "content_block_delta" and getattr(event, "delta", None):
|
||||
|
|
@ -255,7 +277,11 @@ def test_responses_api_streaming_translated_upstream(httpserver: HTTPServer):
|
|||
|
||||
text_chunks = []
|
||||
for event in stream:
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
|
||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||
event, "delta", None
|
||||
):
|
||||
text_chunks.append(event.delta)
|
||||
|
||||
assert len(text_chunks) > 0, "Should have received text delta events from translated stream"
|
||||
assert (
|
||||
len(text_chunks) > 0
|
||||
), "Should have received text delta events from translated stream"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue