diff --git a/tests/archgw/conftest.py b/tests/archgw/conftest.py
index 9a58e4da..b2f663e9 100644
--- a/tests/archgw/conftest.py
+++ b/tests/archgw/conftest.py
@@ -6,7 +6,8 @@ The gateway container routes to host.docker.internal:51001 where the mock server
 
 import json
 import pytest
-from pytest_httpserver import HTTPServer, HandlerType
+from pytest_httpserver import HTTPServer
+from pytest_httpserver.httpserver import HandlerType
 from werkzeug.wrappers import Response
 
 
@@ -19,7 +20,10 @@ def httpserver_listen_address():
 # OpenAI Chat Completions helpers
 # ---------------------------------------------------------------------------
 
-def make_openai_chat_response(content="Hello from mock!", model="gpt-5-mini-2025-08-07", tool_calls=None):
+
+def make_openai_chat_response(
+    content="Hello from mock!", model="gpt-5-mini-2025-08-07", tool_calls=None
+):
     message = {"role": "assistant", "content": content}
     finish_reason = "stop"
     if tool_calls:
@@ -61,7 +65,9 @@ def make_openai_chat_stream(content="Hello from mock!", model="gpt-5-mini-2025-0
     return "".join(lines)
 
 
-def make_openai_tool_call_stream(model="gpt-5-mini-2025-08-07", tool_name="echo_tool", tool_args='{"text":"hello"}'):
+def make_openai_tool_call_stream(
+    model="gpt-5-mini-2025-08-07", tool_name="echo_tool", tool_args='{"text":"hello"}'
+):
     lines = []
     # Role chunk
     lines.append(
@@ -92,7 +98,10 @@ def make_openai_tool_call_stream(model="gpt-5-mini-2025-08-07", tool_name="echo_
 # Anthropic Messages helpers
 # ---------------------------------------------------------------------------
 
-def make_anthropic_response(content="Hello from mock!", model="claude-sonnet-4-20250514"):
+
+def make_anthropic_response(
+    content="Hello from mock!", model="claude-sonnet-4-20250514"
+):
     return {
         "id": "msg-mock-123",
         "type": "message",
@@ -108,12 +117,21 @@
 def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-20250514"):
     lines = []
     msg = {
-        "id": "msg-mock-123", "type": "message", "role": "assistant",
-        "model": model, "content": [], "stop_reason": None,
-        "stop_sequence": None, "usage": {"input_tokens": 10, "output_tokens": 0},
+        "id": "msg-mock-123",
+        "type": "message",
+        "role": "assistant",
+        "model": model,
+        "content": [],
+        "stop_reason": None,
+        "stop_sequence": None,
+        "usage": {"input_tokens": 10, "output_tokens": 0},
     }
-    lines.append(f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n")
-    lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n')
+    lines.append(
+        f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n"
+    )
+    lines.append(
+        f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n'
+    )
 
     words = content.split(" ")
     for i, word in enumerate(words):
@@ -124,7 +142,9 @@ def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-202
             f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"text_delta","text":"{escaped}"}}}}\n\n'
         )
 
-    lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n')
+    lines.append(
+        f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n'
+    )
     lines.append(
         f'event: message_delta\ndata: {{"type":"message_delta","delta":{{"stop_reason":"end_turn","stop_sequence":null}},"usage":{{"output_tokens":5}}}}\n\n'
     )
@@ -132,33 +152,52 @@ def make_anthropic_stream(content="Hello from mock!", model="claude-sonnet-4-202
     return "".join(lines)
 
 
-def make_anthropic_thinking_stream(content="The answer is 4.", thinking="Let me think... 2+2=4", model="claude-sonnet-4-20250514"):
+def make_anthropic_thinking_stream(
+    content="The answer is 4.",
+    thinking="Let me think... 2+2=4",
+    model="claude-sonnet-4-20250514",
+):
     lines = []
     msg = {
-        "id": "msg-mock-think", "type": "message", "role": "assistant",
-        "model": model, "content": [], "stop_reason": None,
-        "stop_sequence": None, "usage": {"input_tokens": 10, "output_tokens": 0},
+        "id": "msg-mock-think",
+        "type": "message",
+        "role": "assistant",
+        "model": model,
+        "content": [],
+        "stop_reason": None,
+        "stop_sequence": None,
+        "usage": {"input_tokens": 10, "output_tokens": 0},
     }
-    lines.append(f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n")
+    lines.append(
+        f"event: message_start\ndata: {json.dumps({'type': 'message_start', 'message': msg})}\n\n"
+    )
 
     # Thinking block
-    lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"thinking","thinking":""}}}}\n\n')
+    lines.append(
+        f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"thinking","thinking":""}}}}\n\n'
+    )
     for word in thinking.split(" "):
         escaped = json.dumps(word)[1:-1]
         lines.append(
             f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"thinking_delta","thinking":"{escaped} "}}}}\n\n'
         )
-    lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n')
+    lines.append(
+        f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n'
+    )
 
     # Text block
-    lines.append(f'event: content_block_start\ndata: {{"type":"content_block_start","index":1,"content_block":{{"type":"text","text":""}}}}\n\n')
+    lines.append(
+        f'event: content_block_start\ndata: {{"type":"content_block_start","index":1,"content_block":{{"type":"text","text":""}}}}\n\n'
+    )
     for i, word in enumerate(content.split(" ")):
         prefix = " " if i > 0 else ""
         escaped = json.dumps(f"{prefix}{word}")[1:-1]
         lines.append(
             f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":1,"delta":{{"type":"text_delta","text":"{escaped}"}}}}\n\n'
         )
-    lines.append(f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":1}}\n\n')
+    lines.append(
+        f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":1}}\n\n'
+    )
 
     lines.append(
         f'event: message_delta\ndata: {{"type":"message_delta","delta":{{"stop_reason":"end_turn","stop_sequence":null}},"usage":{{"output_tokens":20}}}}\n\n'
@@ -171,7 +210,12 @@ def make_anthropic_thinking_stream(content="The answer is 4.", thinking="Let me
 # OpenAI Responses API helpers
 # ---------------------------------------------------------------------------
 
-def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-2025-08-07", response_id="resp-mock-123"):
+
+def make_responses_api_response(
+    content="Hello from mock!",
+    model="gpt-5-mini-2025-08-07",
+    response_id="resp-mock-123",
+):
     return {
         "id": response_id,
         "object": "response",
@@ -183,7 +227,9 @@ def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-20
                 "id": "msg_mock_123",
                 "status": "completed",
                 "role": "assistant",
-                "content": [{"type": "output_text", "text": content, "annotations": []}],
+                "content": [
+                    {"type": "output_text", "text": content, "annotations": []}
+                ],
             }
         ],
         "status": "completed",
@@ -191,10 +237,23 @@ def make_responses_api_response(content="Hello from mock!", model="gpt-5-mini-20
     }
 
-def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025-08-07", response_id="resp-mock-123"):
+def make_responses_api_stream(
+    content="Hello from mock!",
+    model="gpt-5-mini-2025-08-07",
+    response_id="resp-mock-123",
+):
     lines = []
-    resp_base = {"id": response_id, "object": "response", "created_at": 1234567890, "model": model, "output": [], "status": "in_progress"}
-    lines.append(f"event: response.created\ndata: {json.dumps({'type': 'response.created', 'response': resp_base})}\n\n")
+    resp_base = {
+        "id": response_id,
+        "object": "response",
+        "created_at": 1234567890,
+        "model": model,
+        "output": [],
+        "status": "in_progress",
+    }
+    lines.append(
+        f"event: response.created\ndata: {json.dumps({'type': 'response.created', 'response': resp_base})}\n\n"
+    )
     lines.append(
         f'event: response.output_item.added\ndata: {{"type":"response.output_item.added","output_index":0,'
         f'"item":{{"type":"message","id":"msg_mock_123","status":"in_progress","role":"assistant","content":[]}}}}\n\n'
     )
@@ -219,7 +278,9 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
     )
 
     final_item = {
-        "type": "message", "id": "msg_mock_123", "status": "completed",
+        "type": "message",
+        "id": "msg_mock_123",
+        "status": "completed",
         "role": "assistant",
         "content": [{"type": "output_text", "text": content, "annotations": []}],
     }
@@ -227,8 +288,12 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
         f"event: response.output_item.done\ndata: {json.dumps({'type': 'response.output_item.done', 'output_index': 0, 'item': final_item})}\n\n"
     )
 
-    final_resp = dict(resp_base, output=[final_item], status="completed",
-                      usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15})
+    final_resp = dict(
+        resp_base,
+        output=[final_item],
+        status="completed",
+        usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+    )
     lines.append(
         f"event: response.completed\ndata: {json.dumps({'type': 'response.completed', 'response': final_resp})}\n\n"
     )
@@ -239,7 +304,10 @@ def make_responses_api_stream(content="Hello from mock!", model="gpt-5-mini-2025
 # Mock server setup helpers
 # ---------------------------------------------------------------------------
 
-def setup_openai_chat_mock(httpserver: HTTPServer, content="Hello from mock!", tool_calls=None):
+
+def setup_openai_chat_mock(
+    httpserver: HTTPServer, content="Hello from mock!", tool_calls=None
+):
     """Register a permanent handler for /v1/chat/completions on the mock server.
     Returns a list that will be populated with captured request bodies.
     """
@@ -253,26 +321,35 @@ def setup_openai_chat_mock(httpserver: HTTPServer, content="Hello from mock!", t
 
         if tool_calls and not is_stream:
             return Response(
-                json.dumps(make_openai_chat_response(model=model, tool_calls=tool_calls)),
-                status=200, content_type="application/json",
+                json.dumps(
+                    make_openai_chat_response(model=model, tool_calls=tool_calls)
+                ),
+                status=200,
+                content_type="application/json",
             )
         if is_stream:
             return Response(
                 make_openai_chat_stream(content=content, model=model),
-                status=200, content_type="text/event-stream",
+                status=200,
+                content_type="text/event-stream",
             )
         return Response(
             json.dumps(make_openai_chat_response(content=content, model=model)),
-            status=200, content_type="application/json",
+            status=200,
+            content_type="application/json",
         )
 
     httpserver.expect_request(
-        "/v1/chat/completions", method="POST", handler_type=HandlerType.PERMANENT,
+        "/v1/chat/completions",
+        method="POST",
+        handler_type=HandlerType.PERMANENT,
     ).respond_with_handler(handler)
     return captured
 
 
-def setup_anthropic_mock(httpserver: HTTPServer, content="Hello from mock!", thinking=False):
+def setup_anthropic_mock(
+    httpserver: HTTPServer, content="Hello from mock!", thinking=False
+):
     """Register a permanent handler for /v1/messages on the mock server.
     Returns a list that will be populated with captured request bodies.
     """
@@ -287,20 +364,25 @@ def setup_anthropic_mock(httpserver: HTTPServer, content="Hello from mock!", thi
 
         if thinking and is_stream:
             return Response(
                 make_anthropic_thinking_stream(model=model),
-                status=200, content_type="text/event-stream",
+                status=200,
+                content_type="text/event-stream",
             )
         if is_stream:
             return Response(
                 make_anthropic_stream(content=content, model=model),
-                status=200, content_type="text/event-stream",
+                status=200,
+                content_type="text/event-stream",
             )
         return Response(
             json.dumps(make_anthropic_response(content=content, model=model)),
-            status=200, content_type="application/json",
+            status=200,
+            content_type="application/json",
         )
 
     httpserver.expect_request(
-        "/v1/messages", method="POST", handler_type=HandlerType.PERMANENT,
+        "/v1/messages",
+        method="POST",
+        handler_type=HandlerType.PERMANENT,
     ).respond_with_handler(handler)
     return captured
@@ -322,25 +404,45 @@ def setup_responses_api_mock(httpserver: HTTPServer, content="Hello from mock!")
 
         if is_stream:
             return Response(
-                make_responses_api_stream(content=content, model=model, response_id=response_id),
-                status=200, content_type="text/event-stream",
+                make_responses_api_stream(
+                    content=content, model=model, response_id=response_id
+                ),
+                status=200,
+                content_type="text/event-stream",
             )
         return Response(
-            json.dumps(make_responses_api_response(content=content, model=model, response_id=response_id)),
-            status=200, content_type="application/json",
+            json.dumps(
+                make_responses_api_response(
+                    content=content, model=model, response_id=response_id
+                )
+            ),
+            status=200,
+            content_type="application/json",
        )
 
     httpserver.expect_request(
-        "/v1/responses", method="POST", handler_type=HandlerType.PERMANENT,
+        "/v1/responses",
+        method="POST",
+        handler_type=HandlerType.PERMANENT,
     ).respond_with_handler(handler)
     return captured
 
 
-def setup_error_mock(httpserver: HTTPServer, path="/v1/chat/completions", status=400, body=None):
+def setup_error_mock(
+    httpserver: HTTPServer, path="/v1/chat/completions", status=400, body=None
+):
     """Register a handler that returns an error response."""
-    error_body = body or json.dumps({
-        "error": {"message": "Bad Request", "type": "invalid_request_error", "code": "bad_request"}
-    })
-    httpserver.expect_request(path, method="POST").respond_with_data(
-        error_body, status=status, content_type="application/json",
+    error_body = body or json.dumps(
+        {
+            "error": {
+                "message": "Bad Request",
+                "type": "invalid_request_error",
+                "code": "bad_request",
+            }
+        }
+    )
+    httpserver.expect_request(path, method="POST").respond_with_data(
+        error_body,
+        status=status,
+        content_type="application/json",
     )
diff --git a/tests/archgw/test_model_alias_routing.py b/tests/archgw/test_model_alias_routing.py
index 242a2d5f..763c5a8f 100644
--- a/tests/archgw/test_model_alias_routing.py
+++ b/tests/archgw/test_model_alias_routing.py
@@ -131,7 +131,9 @@ def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):
 
 def test_openai_client_with_claude_model(httpserver: HTTPServer):
     """OpenAI client → Claude model → gateway routes to Anthropic upstream → transforms response to OpenAI format"""
-    captured = setup_anthropic_mock(httpserver, content="Hello from Claude via OpenAI client!")
+    captured = setup_anthropic_mock(
+        httpserver, content="Hello from Claude via OpenAI client!"
+    )
 
     client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
     completion = client.chat.completions.create(
@@ -140,7 +142,9 @@ def test_openai_client_with_claude_model(httpserver: HTTPServer):
         messages=[{"role": "user", "content": "Hello"}],
     )
 
-    assert completion.choices[0].message.content == "Hello from Claude via OpenAI client!"
+    assert (
+        completion.choices[0].message.content == "Hello from Claude via OpenAI client!"
+    )
     assert len(captured) == 1
     assert captured[0]["model"] == "claude-sonnet-4-20250514"
 
@@ -167,7 +171,9 @@ def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
 
 def test_anthropic_client_with_openai_model(httpserver: HTTPServer):
     """Anthropic client → OpenAI model (gpt-4o-mini) → OpenAI upstream → transforms response to Anthropic format"""
-    captured = setup_openai_chat_mock(httpserver, content="Hello from GPT via Anthropic!")
+    captured = setup_openai_chat_mock(
+        httpserver, content="Hello from GPT via Anthropic!"
+    )
 
     client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
     message = client.messages.create(
@@ -257,7 +263,10 @@ def test_assistant_message_with_null_content_and_tool_calls(httpserver: HTTPServ
                     {
                         "id": "call_test123",
                         "type": "function",
-                        "function": {"name": "get_weather", "arguments": '{"city": "Seattle"}'},
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"city": "Seattle"}',
+                        },
                     }
                 ],
             },
@@ -329,7 +338,9 @@ def test_anthropic_thinking_mode_streaming(httpserver: HTTPServer):
         messages=[{"role": "user", "content": "What is 2+2?"}],
     ) as stream:
         for event in stream:
-            if event.type == "content_block_start" and getattr(event, "content_block", None):
+            if event.type == "content_block_start" and getattr(
+                event, "content_block", None
+            ):
                 if getattr(event.content_block, "type", None) == "thinking":
                     thinking_block_started = True
             if event.type == "content_block_delta" and getattr(event, "delta", None):
diff --git a/tests/archgw/test_responses_api.py b/tests/archgw/test_responses_api.py
index 432bf1e2..665f8608 100644
--- a/tests/archgw/test_responses_api.py
+++ b/tests/archgw/test_responses_api.py
@@ -58,9 +58,13 @@ def test_responses_api_streaming_passthrough(httpserver: HTTPServer):
     text_chunks = []
     final_message = None
     for event in stream:
-        if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
+        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
+            event, "delta", None
+        ):
             text_chunks.append(event.delta)
-        if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
+        if getattr(event, "type", None) == "response.completed" and getattr(
+            event, "response", None
+        ):
             final_message = event.response
 
     full_content = "".join(text_chunks)
@@ -72,7 +76,9 @@ def test_responses_api_with_tools_passthrough(httpserver: HTTPServer):
     """Responses API with tools for OpenAI model"""
     setup_responses_api_mock(httpserver, content="Tool response")
 
-    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
+    client = openai.OpenAI(
+        api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
+    )
     tools = [
         {
             "type": "function",
@@ -100,7 +106,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
     """Responses API streaming with tools for OpenAI model"""
     setup_responses_api_mock(httpserver, content="Streamed tool response")
 
-    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
+    client = openai.OpenAI(
+        api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
+    )
     tools = [
         {
             "type": "function",
@@ -127,7 +135,9 @@ def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
         etype = getattr(event, "type", None)
         if etype == "response.output_text.delta" and getattr(event, "delta", None):
             text_chunks.append(event.delta)
-        if etype == "response.function_call_arguments.delta" and getattr(event, "delta", None):
+        if etype == "response.function_call_arguments.delta" and getattr(
+            event, "delta", None
+        ):
             tool_calls.append(event.delta)
 
     assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
@@ -140,7 +150,9 @@ def test_responses_api_non_streaming_upstream_anthropic(httpserver: HTTPServer):
     """Responses API with Anthropic model → translated to /v1/chat/completions"""
-    captured = setup_openai_chat_mock(httpserver, content="Hello from Claude via Responses!")
+    captured = setup_openai_chat_mock(
+        httpserver, content="Hello from Claude via Responses!"
+    )
 
     client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
     resp = client.responses.create(
@@ -165,7 +177,9 @@ def test_responses_api_streaming_upstream_anthropic(httpserver: HTTPServer):
 
     text_chunks = []
     for event in stream:
-        if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
+        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
+            event, "delta", None
+        ):
             text_chunks.append(event.delta)
 
     assert len(text_chunks) > 0, "Should have received streaming text deltas"
@@ -202,7 +216,9 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS
     """Responses API streaming with tools routed to Anthropic"""
     setup_openai_chat_mock(httpserver, content="Streamed tool via Claude")
 
-    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0)
+    client = openai.OpenAI(
+        api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
+    )
     tools = [
         {
             "type": "function",
@@ -229,7 +245,9 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS
         etype = getattr(event, "type", None)
         if etype == "response.output_text.delta" and getattr(event, "delta", None):
             text_chunks.append(event.delta)
-        if etype == "response.function_call_arguments.delta" and getattr(event, "delta", None):
+        if etype == "response.function_call_arguments.delta" and getattr(
+            event, "delta", None
+        ):
             tool_calls.append(event.delta)
 
     assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
@@ -254,7 +272,9 @@ def test_responses_api_mixed_content_types(httpserver: HTTPServer):
             },
             {
                 "role": "user",
-                "content": [{"type": "input_text", "text": "What is the weather in Seattle"}],
+                "content": [
+                    {"type": "input_text", "text": "What is the weather in Seattle"}
+                ],
             },
         ],
     )
@@ -278,7 +298,9 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
     # For non-OpenAI models, Responses API translates to /v1/chat/completions
     # But for OpenAI models, it uses /v1/responses directly
     # The state management is handled by brightstaff regardless of upstream
-    captured = setup_openai_chat_mock(httpserver, content="I remember your name is Alice!")
+    captured = setup_openai_chat_mock(
+        httpserver, content="I remember your name is Alice!"
+    )
 
     client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
 
@@ -306,7 +328,9 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
     second_request = captured[1]
     messages = second_request.get("messages", [])
     # Should have messages from both turns (user + assistant from turn 1, plus user from turn 2)
-    assert len(messages) >= 3, f"Expected >= 3 messages in second turn, got {len(messages)}: {messages}"
+    assert (
+        len(messages) >= 3
+    ), f"Expected >= 3 messages in second turn, got {len(messages)}: {messages}"
 
 
 def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer):
@@ -325,9 +349,13 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
     text_chunks_1 = []
     response_id_1 = None
     for event in stream1:
-        if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
+        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
+            event, "delta", None
+        ):
             text_chunks_1.append(event.delta)
-        if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
+        if getattr(event, "type", None) == "response.completed" and getattr(
+            event, "response", None
+        ):
             response_id_1 = event.response.id
 
     assert response_id_1 is not None
@@ -344,9 +372,13 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
     text_chunks_2 = []
     response_id_2 = None
     for event in stream2:
-        if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
+        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
+            event, "delta", None
+        ):
             text_chunks_2.append(event.delta)
-        if getattr(event, "type", None) == "response.completed" and getattr(event, "response", None):
+        if getattr(event, "type", None) == "response.completed" and getattr(
+            event, "response", None
+        ):
             response_id_2 = event.response.id
 
     assert response_id_2 is not None
@@ -357,4 +389,6 @@ def test_conversation_state_management_two_turn_streaming(httpserver: HTTPServer
     assert len(captured) == 2
     second_request = captured[1]
     messages = second_request.get("messages", [])
-    assert len(messages) >= 3, f"Expected >= 3 messages in second turn, got {len(messages)}"
+    assert (
+        len(messages) >= 3
+    ), f"Expected >= 3 messages in second turn, got {len(messages)}"
diff --git a/tests/archgw/test_streaming.py b/tests/archgw/test_streaming.py
index 51de0037..42d5666e 100644
--- a/tests/archgw/test_streaming.py
+++ b/tests/archgw/test_streaming.py
@@ -17,7 +17,8 @@ import anthropic
 import pytest
 import logging
 
-from pytest_httpserver import HTTPServer, HandlerType
+from pytest_httpserver import HTTPServer
+from pytest_httpserver.httpserver import HandlerType
 from werkzeug.wrappers import Response
 
 from conftest import (
@@ -39,7 +40,9 @@ LLM_GATEWAY_BASE = "http://localhost:12000"
 
 def test_openai_chat_streaming_basic(httpserver: HTTPServer):
     """Basic OpenAI streaming: verify chunks arrive in order and reassemble correctly"""
-    setup_openai_chat_mock(httpserver, content="The quick brown fox jumps over the lazy dog")
+    setup_openai_chat_mock(
+        httpserver, content="The quick brown fox jumps over the lazy dog"
+    )
 
     client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
     stream = client.chat.completions.create(
@@ -66,12 +69,17 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
         body = json.loads(request.data)
         model = body.get("model", "gpt-5-mini-2025-08-07")
         return Response(
-            make_openai_tool_call_stream(model=model, tool_name="echo_tool", tool_args='{"text":"hello"}'),
-            status=200, content_type="text/event-stream",
+            make_openai_tool_call_stream(
+                model=model, tool_name="echo_tool", tool_args='{"text":"hello"}'
+            ),
+            status=200,
+            content_type="text/event-stream",
         )
 
     httpserver.expect_request(
-        "/v1/chat/completions", method="POST", handler_type=HandlerType.PERMANENT,
+        "/v1/chat/completions",
+        method="POST",
+        handler_type=HandlerType.PERMANENT,
     ).respond_with_handler(handler)
 
     client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
@@ -85,7 +93,11 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
                 "function": {
                     "name": "echo_tool",
                     "description": "Echo input",
-                    "parameters": {"type": "object", "properties": {"text": {"type": "string"}}, "required": ["text"]},
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"text": {"type": "string"}},
+                        "required": ["text"],
+                    },
                 },
             }
         ],
@@ -97,14 +109,18 @@ def test_openai_chat_streaming_tool_calls(httpserver: HTTPServer):
         if chunk.choices and chunk.choices[0].delta.tool_calls:
             for tc in chunk.choices[0].delta.tool_calls:
                 while len(tool_calls) <= tc.index:
-                    tool_calls.append({"id": "", "function": {"name": "", "arguments": ""}})
+                    tool_calls.append(
+                        {"id": "", "function": {"name": "", "arguments": ""}}
+                    )
                 if tc.id:
                     tool_calls[tc.index]["id"] = tc.id
                 if tc.function:
                     if tc.function.name:
                         tool_calls[tc.index]["function"]["name"] = tc.function.name
                     if tc.function.arguments:
-                        tool_calls[tc.index]["function"]["arguments"] += tc.function.arguments
+                        tool_calls[tc.index]["function"][
+                            "arguments"
+                        ] += tc.function.arguments
 
     assert len(tool_calls) > 0, "Should have received tool calls"
     assert tool_calls[0]["function"]["name"] == "echo_tool"
@@ -142,7 +158,11 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):
 
     client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
 
-    events_seen = {"thinking_start": False, "thinking_delta": False, "text_delta": False}
+    events_seen = {
+        "thinking_start": False,
+        "thinking_delta": False,
+        "text_delta": False,
+    }
 
     with client.messages.stream(
@@ -151,7 +171,9 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):
         messages=[{"role": "user", "content": "What is 2+2?"}],
     ) as stream:
         for event in stream:
-            if event.type == "content_block_start" and getattr(event, "content_block", None):
+            if event.type == "content_block_start" and getattr(
+                event, "content_block", None
+            ):
                 if getattr(event.content_block, "type", None) == "thinking":
                     events_seen["thinking_start"] = True
             if event.type == "content_block_delta" and getattr(event, "delta", None):
@@ -255,7 +277,11 @@ def test_responses_api_streaming_translated_upstream(httpserver: HTTPServer):
 
     text_chunks = []
     for event in stream:
-        if getattr(event, "type", None) == "response.output_text.delta" and getattr(event, "delta", None):
+        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
+            event, "delta", None
+        ):
             text_chunks.append(event.delta)
 
-    assert len(text_chunks) > 0, "Should have received text delta events from translated stream"
+    assert (
+        len(text_chunks) > 0
+    ), "Should have received text delta events from translated stream"