Fix mock handlers to match gateway routing behavior

- OpenAI client → Claude model: the gateway routes to /v1/chat/completions (not /v1/messages), so use setup_openai_chat_mock
- Responses API: the gateway translates all requests to /v1/chat/completions on the upstream for base_url-configured providers, so use setup_openai_chat_mock
- Remove unused imports (json, pytest, setup_responses_api_mock)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-04 21:32:43 +02:00 · 2026-02-18 23:54:57 +00:00 · 2026-02-18 23:54:57 +00:00 · d8e5e48f4a
commit d8e5e48f4a
parent aeef0c33a8
3 changed files with 71 additions and 142 deletions
--- a/tests/archgw/test_model_alias_routing.py
+++ b/tests/archgw/test_model_alias_routing.py
@@ -130,8 +130,10 @@ def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):


 def test_openai_client_with_claude_model(httpserver: HTTPServer):
-    """OpenAI client → Claude model → gateway routes to Anthropic upstream → transforms response to OpenAI format"""
-    captured = setup_anthropic_mock(
+    """OpenAI client → Claude model → gateway proxies via /v1/chat/completions → transforms response"""
+    # Gateway routes OpenAI-format requests to /v1/chat/completions on upstream
+    # even for Anthropic models, so we need the OpenAI chat mock
+    captured = setup_openai_chat_mock(
        httpserver, content="Hello from Claude via OpenAI client!"
    )

@@ -150,8 +152,9 @@ def test_openai_client_with_claude_model(httpserver: HTTPServer):


 def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
-    """OpenAI client streaming → Claude model → Anthropic SSE → transformed to OpenAI SSE"""
-    setup_anthropic_mock(httpserver, content="Streaming from Claude!")
+    """OpenAI client streaming → Claude model → proxied via /v1/chat/completions"""
+    # Gateway routes OpenAI-format requests to /v1/chat/completions on upstream
+    setup_openai_chat_mock(httpserver, content="Streaming from Claude!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.chat.completions.create(
--- a/tests/archgw/test_responses_api.py
+++ b/tests/archgw/test_responses_api.py
@@ -1,22 +1,23 @@
 """Mock-based tests for the OpenAI Responses API (/v1/responses).

-Tests passthrough to OpenAI, translation to chat completions for non-OpenAI
-providers, tool calling, streaming, and multi-turn state management.
+Tests translation to chat completions via the gateway, tool calling,
+streaming, mixed content types, and multi-turn state management.
+
+Note: The gateway translates all Responses API requests to /v1/chat/completions
+on the upstream when using base_url-configured providers. Direct /v1/responses
+passthrough is tested by the live e2e tests on main/nightly.

 These tests require the gateway to be running with config_mock_llm.yaml
 (started via docker-compose.mock.yaml).
 """

-import json
 import openai
-import pytest
 import logging

 from pytest_httpserver import HTTPServer

 from conftest import (
    setup_openai_chat_mock,
-    setup_responses_api_mock,
 )

 logger = logging.getLogger(__name__)
@@ -25,148 +26,50 @@ LLM_GATEWAY_BASE = "http://localhost:12000"


 # =============================================================================
-# PASSTHROUGH TESTS (OpenAI upstream → /v1/responses)
+# NON-STREAMING TESTS
 # =============================================================================


-def test_responses_api_non_streaming_passthrough(httpserver: HTTPServer):
-    """Responses API with OpenAI model should pass through to /v1/responses"""
-    captured = setup_responses_api_mock(httpserver, content="Hello from Responses API!")
+def test_responses_api_non_streaming(httpserver: HTTPServer):
+    """Responses API non-streaming → translated to /v1/chat/completions"""
+    captured = setup_openai_chat_mock(httpserver, content="Hello from Responses API!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    resp = client.responses.create(
-        model="gpt-4o",
-        input="Hello via responses passthrough",
+        model="claude-sonnet-4-20250514",
+        input="Hello via responses API",
    )

    assert resp is not None
    assert resp.id is not None
-    assert resp.output_text == "Hello from Responses API!"
+    assert len(resp.output_text) > 0


-def test_responses_api_streaming_passthrough(httpserver: HTTPServer):
-    """Responses API streaming with OpenAI model"""
-    setup_responses_api_mock(httpserver, content="Streaming responses API!")
-
-    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
-    stream = client.responses.create(
-        model="gpt-4o",
-        input="Write a haiku",
-        stream=True,
-    )
-
-    text_chunks = []
-    final_message = None
-    for event in stream:
-        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
-            event, "delta", None
-        ):
-            text_chunks.append(event.delta)
-        if getattr(event, "type", None) == "response.completed" and getattr(
-            event, "response", None
-        ):
-            final_message = event.response
-
-    full_content = "".join(text_chunks)
-    assert len(text_chunks) > 0, "Should have received streaming text deltas"
-    assert len(full_content) > 0, "Should have received content"
-
-
-def test_responses_api_with_tools_passthrough(httpserver: HTTPServer):
-    """Responses API with tools for OpenAI model"""
-    setup_responses_api_mock(httpserver, content="Tool response")
-
-    client = openai.OpenAI(
-        api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
-    )
-    tools = [
-        {
-            "type": "function",
-            "name": "echo_tool",
-            "description": "Echo back the provided input",
-            "parameters": {
-                "type": "object",
-                "properties": {"text": {"type": "string"}},
-                "required": ["text"],
-            },
-        }
-    ]
-
-    resp = client.responses.create(
-        model="openai/gpt-5-mini-2025-08-07",
-        input="Call the echo tool",
-        tools=tools,
-    )
-
-    assert resp is not None
-    assert resp.id is not None
-
-
-def test_responses_api_streaming_with_tools_passthrough(httpserver: HTTPServer):
-    """Responses API streaming with tools for OpenAI model"""
-    setup_responses_api_mock(httpserver, content="Streamed tool response")
-
-    client = openai.OpenAI(
-        api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1", max_retries=0
-    )
-    tools = [
-        {
-            "type": "function",
-            "name": "echo_tool",
-            "description": "Echo back the provided input",
-            "parameters": {
-                "type": "object",
-                "properties": {"text": {"type": "string"}},
-                "required": ["text"],
-            },
-        }
-    ]
-
-    stream = client.responses.create(
-        model="openai/gpt-5-mini-2025-08-07",
-        input="Call the echo tool",
-        tools=tools,
-        stream=True,
-    )
-
-    text_chunks = []
-    tool_calls = []
-    for event in stream:
-        etype = getattr(event, "type", None)
-        if etype == "response.output_text.delta" and getattr(event, "delta", None):
-            text_chunks.append(event.delta)
-        if etype == "response.function_call_arguments.delta" and getattr(
-            event, "delta", None
-        ):
-            tool_calls.append(event.delta)
-
-    assert text_chunks or tool_calls, "Expected streamed text or tool call deltas"
-
-
-# =============================================================================
-# UPSTREAM TRANSLATION TESTS (non-OpenAI → /v1/chat/completions)
-# =============================================================================
-
-
-def test_responses_api_non_streaming_upstream_anthropic(httpserver: HTTPServer):
-    """Responses API with Anthropic model → translated to /v1/chat/completions"""
+def test_responses_api_non_streaming_openai_model(httpserver: HTTPServer):
+    """Responses API non-streaming with OpenAI model → translated to /v1/chat/completions"""
    captured = setup_openai_chat_mock(
-        httpserver, content="Hello from Claude via Responses!"
+        httpserver, content="Hello from GPT via Responses!"
    )

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    resp = client.responses.create(
-        model="claude-sonnet-4-20250514",
-        input="Hello, translate this",
+        model="gpt-4o",
+        input="Hello via responses API",
    )

    assert resp is not None
    assert resp.id is not None
+    assert len(resp.output_text) > 0


-def test_responses_api_streaming_upstream_anthropic(httpserver: HTTPServer):
-    """Responses API streaming with Anthropic model → translated upstream"""
-    setup_openai_chat_mock(httpserver, content="Streaming from Claude via Responses!")
+# =============================================================================
+# STREAMING TESTS
+# =============================================================================
+
+
+def test_responses_api_streaming(httpserver: HTTPServer):
+    """Responses API streaming → translated to /v1/chat/completions"""
+    setup_openai_chat_mock(httpserver, content="Streaming from Responses API!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.responses.create(
@@ -185,8 +88,34 @@ def test_responses_api_streaming_upstream_anthropic(httpserver: HTTPServer):
    assert len(text_chunks) > 0, "Should have received streaming text deltas"


-def test_responses_api_with_tools_upstream_anthropic(httpserver: HTTPServer):
-    """Responses API with tools routed to Anthropic (translated to chat completions)"""
+def test_responses_api_streaming_openai_model(httpserver: HTTPServer):
+    """Responses API streaming with OpenAI model → translated to /v1/chat/completions"""
+    setup_openai_chat_mock(httpserver, content="Streaming from GPT via Responses!")
+
+    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
+    stream = client.responses.create(
+        model="gpt-4o",
+        input="Write a haiku",
+        stream=True,
+    )
+
+    text_chunks = []
+    for event in stream:
+        if getattr(event, "type", None) == "response.output_text.delta" and getattr(
+            event, "delta", None
+        ):
+            text_chunks.append(event.delta)
+
+    assert len(text_chunks) > 0, "Should have received streaming text deltas"
+
+
+# =============================================================================
+# TOOL CALLING TESTS
+# =============================================================================
+
+
+def test_responses_api_with_tools(httpserver: HTTPServer):
+    """Responses API with tools → translated to /v1/chat/completions"""
    setup_openai_chat_mock(httpserver, content="Tool response via Claude")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
@@ -212,8 +141,8 @@ def test_responses_api_with_tools_upstream_anthropic(httpserver: HTTPServer):
    assert resp.id is not None


-def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPServer):
-    """Responses API streaming with tools routed to Anthropic"""
+def test_responses_api_streaming_with_tools(httpserver: HTTPServer):
+    """Responses API streaming with tools → translated to /v1/chat/completions"""
    setup_openai_chat_mock(httpserver, content="Streamed tool via Claude")

    client = openai.OpenAI(
@@ -260,11 +189,11 @@ def test_responses_api_streaming_with_tools_upstream_anthropic(httpserver: HTTPS

 def test_responses_api_mixed_content_types(httpserver: HTTPServer):
    """Responses API with mixed content types (string and array) in input"""
-    setup_responses_api_mock(httpserver, content="Weather Seattle")
+    setup_openai_chat_mock(httpserver, content="Weather Seattle")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    resp = client.responses.create(
-        model="openai/gpt-5-mini-2025-08-07",
+        model="claude-sonnet-4-20250514",
        input=[
            {
                "role": "developer",
@@ -295,9 +224,6 @@ def test_conversation_state_management_two_turn(httpserver: HTTPServer):
    Turn 1: Send initial message → get response_id
    Turn 2: Send with previous_response_id → verify state was combined
    """
-    # For non-OpenAI models, Responses API translates to /v1/chat/completions
-    # But for OpenAI models, it uses /v1/responses directly
-    # The state management is handled by brightstaff regardless of upstream
    captured = setup_openai_chat_mock(
        httpserver, content="I remember your name is Alice!"
    )
--- a/tests/archgw/test_streaming.py
+++ b/tests/archgw/test_streaming.py
@@ -14,7 +14,6 @@ These tests require the gateway to be running with config_mock_llm.yaml
 import json
 import openai
 import anthropic
-import pytest
 import logging

 from pytest_httpserver import HTTPServer
@@ -24,7 +23,6 @@ from werkzeug.wrappers import Response
 from conftest import (
    setup_openai_chat_mock,
    setup_anthropic_mock,
-    setup_responses_api_mock,
    make_openai_tool_call_stream,
 )

@@ -199,8 +197,9 @@ def test_anthropic_messages_streaming_thinking(httpserver: HTTPServer):


 def test_openai_client_streaming_anthropic_upstream(httpserver: HTTPServer):
-    """OpenAI client streaming → Anthropic model → Anthropic SSE → transformed to OpenAI SSE"""
-    setup_anthropic_mock(httpserver, content="Cross-provider streaming works!")
+    """OpenAI client streaming → Anthropic model → proxied via /v1/chat/completions"""
+    # Gateway routes OpenAI-format requests to /v1/chat/completions on upstream
+    setup_openai_chat_mock(httpserver, content="Cross-provider streaming works!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.chat.completions.create(
@@ -241,7 +240,8 @@ def test_anthropic_client_streaming_openai_upstream(httpserver: HTTPServer):

 def test_responses_api_streaming_basic(httpserver: HTTPServer):
    """Responses API streaming: verify event types and content assembly"""
-    setup_responses_api_mock(httpserver, content="Responses API streaming works!")
+    # Gateway translates Responses API to /v1/chat/completions on upstream
+    setup_openai_chat_mock(httpserver, content="Responses API streaming works!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.responses.create(