plano/tests/archgw/test_model_alias_routing.py
Adil Hafeez d8e5e48f4a Fix mock handlers to match gateway routing behavior
- OpenAI client → Claude model: gateway routes to /v1/chat/completions
  (not /v1/messages), so use setup_openai_chat_mock
- Responses API: gateway translates all requests to /v1/chat/completions
  on upstream with base_url providers, so use setup_openai_chat_mock
- Remove unused imports (json, pytest, setup_responses_api_mock)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 23:54:57 +00:00

365 lines
14 KiB
Python

"""Mock-based tests for model alias routing.
Tests alias resolution, protocol transformation (OpenAI client ↔ Anthropic upstream
and vice versa), error handling, and multi-turn conversations with tool calls.
These tests require the gateway to be running with config_mock_llm.yaml
(started via docker-compose.mock.yaml).
"""
import json
import openai
import anthropic
import pytest
import logging
from pytest_httpserver import HTTPServer
from conftest import (
setup_openai_chat_mock,
setup_anthropic_mock,
setup_error_mock,
make_openai_chat_response,
)
logger = logging.getLogger(__name__)
LLM_GATEWAY_BASE = "http://localhost:12000"
# =============================================================================
# ALIAS RESOLUTION TESTS — OpenAI client
# =============================================================================
def test_openai_client_with_alias_arch_summarize_v1(httpserver: HTTPServer):
    """Check that the ``arch.summarize.v1`` alias reaches upstream as ``gpt-5-mini-2025-08-07``.

    An OpenAI chat-completions request is sent through the gateway using the
    alias; the mocked reply must come back unchanged and the single request
    recorded by the mock must carry the resolved model name.
    """
    expected_reply = "Hello from mock OpenAI!"
    # NOTE(review): the helper's return value is used as a list of request
    # bodies seen by the mock upstream — confirm against conftest.
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="arch.summarize.v1",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    assert response.choices[0].message.content == expected_reply

    # The alias must already be resolved by the time the request hits upstream.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-5-mini-2025-08-07"
def test_openai_client_with_alias_arch_v1(httpserver: HTTPServer):
    """Check that the ``arch.v1`` alias reaches upstream as ``o3``.

    Sends one OpenAI chat-completions request through the gateway and verifies
    both the mocked reply and the model name recorded by the mock upstream.
    """
    expected_reply = "Hello from mock o3!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="arch.v1",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    assert response.choices[0].message.content == expected_reply
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "o3"
def test_openai_client_with_alias_streaming(httpserver: HTTPServer):
    """Streaming with an alias should resolve and return the streamed content.

    The request uses the ``arch.summarize.v1`` alias with ``stream=True``; the
    concatenation of all content deltas must equal the mocked reply.
    """
    expected_reply = "Hello from streaming mock!"
    setup_openai_chat_mock(httpserver, content=expected_reply)

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.chat.completions.create(
        model="arch.summarize.v1",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    # A streamed chunk may legitimately carry an empty ``choices`` list (for
    # example a usage-only chunk), so guard before indexing into it. Chunks
    # without content (role-only / finish chunks) are skipped as before.
    streamed_text = "".join(
        chunk.choices[0].delta.content
        for chunk in stream
        if chunk.choices and chunk.choices[0].delta.content
    )

    assert streamed_text == expected_reply
# =============================================================================
# ALIAS RESOLUTION TESTS — Anthropic client
# =============================================================================
def test_anthropic_client_with_alias_arch_summarize_v1(httpserver: HTTPServer):
    """Anthropic client + alias: upstream is the OpenAI mock, reply arrives in Anthropic format.

    The text blocks of the Anthropic-style response must join to the mocked
    content, and the mock must have recorded exactly one request whose model
    is the resolved ``gpt-5-mini-2025-08-07``.
    """
    expected_reply = "Hello via Anthropic client!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    reply = gateway.messages.create(
        model="arch.summarize.v1",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Only text blocks contribute to the visible answer.
    text_parts = [block.text for block in reply.content if block.type == "text"]
    assert "".join(text_parts) == expected_reply

    # Upstream should have seen the resolved model name, not the alias.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-5-mini-2025-08-07"
def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):
    """Anthropic streaming with an alias, served by the OpenAI mock upstream.

    The text yielded by the Anthropic stream helper must concatenate to the
    mocked content.
    """
    expected_reply = "Streaming via Anthropic!"
    setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    request = {
        "model": "arch.summarize.v1",
        "max_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    with gateway.messages.stream(**request) as stream:
        # Drain the text stream while the connection is still open.
        streamed_text = "".join(stream.text_stream)

    assert streamed_text == expected_reply
# =============================================================================
# PROTOCOL TRANSFORMATION TESTS
# =============================================================================
def test_openai_client_with_claude_model(httpserver: HTTPServer):
    """OpenAI client requesting a Claude model, proxied via /v1/chat/completions.

    The reply must match the mocked content and the mock must have recorded
    exactly one request carrying the unchanged Claude model name.
    """
    expected_reply = "Hello from Claude via OpenAI client!"

    # The gateway forwards OpenAI-format requests to /v1/chat/completions on
    # the upstream even for Anthropic models, hence the OpenAI chat mock.
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    answer = response.choices[0].message.content
    assert answer == expected_reply
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "claude-sonnet-4-20250514"
def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
    """OpenAI client streaming a Claude model, proxied via /v1/chat/completions.

    The concatenation of all streamed content deltas must equal the mocked
    reply.
    """
    expected_reply = "Streaming from Claude!"

    # Gateway routes OpenAI-format requests to /v1/chat/completions on upstream.
    setup_openai_chat_mock(httpserver, content=expected_reply)

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.chat.completions.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    # A streamed chunk may legitimately carry an empty ``choices`` list (for
    # example a usage-only chunk), so guard before indexing into it. Chunks
    # without content (role-only / finish chunks) are skipped as before.
    streamed_text = "".join(
        chunk.choices[0].delta.content
        for chunk in stream
        if chunk.choices and chunk.choices[0].delta.content
    )

    assert streamed_text == expected_reply
def test_anthropic_client_with_openai_model(httpserver: HTTPServer):
    """Anthropic client requesting ``gpt-4o-mini``, served by the OpenAI mock upstream.

    The text blocks of the Anthropic-format reply must join to the mocked
    content, and the mock must have recorded exactly one request for
    ``gpt-4o-mini``.
    """
    expected_reply = "Hello from GPT via Anthropic!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    reply = gateway.messages.create(
        model="gpt-4o-mini",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Only text blocks contribute to the visible answer.
    text_parts = [block.text for block in reply.content if block.type == "text"]
    assert "".join(text_parts) == expected_reply

    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-4o-mini"
def test_anthropic_client_with_openai_model_streaming(httpserver: HTTPServer):
    """Anthropic streaming of ``gpt-4o-mini``, served by the OpenAI mock upstream.

    The text yielded by the Anthropic stream helper must concatenate to the
    mocked content.
    """
    expected_reply = "Streaming from GPT!"
    setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    request = {
        "model": "gpt-4o-mini",
        "max_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    with gateway.messages.stream(**request) as stream:
        # Drain the text stream while the connection is still open.
        streamed_text = "".join(stream.text_stream)

    assert streamed_text == expected_reply
# =============================================================================
# DIRECT MODEL TESTS
# =============================================================================
def test_direct_model_gpt4o_mini_openai(httpserver: HTTPServer):
    """Direct model name (no alias) via the OpenAI client.

    The reply must match the mocked content and the mock must have recorded
    exactly one request whose model is still ``gpt-4o-mini``.
    """
    expected_reply = "Direct GPT response!"
    captured = setup_openai_chat_mock(httpserver, content=expected_reply)

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    assert completion.choices[0].message.content == expected_reply

    # Same request-count check the alias tests make: it turns an empty capture
    # into a clear assertion failure (instead of an IndexError below) and
    # catches unexpected duplicate upstream calls.
    assert len(captured) == 1
    assert captured[0]["model"] == "gpt-4o-mini"
def test_direct_model_claude_anthropic(httpserver: HTTPServer):
    """Direct Claude model via the Anthropic client.

    The text blocks of the reply must join to the mocked content and the mock
    must have recorded exactly one request for ``claude-sonnet-4-20250514``.
    """
    expected_reply = "Direct Claude response!"
    captured = setup_anthropic_mock(httpserver, content=expected_reply)

    client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    response_text = "".join(b.text for b in message.content if b.type == "text")
    assert response_text == expected_reply

    # Same request-count check the other routing tests make: it turns an empty
    # capture into a clear assertion failure (instead of an IndexError below)
    # and catches unexpected duplicate upstream calls.
    assert len(captured) == 1
    assert captured[0]["model"] == "claude-sonnet-4-20250514"
# =============================================================================
# MULTI-TURN WITH TOOL CALLS
# =============================================================================
def test_assistant_message_with_null_content_and_tool_calls(httpserver: HTTPServer):
    """Multi-turn history with a null-content assistant tool call must pass through the gateway.

    The conversation contains an assistant turn whose ``content`` is ``None``
    and which carries ``tool_calls``, followed by the matching ``tool`` result.
    The test only checks that the mocked final answer is returned.
    """
    expected_reply = "The weather is sunny in Seattle."
    setup_openai_chat_mock(httpserver, content=expected_reply)

    tool_call_id = "call_test123"

    # Assistant turn that requested the tool: no text, only the call itself.
    assistant_tool_request = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": tool_call_id,
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"city": "Seattle"}',
                },
            }
        ],
    }
    # Tool output answering the call above.
    tool_result = {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "content": '{"temperature": "10C", "condition": "Partly cloudy"}',
    }
    history = [
        {"role": "system", "content": "You are a weather assistant."},
        {"role": "user", "content": "What's the weather in Seattle?"},
        assistant_tool_request,
        tool_result,
    ]

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="gpt-4o",
        max_tokens=500,
        messages=history,
        tools=[weather_tool],
    )

    assert response.choices[0].message.content == expected_reply
# =============================================================================
# ERROR HANDLING
# =============================================================================
def test_nonexistent_alias(httpserver: HTTPServer):
    """Non-existent alias should be treated as a direct model name and likely fail.

    Both outcomes are accepted: the gateway may pass the name through as a
    direct model (success) or answer with an HTTP error status. Anything else
    (gateway unreachable, timeout, programming error in the test) is NOT
    swallowed, so the test can no longer pass without the gateway actually
    having handled the request.
    """
    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    try:
        client.chat.completions.create(
            model="nonexistent.alias",
            max_completion_tokens=50,
            messages=[{"role": "user", "content": "Hello"}],
        )
    except openai.APIStatusError as exc:
        # An HTTP error response is acceptable - a non-existent model should fail.
        logger.info(
            "nonexistent.alias was rejected with HTTP status %s", exc.status_code
        )
    else:
        # Success means the alias was passed through as a direct model name.
        logger.info("nonexistent.alias was passed through as a direct model name")
# =============================================================================
# THINKING MODE
# =============================================================================
def test_anthropic_thinking_mode_streaming(httpserver: HTTPServer):
    """Thinking mode over the Anthropic stream must surface both thinking and text blocks.

    While iterating the raw stream events the test records whether a thinking
    content block was opened, whether thinking deltas arrived and whether text
    deltas arrived. The assembled final message must then contain both a
    ``text`` and a ``thinking`` block.
    """
    setup_anthropic_mock(httpserver, thinking=True)
    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)

    saw_thinking_start = False
    saw_thinking_delta = False
    saw_text_delta = False

    with gateway.messages.stream(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        thinking={"type": "enabled", "budget_tokens": 1024},
        messages=[{"role": "user", "content": "What is 2+2?"}],
    ) as stream:
        for event in stream:
            kind = event.type
            if kind == "content_block_start":
                # Not every start event is guaranteed to expose a block here.
                opened = getattr(event, "content_block", None)
                if opened and getattr(opened, "type", None) == "thinking":
                    saw_thinking_start = True
            elif kind == "content_block_delta" and getattr(event, "delta", None):
                delta_kind = event.delta.type
                if delta_kind == "text_delta":
                    saw_text_delta = True
                elif delta_kind == "thinking_delta":
                    saw_thinking_delta = True

        # Fetch the accumulated message while the stream context is still open.
        final = stream.get_final_message()

    assert final is not None
    assert final.content and len(final.content) > 0
    assert saw_text_delta, "Expected text deltas in stream"
    assert saw_thinking_start, "No thinking block started"
    assert saw_thinking_delta, "No thinking deltas observed"

    seen_block_types = {blk.type for blk in final.content}
    assert "text" in seen_block_types
    assert "thinking" in seen_block_types