mirror of
https://github.com/katanemo/plano.git
synced 2026-05-04 21:32:43 +02:00
- OpenAI client → Claude model: gateway routes to /v1/chat/completions (not /v1/messages), so use setup_openai_chat_mock - Responses API: gateway translates all requests to /v1/chat/completions on upstream with base_url providers, so use setup_openai_chat_mock - Remove unused imports (json, pytest, setup_responses_api_mock) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
365 lines
14 KiB
Python
365 lines
14 KiB
Python
"""Mock-based tests for model alias routing.

Tests alias resolution, protocol transformation (OpenAI client ↔ Anthropic upstream
and vice versa), error handling, and multi-turn conversations with tool calls.

These tests require the gateway to be running with config_mock_llm.yaml
(started via docker-compose.mock.yaml).
"""
|
|
|
|
import json
|
|
import openai
|
|
import anthropic
|
|
import pytest
|
|
import logging
|
|
|
|
from pytest_httpserver import HTTPServer
|
|
|
|
from conftest import (
|
|
setup_openai_chat_mock,
|
|
setup_anthropic_mock,
|
|
setup_error_mock,
|
|
make_openai_chat_response,
|
|
)
|
|
|
|
# Module-level logger, named after this test module.
logger = logging.getLogger(__name__)
|
|
|
|
# Base URL of the gateway under test; every client in this module points here.
LLM_GATEWAY_BASE = "http://localhost:12000"
|
|
|
|
|
|
# =============================================================================
|
|
# ALIAS RESOLUTION TESTS — OpenAI client
|
|
# =============================================================================
|
|
|
|
|
|
def test_openai_client_with_alias_arch_summarize_v1(httpserver: HTTPServer):
    """The arch.summarize.v1 alias must reach upstream as gpt-5-mini-2025-08-07."""
    expected_reply = "Hello from mock OpenAI!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    request_kwargs = {
        "model": "arch.summarize.v1",
        "max_completion_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    response = gateway.chat.completions.create(**request_kwargs)

    assert response.choices[0].message.content == expected_reply
    # The alias has to be rewritten to the concrete model before the
    # request is handed to the upstream mock.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-5-mini-2025-08-07"
|
|
|
|
|
|
def test_openai_client_with_alias_arch_v1(httpserver: HTTPServer):
    """The arch.v1 alias must reach upstream as the o3 model."""
    upstream_requests = setup_openai_chat_mock(
        httpserver,
        content="Hello from mock o3!",
    )
    user_turn = {"role": "user", "content": "Hello"}

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="arch.v1",
        max_completion_tokens=500,
        messages=[user_turn],
    )
    reply = response.choices[0].message.content

    assert reply == "Hello from mock o3!"
    # Exactly one upstream call, carrying the resolved model name.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "o3"
|
|
|
|
|
|
def test_openai_client_with_alias_streaming(httpserver: HTTPServer):
    """A streamed request through an alias yields the full mocked content."""
    setup_openai_chat_mock(httpserver, content="Hello from streaming mock!")

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    events = gateway.chat.completions.create(
        model="arch.summarize.v1",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    # Concatenate only the deltas that actually carry text; chunks whose
    # delta content is empty/None are skipped.
    streamed_text = "".join(
        event.choices[0].delta.content
        for event in events
        if event.choices[0].delta.content
    )

    assert streamed_text == "Hello from streaming mock!"
|
|
|
|
|
|
# =============================================================================
|
|
# ALIAS RESOLUTION TESTS — Anthropic client
|
|
# =============================================================================
|
|
|
|
|
|
def test_anthropic_client_with_alias_arch_summarize_v1(httpserver: HTTPServer):
    """Anthropic client + alias: served by the OpenAI chat mock, reply comes back in Anthropic format."""
    expected_reply = "Hello via Anthropic client!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    reply = gateway.messages.create(
        model="arch.summarize.v1",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Only text blocks contribute to the visible reply.
    text_parts = [block.text for block in reply.content if block.type == "text"]
    assert "".join(text_parts) == expected_reply

    # The upstream mock must have seen one request with the resolved model.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-5-mini-2025-08-07"
|
|
|
|
|
|
def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):
    """Anthropic streaming through an alias: OpenAI chat mock upstream, Anthropic text stream back."""
    setup_openai_chat_mock(httpserver, content="Streaming via Anthropic!")

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    stream_kwargs = {
        "model": "arch.summarize.v1",
        "max_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    with gateway.messages.stream(**stream_kwargs) as stream:
        # Drain the text stream while the connection is still open.
        streamed_text = "".join(stream.text_stream)

    assert streamed_text == "Streaming via Anthropic!"
|
|
|
|
|
|
# =============================================================================
|
|
# PROTOCOL TRANSFORMATION TESTS
|
|
# =============================================================================
|
|
|
|
|
|
def test_openai_client_with_claude_model(httpserver: HTTPServer):
    """OpenAI client asking for a Claude model is proxied via /v1/chat/completions."""
    # The gateway forwards OpenAI-format requests to /v1/chat/completions on
    # the upstream even for Anthropic models, hence the OpenAI chat mock.
    expected_reply = "Hello from Claude via OpenAI client!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )
    reply = response.choices[0].message.content

    assert reply == expected_reply
    # No alias is involved, so the model name passes through unchanged.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "claude-sonnet-4-20250514"
|
|
|
|
|
|
def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
    """Streaming OpenAI client asking for a Claude model, proxied via /v1/chat/completions."""
    # OpenAI-format requests go to /v1/chat/completions on the upstream,
    # so the OpenAI chat mock serves this request.
    setup_openai_chat_mock(httpserver, content="Streaming from Claude!")

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    events = gateway.chat.completions.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    # Keep only the deltas that carry text and stitch them back together.
    streamed_text = "".join(
        event.choices[0].delta.content
        for event in events
        if event.choices[0].delta.content
    )

    assert streamed_text == "Streaming from Claude!"
|
|
|
|
|
|
def test_anthropic_client_with_openai_model(httpserver: HTTPServer):
    """Anthropic client asking for gpt-4o-mini: OpenAI chat mock upstream, Anthropic-format reply."""
    expected_reply = "Hello from GPT via Anthropic!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    reply = gateway.messages.create(
        model="gpt-4o-mini",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Only text blocks contribute to the visible reply.
    text_parts = [block.text for block in reply.content if block.type == "text"]
    assert "".join(text_parts) == expected_reply

    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-4o-mini"
|
|
|
|
|
|
def test_anthropic_client_with_openai_model_streaming(httpserver: HTTPServer):
    """Anthropic streaming client asking for gpt-4o-mini receives the mocked text."""
    setup_openai_chat_mock(httpserver, content="Streaming from GPT!")

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    stream_kwargs = {
        "model": "gpt-4o-mini",
        "max_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    with gateway.messages.stream(**stream_kwargs) as stream:
        # Drain the text stream while the connection is still open.
        streamed_text = "".join(stream.text_stream)

    assert streamed_text == "Streaming from GPT!"
|
|
|
|
|
|
# =============================================================================
|
|
# DIRECT MODEL TESTS
|
|
# =============================================================================
|
|
|
|
|
|
def test_direct_model_gpt4o_mini_openai(httpserver: HTTPServer):
    """Direct model name (no alias) via the OpenAI client.

    The model name is expected to arrive at the upstream mock unchanged.
    """
    captured = setup_openai_chat_mock(httpserver, content="Direct GPT response!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    assert completion.choices[0].message.content == "Direct GPT response!"
    # Check the request count first (as the other tests in this module do) so
    # a missing upstream call fails as an assertion instead of an IndexError.
    assert len(captured) == 1
    assert captured[0]["model"] == "gpt-4o-mini"
|
|
|
|
|
|
def test_direct_model_claude_anthropic(httpserver: HTTPServer):
    """Direct Claude model via the Anthropic client.

    Uses the Anthropic mock; the model name is expected to arrive at the
    upstream mock unchanged.
    """
    captured = setup_anthropic_mock(httpserver, content="Direct Claude response!")

    client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Only text blocks contribute to the visible reply.
    response_text = "".join(b.text for b in message.content if b.type == "text")
    assert response_text == "Direct Claude response!"
    # Check the request count first (as the other tests in this module do) so
    # a missing upstream call fails as an assertion instead of an IndexError.
    assert len(captured) == 1
    assert captured[0]["model"] == "claude-sonnet-4-20250514"
|
|
|
|
|
|
# =============================================================================
|
|
# MULTI-TURN WITH TOOL CALLS
|
|
# =============================================================================
|
|
|
|
|
|
def test_assistant_message_with_null_content_and_tool_calls(httpserver: HTTPServer):
    """History containing an assistant turn with null content + tool_calls must be accepted."""
    setup_openai_chat_mock(httpserver, content="The weather is sunny in Seattle.")

    # Assistant turn that carries only a tool call (content is explicitly None).
    assistant_tool_call = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call_test123",
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"city": "Seattle"}',
                },
            }
        ],
    }
    # Tool result answering the call above (matched by tool_call_id).
    tool_result = {
        "role": "tool",
        "tool_call_id": "call_test123",
        "content": '{"temperature": "10C", "condition": "Partly cloudy"}',
    }
    conversation = [
        {"role": "system", "content": "You are a weather assistant."},
        {"role": "user", "content": "What's the weather in Seattle?"},
        assistant_tool_call,
        tool_result,
    ]
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    response = gateway.chat.completions.create(
        model="gpt-4o",
        max_tokens=500,
        messages=conversation,
        tools=[weather_tool],
    )

    assert response.choices[0].message.content == "The weather is sunny in Seattle."
|
|
|
|
|
|
# =============================================================================
|
|
# ERROR HANDLING
|
|
# =============================================================================
|
|
|
|
|
|
def test_nonexistent_alias(httpserver: HTTPServer):
    """Non-existent alias should be treated as a direct model name and likely fail.

    Two outcomes are accepted: the request succeeds (the name was passed
    through as a direct model name), or the gateway answers with an HTTP
    error status. Anything else (connection failures, timeouts, programming
    errors) is allowed to propagate and fail the test, so the test no longer
    passes when the gateway is simply unreachable.
    """
    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")

    try:
        client.chat.completions.create(
            model="nonexistent.alias",
            max_completion_tokens=50,
            messages=[{"role": "user", "content": "Hello"}],
        )
        # If it succeeds, the alias was passed through as a direct model name.
    except openai.APIStatusError as err:
        # An HTTP 4xx/5xx reply is also acceptable: a non-existent model
        # should fail. Record the status for easier debugging.
        logger.info("nonexistent.alias rejected with HTTP %s", err.status_code)
|
|
|
|
|
|
# =============================================================================
|
|
# THINKING MODE
|
|
# =============================================================================
|
|
|
|
|
|
def test_anthropic_thinking_mode_streaming(httpserver: HTTPServer):
    """With thinking enabled, the stream must carry both thinking and text blocks."""
    setup_anthropic_mock(httpserver, thinking=True)

    gateway = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)

    # Types observed while draining the stream.
    started_block_types = set()
    delta_types = set()

    with gateway.messages.stream(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        thinking={"type": "enabled", "budget_tokens": 1024},
        messages=[{"role": "user", "content": "What is 2+2?"}],
    ) as stream:
        for event in stream:
            if event.type == "content_block_start":
                block = getattr(event, "content_block", None)
                if block:
                    started_block_types.add(getattr(block, "type", None))
            elif event.type == "content_block_delta":
                delta = getattr(event, "delta", None)
                if delta:
                    delta_types.add(delta.type)

        # Fetch the accumulated message while the stream context is still open.
        final = stream.get_final_message()

    assert final is not None
    assert final.content and len(final.content) > 0
    assert "text_delta" in delta_types, "Expected text deltas in stream"
    assert "thinking" in started_block_types, "No thinking block started"
    assert "thinking_delta" in delta_types, "No thinking deltas observed"

    final_block_types = [block.type for block in final.content]
    assert "text" in final_block_types
    assert "thinking" in final_block_types
|