# plano/tests/archgw/test_model_alias_routing.py

"""Mock-based tests for model alias routing.

Tests alias resolution, protocol transformation (OpenAI client ↔ Anthropic upstream
and vice versa), error handling, and multi-turn conversations with tool calls.

These tests require the gateway to be running with config_mock_llm.yaml
(started via docker-compose.mock.yaml).
"""

import json
import openai
import anthropic
import pytest
import logging

from pytest_httpserver import HTTPServer

from conftest import (
    setup_openai_chat_mock,
    setup_anthropic_mock,
    setup_error_mock,
    make_openai_chat_response,
)

# Module-level logger named after this test module.
logger = logging.getLogger(__name__)

# Base URL of the gateway under test. OpenAI clients in this file append "/v1"
# to it; Anthropic clients use it unchanged.
LLM_GATEWAY_BASE = "http://localhost:12000"


# =============================================================================
# ALIAS RESOLUTION TESTS — OpenAI client
# =============================================================================


def test_openai_client_with_alias_arch_summarize_v1(httpserver: HTTPServer):
    """Check that alias ``arch.summarize.v1`` goes upstream as gpt-5-mini-2025-08-07.

    An OpenAI client sends a chat completion through the gateway. The list
    returned by ``setup_openai_chat_mock`` is inspected as the requests seen
    by the mock upstream.
    """
    expected_reply = "Hello from mock OpenAI!"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    request = {
        "model": "arch.summarize.v1",
        "max_completion_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    response = gateway.chat.completions.create(**request)

    first_choice = response.choices[0]
    assert first_choice.message.content == expected_reply
    # Exactly one upstream request, and it must carry the resolved model name.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == "gpt-5-mini-2025-08-07"


def test_openai_client_with_alias_arch_v1(httpserver: HTTPServer):
    """Check that alias ``arch.v1`` is rewritten to ``o3`` before going upstream."""
    mock_reply = "Hello from mock o3!"
    seen_upstream = setup_openai_chat_mock(httpserver, content=mock_reply)

    oai = openai.OpenAI(base_url=f"{LLM_GATEWAY_BASE}/v1", api_key="test-key")
    result = oai.chat.completions.create(
        messages=[{"role": "user", "content": "Hello"}],
        max_completion_tokens=500,
        model="arch.v1",
    )

    answer = result.choices[0].message.content
    assert answer == mock_reply
    # One request upstream, addressed to the model the alias maps to.
    assert len(seen_upstream) == 1
    assert seen_upstream[0]["model"] == "o3"


def test_openai_client_with_alias_streaming(httpserver: HTTPServer):
    """Stream a chat completion through an alias and check the reassembled text.

    The OpenAI client requests ``arch.summarize.v1`` with ``stream=True``; the
    concatenated content deltas must equal the text configured on the mock.
    """
    setup_openai_chat_mock(httpserver, content="Hello from streaming mock!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.chat.completions.create(
        model="arch.summarize.v1",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    # A chunk may arrive with an empty ``choices`` list (e.g. a usage-only
    # chunk); skip those instead of failing with IndexError on ``choices[0]``.
    chunks = [
        chunk.choices[0].delta.content
        for chunk in stream
        if chunk.choices and chunk.choices[0].delta.content
    ]

    assert "".join(chunks) == "Hello from streaming mock!"


# =============================================================================
# ALIAS RESOLUTION TESTS — Anthropic client
# =============================================================================


def test_anthropic_client_with_alias_arch_summarize_v1(httpserver: HTTPServer):
    """Use an alias from the Anthropic client against the OpenAI chat mock.

    The reply must come back as Anthropic text blocks containing the mock's
    content, and the single upstream request must name the resolved model.
    """
    expected = "Hello via Anthropic client!"
    upstream_log = setup_openai_chat_mock(httpserver, content=expected)

    sdk = anthropic.Anthropic(base_url=LLM_GATEWAY_BASE, api_key="test-key")
    reply = sdk.messages.create(
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=500,
        model="arch.summarize.v1",
    )

    # Only text blocks contribute to the compared string.
    text_parts = [block.text for block in reply.content if block.type == "text"]
    assert "".join(text_parts) == expected
    # The upstream request must carry the model the alias resolves to.
    assert len(upstream_log) == 1
    assert upstream_log[0]["model"] == "gpt-5-mini-2025-08-07"


def test_anthropic_client_with_alias_streaming(httpserver: HTTPServer):
    """Stream via the Anthropic client using an alias backed by the OpenAI chat mock.

    The text fragments yielded by the Anthropic stream helper must join back
    into the content configured on the mock.
    """
    expected = "Streaming via Anthropic!"
    setup_openai_chat_mock(httpserver, content=expected)

    sdk = anthropic.Anthropic(base_url=LLM_GATEWAY_BASE, api_key="test-key")
    stream_manager = sdk.messages.stream(
        model="arch.summarize.v1",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )
    with stream_manager as stream:
        # Drain the text stream while the connection is still open.
        fragments = list(stream.text_stream)

    assert "".join(fragments) == expected


# =============================================================================
# PROTOCOL TRANSFORMATION TESTS
# =============================================================================


def test_openai_client_with_claude_model(httpserver: HTTPServer):
    """Request a Claude model from the OpenAI client and check reply and upstream model."""
    # Gateway routes OpenAI-format requests to /v1/chat/completions on upstream
    # even for Anthropic models, so the OpenAI chat mock is the one to install.
    expected_reply = "Hello from Claude via OpenAI client!"
    claude_model = "claude-sonnet-4-20250514"
    upstream_requests = setup_openai_chat_mock(httpserver, content=expected_reply)

    gateway = openai.OpenAI(base_url=f"{LLM_GATEWAY_BASE}/v1", api_key="test-key")
    response = gateway.chat.completions.create(
        model=claude_model,
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    returned_text = response.choices[0].message.content
    assert returned_text == expected_reply
    # The model name is not an alias, so it must reach upstream unchanged.
    assert len(upstream_requests) == 1
    assert upstream_requests[0]["model"] == claude_model


def test_openai_client_with_claude_model_streaming(httpserver: HTTPServer):
    """Stream a Claude model through the OpenAI client and check the reassembled text.

    The concatenated content deltas must equal the text configured on the
    OpenAI chat mock.
    """
    # Gateway routes OpenAI-format requests to /v1/chat/completions on upstream
    setup_openai_chat_mock(httpserver, content="Streaming from Claude!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    stream = client.chat.completions.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    # A chunk may arrive with an empty ``choices`` list (e.g. a usage-only
    # chunk); skip those instead of failing with IndexError on ``choices[0]``.
    chunks = [
        chunk.choices[0].delta.content
        for chunk in stream
        if chunk.choices and chunk.choices[0].delta.content
    ]

    assert "".join(chunks) == "Streaming from Claude!"


def test_anthropic_client_with_openai_model(httpserver: HTTPServer):
    """Request gpt-4o-mini from the Anthropic client against the OpenAI chat mock.

    The reply must arrive as Anthropic text blocks with the mock's content,
    and the single upstream request must name ``gpt-4o-mini``.
    """
    expected = "Hello from GPT via Anthropic!"
    target_model = "gpt-4o-mini"
    upstream_log = setup_openai_chat_mock(httpserver, content=expected)

    sdk = anthropic.Anthropic(base_url=LLM_GATEWAY_BASE, api_key="test-key")
    reply = sdk.messages.create(
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=500,
        model=target_model,
    )

    # Concatenate only the text blocks of the Anthropic-format reply.
    text_blocks = (block for block in reply.content if block.type == "text")
    combined = "".join(block.text for block in text_blocks)
    assert combined == expected
    assert len(upstream_log) == 1
    assert upstream_log[0]["model"] == target_model


def test_anthropic_client_with_openai_model_streaming(httpserver: HTTPServer):
    """Stream gpt-4o-mini via the Anthropic client and check the reassembled text.

    The upstream is the OpenAI chat mock; the fragments from the Anthropic
    stream helper must join back into the mock's content.
    """
    expected = "Streaming from GPT!"
    setup_openai_chat_mock(httpserver, content=expected)

    sdk = anthropic.Anthropic(base_url=LLM_GATEWAY_BASE, api_key="test-key")
    stream_kwargs = {
        "model": "gpt-4o-mini",
        "max_tokens": 500,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    with sdk.messages.stream(**stream_kwargs) as stream:
        # Consume the text stream before the context manager closes it.
        streamed_text = "".join(stream.text_stream)

    assert streamed_text == expected


# =============================================================================
# DIRECT MODEL TESTS
# =============================================================================


def test_direct_model_gpt4o_mini_openai(httpserver: HTTPServer):
    """Send a direct model name (no alias) via the OpenAI client.

    The reply must match the mock's content and the upstream request must
    carry ``gpt-4o-mini`` unchanged.
    """
    captured = setup_openai_chat_mock(httpserver, content="Direct GPT response!")

    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        max_completion_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    assert completion.choices[0].message.content == "Direct GPT response!"
    # Check the request count before indexing, as the other tests in this file
    # do; an empty list then fails as an assertion rather than an IndexError.
    assert len(captured) == 1
    assert captured[0]["model"] == "gpt-4o-mini"


def test_direct_model_claude_anthropic(httpserver: HTTPServer):
    """Send a direct Claude model name via the Anthropic client.

    Uses the Anthropic mock upstream; the reply text must match the mock's
    content and the upstream request must carry the model name unchanged.
    """
    captured = setup_anthropic_mock(httpserver, content="Direct Claude response!")

    client = anthropic.Anthropic(api_key="test-key", base_url=LLM_GATEWAY_BASE)
    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        messages=[{"role": "user", "content": "Hello"}],
    )

    response_text = "".join(b.text for b in message.content if b.type == "text")
    assert response_text == "Direct Claude response!"
    # Fail with a clear message, not an IndexError, if nothing reached upstream.
    assert captured, "mock Anthropic upstream received no request"
    assert captured[0]["model"] == "claude-sonnet-4-20250514"


# =============================================================================
# MULTI-TURN WITH TOOL CALLS
# =============================================================================


def test_assistant_message_with_null_content_and_tool_calls(httpserver: HTTPServer):
    """Send a history whose assistant turn has ``content=None`` plus ``tool_calls``.

    The conversation also contains the matching tool result. The test passes
    when the completion returns the mock's content.
    """
    expected_reply = "The weather is sunny in Seattle."
    setup_openai_chat_mock(httpserver, content=expected_reply)

    call_id = "call_test123"

    # Assistant turn that carries only a tool call (no text content).
    assistant_tool_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": call_id,
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"city": "Seattle"}',
                },
            }
        ],
    }
    # Tool result answering the call above.
    tool_result_turn = {
        "role": "tool",
        "tool_call_id": call_id,
        "content": '{"temperature": "10C", "condition": "Partly cloudy"}',
    }
    conversation = [
        {"role": "system", "content": "You are a weather assistant."},
        {"role": "user", "content": "What's the weather in Seattle?"},
        assistant_tool_turn,
        tool_result_turn,
    ]

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }

    gateway = openai.OpenAI(base_url=f"{LLM_GATEWAY_BASE}/v1", api_key="test-key")
    response = gateway.chat.completions.create(
        model="gpt-4o",
        max_tokens=500,
        messages=conversation,
        tools=[weather_tool],
    )

    assert response.choices[0].message.content == expected_reply


# =============================================================================
# ERROR HANDLING
# =============================================================================


def test_nonexistent_alias(httpserver: HTTPServer):
    """Send an unknown alias through the gateway.

    Two outcomes are accepted: the request succeeds (the name was passed
    through as a direct model name), or the gateway/upstream answers with an
    HTTP error status. Any other failure, such as the gateway being
    unreachable, propagates and fails the test instead of being swallowed.
    """
    client = openai.OpenAI(api_key="test-key", base_url=f"{LLM_GATEWAY_BASE}/v1")

    try:
        client.chat.completions.create(
            model="nonexistent.alias",
            max_completion_tokens=50,
            messages=[{"role": "user", "content": "Hello"}],
        )
    except openai.APIStatusError as exc:
        # An HTTP error response is acceptable: a non-existent model may fail.
        logger.info("nonexistent alias rejected with HTTP %s", exc.status_code)
    else:
        # Success means the alias was passed through as a direct model name.
        logger.info("nonexistent alias was accepted as a direct model name")


# =============================================================================
# THINKING MODE
# =============================================================================


def test_anthropic_thinking_mode_streaming(httpserver: HTTPServer):
    """Stream an Anthropic request with thinking enabled and check the event mix.

    The stream must contain a started ``thinking`` block, ``thinking_delta``
    and ``text_delta`` events, and the final message must hold both a
    ``text`` and a ``thinking`` block.
    """
    setup_anthropic_mock(httpserver, thinking=True)

    sdk = anthropic.Anthropic(base_url=LLM_GATEWAY_BASE, api_key="test-key")

    # Types observed while streaming, checked after the stream is drained.
    started_block_types = set()
    delta_types = set()

    with sdk.messages.stream(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        thinking={"type": "enabled", "budget_tokens": 1024},
        messages=[{"role": "user", "content": "What is 2+2?"}],
    ) as stream:
        for event in stream:
            kind = event.type
            if kind == "content_block_start":
                block = getattr(event, "content_block", None)
                if block:
                    started_block_types.add(getattr(block, "type", None))
            elif kind == "content_block_delta":
                delta = getattr(event, "delta", None)
                if delta:
                    delta_types.add(delta.type)

        final = stream.get_final_message()

    assert final is not None
    assert final.content and len(final.content) > 0
    assert "text_delta" in delta_types, "Expected text deltas in stream"
    assert "thinking" in started_block_types, "No thinking block started"
    assert "thinking_delta" in delta_types, "No thinking deltas observed"

    final_block_types = {blk.type for blk in final.content}
    assert "text" in final_block_types
    assert "thinking" in final_block_types