fix(ci): switch retired claude-sonnet-4-20250514 to claude-sonnet-4-6 (#975)

2026-06-26 15:39:40 +02:00 · 2026-06-24 10:13:37 -07:00 · 2026-06-24 10:13:37 -07:00 · 5cc4c4ee77
commit 5cc4c4ee77
parent 5d990d9609
29 changed files with 80 additions and 82 deletions
--- a/tests/e2e/config_memory_state_v1_responses.yaml
+++ b/tests/e2e/config_memory_state_v1_responses.yaml
@@ -15,7 +15,7 @@ llm_providers:
    default: true

    # Anthropic Models
-  - model: anthropic/claude-sonnet-4-20250514
+  - model: anthropic/claude-sonnet-4-6
    access_key: $ANTHROPIC_API_KEY

 # State storage configuration for v1/responses API
--- a/tests/e2e/test_model_alias_routing.py
+++ b/tests/e2e/test_model_alias_routing.py
@@ -440,7 +440,7 @@ def test_anthropic_thinking_mode_streaming():
    text_delta_seen = False

    with client.messages.stream(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        max_tokens=2048,
        thinking={"type": "enabled", "budget_tokens": 1024},  # <- idiomatic
        messages=[{"role": "user", "content": "Explain briefly what 2+2 equals"}],
--- a/tests/e2e/test_openai_responses_api_client.py
+++ b/tests/e2e/test_openai_responses_api_client.py
@@ -489,7 +489,7 @@ def test_openai_responses_api_non_streaming_upstream_anthropic():
    client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")

    resp = client.responses.create(
-        model="claude-sonnet-4-20250514", input="Hello, translate this via grok alias"
+        model="claude-sonnet-4-6", input="Hello, translate this via grok alias"
    )

    # Print the response content - handle both responses format and chat completions format
@@ -509,7 +509,7 @@ def test_openai_responses_api_with_streaming_upstream_anthropic():

    # Simple streaming responses API request using a direct model (pass-through)
    stream = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="Write a short haiku about coding",
        stream=True,
    )
@@ -566,7 +566,7 @@ def test_openai_responses_api_non_streaming_with_tools_upstream_anthropic():
    ]

    resp = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="Call the echo tool",
        tools=tools,
    )
@@ -598,7 +598,7 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic():
    ]

    stream = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="Call the echo tool with hello_world",
        tools=tools,
        stream=True,
--- a/tests/e2e/test_openai_responses_api_client_with_state.py
+++ b/tests/e2e/test_openai_responses_api_client_with_state.py
@@ -35,7 +35,7 @@ def test_conversation_state_management_two_turn():
    # Turn 1: Send initial message to Anthropic (non-OpenAI model)
    logger.info("\n[TURN 1] Sending initial message...")
    resp1 = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="My name is Alice and I like pizza.",
    )

@@ -53,7 +53,7 @@ def test_conversation_state_management_two_turn():
        f"\n[TURN 2] Sending follow-up with previous_response_id={response_id_1}"
    )
    resp2 = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="Please list all the messages you have received in our conversation, numbering each one.",
        previous_response_id=response_id_1,
    )
@@ -121,7 +121,7 @@ def test_conversation_state_management_two_turn_streaming():
    # Turn 1: Send initial streaming message to Anthropic (non-OpenAI model)
    logger.info("\n[TURN 1] Sending initial streaming message...")
    stream1 = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="My name is Alice and I like pizza.",
        stream=True,
    )
@@ -154,7 +154,7 @@ def test_conversation_state_management_two_turn_streaming():
        f"\n[TURN 2] Sending follow-up streaming request with previous_response_id={response_id_1}"
    )
    stream2 = client.responses.create(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        input="Please list all the messages you have received in our conversation, numbering each one.",
        previous_response_id=response_id_1,
        stream=True,
--- a/tests/e2e/test_prompt_gateway.py
+++ b/tests/e2e/test_prompt_gateway.py
@@ -395,7 +395,7 @@ def test_claude_v1_messages_api():
    )

    message = client.messages.create(
-        model="claude-sonnet-4-20250514",  # Use working model from smoke test
+        model="claude-sonnet-4-6",  # Use working model from smoke test
        max_tokens=50,
        messages=[
            {
@@ -414,7 +414,7 @@ def test_claude_v1_messages_api_streaming():
    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)

    with client.messages.stream(
-        model="claude-sonnet-4-20250514",
+        model="claude-sonnet-4-6",
        max_tokens=50,
        messages=[
            {
@@ -525,7 +525,7 @@ def test_openai_gpt4o_mini_v1_messages_api_streaming():


 def test_openai_client_with_claude_model_streaming():
-    """Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-20250514)
+    """Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-6)
    This tests the transformation: Anthropic upstream -> OpenAI client format with proper chunk handling
    """
    # Get the base URL from the LLM gateway endpoint
@@ -537,7 +537,7 @@ def test_openai_client_with_claude_model_streaming():
    )

    stream = client.chat.completions.create(
-        model="claude-sonnet-4-20250514",  # Claude model via OpenAI client
+        model="claude-sonnet-4-6",  # Claude model via OpenAI client
        max_tokens=50,
        messages=[
            {