fix(ci): switch retired claude-sonnet-4-20250514 to claude-sonnet-4-6 (#975)

This commit is contained in:
Musa 2026-06-24 10:13:37 -07:00 committed by GitHub
parent 5d990d9609
commit 5cc4c4ee77
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 80 additions and 82 deletions

View file

@ -12,7 +12,7 @@ model_providers:
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: anthropic/claude-sonnet-4-20250514
- model: anthropic/claude-sonnet-4-6
access_key: $ANTHROPIC_API_KEY
listeners:

View file

@ -93,19 +93,19 @@ echo ""
echo "=== /v1/messages ==="
run_test "Non-streaming with PII (phone)" /v1/messages 200 '{
"model": "claude-sonnet-4-20250514",
"model": "claude-sonnet-4-6",
"max_tokens": 256,
"messages": [{"role": "user", "content": "Call me at 555-867-5309 to discuss my account"}]
}'
run_test "Non-streaming with PII (SSN)" /v1/messages 200 '{
"model": "claude-sonnet-4-20250514",
"model": "claude-sonnet-4-6",
"max_tokens": 256,
"messages": [{"role": "user", "content": "My SSN is 123-45-6789"}]
}'
run_test "No PII" /v1/messages 200 '{
"model": "claude-sonnet-4-20250514",
"model": "claude-sonnet-4-6",
"max_tokens": 256,
"messages": [{"role": "user", "content": "Hello, how are you?"}]
}'

View file

@ -30,7 +30,7 @@ model_providers:
model: openai/gpt-4o-mini
- access_key: $ANTHROPIC_API_KEY
model: anthropic/claude-sonnet-4-20250514
model: anthropic/claude-sonnet-4-6
system_prompt: |
You are a helpful assistant.

View file

@ -28,7 +28,7 @@ model_providers:
- model: anthropic/*
access_key: $ANTHROPIC_API_KEY
- model: anthropic/claude-sonnet-4-20250514
- model: anthropic/claude-sonnet-4-6
access_key: $ANTHROPIC_API_KEY
- model: anthropic/claude-3-haiku-20240307
@ -71,7 +71,7 @@ model_aliases:
# Alias for creative tasks -> Claude model
arch.creative.v1:
target: claude-sonnet-4-20250514
target: claude-sonnet-4-6
# Alias for quick responses -> fast model
arch.fast.v1:
@ -85,7 +85,7 @@ model_aliases:
target: gpt-5-mini-2025-08-07
creative-model:
target: claude-sonnet-4-20250514
target: claude-sonnet-4-6
coding-model:
target: us.amazon.nova-premier-v1:0

View file

@ -33,7 +33,7 @@ routing_preferences:
- name: code_generation
description: generating new code, writing functions, or creating boilerplate
models:
- anthropic/claude-sonnet-4-20250514
- anthropic/claude-sonnet-4-6
- openai/gpt-4o
```
@ -46,7 +46,7 @@ When a request arrives, Plano:
```
1. Request arrives → "Write binary search in Python"
2. Plano-Orchestrator classifies → route: "code_generation"
3. Response → models: ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"]
3. Response → models: ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"]
```
No match? Plano-Orchestrator returns an empty route → client falls back to the model in the original request.
@ -98,7 +98,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
Response:
```json
{
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
"route": "code_generation",
"trace_id": "c16d1096c1af4a17abb48fb182918a88"
}
@ -124,7 +124,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
Response (first call):
```json
{
"model": "anthropic/claude-sonnet-4-20250514",
"model": "anthropic/claude-sonnet-4-6",
"route": "code_generation",
"trace_id": "c16d1096c1af4a17abb48fb182918a88",
"session_id": "my-session-123",
@ -146,7 +146,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
Response (pinned):
```json
{
"model": "anthropic/claude-sonnet-4-20250514",
"model": "anthropic/claude-sonnet-4-6",
"route": "code_generation",
"trace_id": "a1b2c3d4e5f6...",
"session_id": "my-session-123",
@ -233,7 +233,7 @@ kubectl rollout restart deployment/plano
--- 1. Code generation query (OpenAI format) ---
{
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
"route": "code_generation",
"trace_id": "c16d1096c1af4a17abb48fb182918a88"
}
@ -254,14 +254,14 @@ kubectl rollout restart deployment/plano
--- 4. Code generation query (Anthropic format) ---
{
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
"route": "code_generation",
"trace_id": "26be822bbdf14a3ba19fe198e55ea4a9"
}
--- 7. Session pinning - first call (fresh routing decision) ---
{
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
"route": "code_generation",
"trace_id": "f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6",
"session_id": "demo-session-001",
@ -271,7 +271,7 @@ kubectl rollout restart deployment/plano
--- 8. Session pinning - second call (same session, pinned) ---
Notice: same model returned with "pinned": true, routing was skipped
{
"model": "anthropic/claude-sonnet-4-20250514",
"model": "anthropic/claude-sonnet-4-6",
"route": "code_generation",
"trace_id": "a9b8c7d6e5f4a3b2c1d0e9f8a7b6c5d4",
"session_id": "demo-session-001",

View file

@ -13,7 +13,7 @@ model_providers:
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- model: anthropic/claude-sonnet-4-20250514
- model: anthropic/claude-sonnet-4-6
access_key: $ANTHROPIC_API_KEY
routing_preferences:
@ -26,5 +26,5 @@ routing_preferences:
- name: code_generation
description: generating new code, writing functions, or creating boilerplate
models:
- anthropic/claude-sonnet-4-20250514
- anthropic/claude-sonnet-4-6
- openai/gpt-4o

View file

@ -23,7 +23,7 @@ model_providers:
- name: complex_reasoning
description: complex reasoning tasks, multi-step analysis, or detailed explanations
- model: anthropic/claude-sonnet-4-20250514
- model: anthropic/claude-sonnet-4-6
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: code_generation

View file

@ -102,7 +102,7 @@ curl -s "$PLANO_URL/routing/v1/chat/completions" \
{
"name": "coding",
"description": "code generation, writing functions, debugging",
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o", "openai/gpt-4o-mini"],
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini"],
"selection_policy": {"prefer": "fastest"}
}
]

View file

@ -12,13 +12,13 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
PROMETHEUS_METRICS = """\
# HELP model_latency_p95_seconds P95 request latency in seconds per model
# TYPE model_latency_p95_seconds gauge
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-20250514"} 0.85
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-6"} 0.85
model_latency_p95_seconds{model_name="openai/gpt-4o"} 1.20
model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40
""".encode()
COST_DATA = {
"anthropic/claude-sonnet-4-20250514": {
"anthropic/claude-sonnet-4-6": {
"input_per_million": 3.0,
"output_per_million": 15.0,
},

View file

@ -30,7 +30,7 @@ POST http://localhost:12000/routing/v1/messages
Content-Type: application/json
{
"model": "claude-sonnet-4-20250514",
"model": "claude-sonnet-4-6",
"max_tokens": 1024,
"messages": [{"role": "user", "content": "Write a REST API in Go using Gin"}]
}

View file

@ -17,7 +17,7 @@ model_providers:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- model: anthropic/claude-sonnet-4-20250514
- model: anthropic/claude-sonnet-4-6
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: code generation