mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
fix(ci): switch retired claude-sonnet-4-20250514 to claude-sonnet-4-6 (#975)
This commit is contained in:
parent
5d990d9609
commit
5cc4c4ee77
29 changed files with 80 additions and 82 deletions
|
|
@ -12,7 +12,7 @@ model_providers:
|
|||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
listeners:
|
||||
|
|
|
|||
|
|
@ -93,19 +93,19 @@ echo ""
|
|||
echo "=== /v1/messages ==="
|
||||
|
||||
run_test "Non-streaming with PII (phone)" /v1/messages 200 '{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": "Call me at 555-867-5309 to discuss my account"}]
|
||||
}'
|
||||
|
||||
run_test "Non-streaming with PII (SSN)" /v1/messages 200 '{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": "My SSN is 123-45-6789"}]
|
||||
}'
|
||||
|
||||
run_test "No PII" /v1/messages 200 '{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": "Hello, how are you?"}]
|
||||
}'
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ model_providers:
|
|||
model: openai/gpt-4o-mini
|
||||
|
||||
- access_key: $ANTHROPIC_API_KEY
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
model: anthropic/claude-sonnet-4-6
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ model_providers:
|
|||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
|
|
@ -71,7 +71,7 @@ model_aliases:
|
|||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-sonnet-4-20250514
|
||||
target: claude-sonnet-4-6
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
|
|
@ -85,7 +85,7 @@ model_aliases:
|
|||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
creative-model:
|
||||
target: claude-sonnet-4-20250514
|
||||
target: claude-sonnet-4-6
|
||||
|
||||
coding-model:
|
||||
target: us.amazon.nova-premier-v1:0
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ routing_preferences:
|
|||
- name: code_generation
|
||||
description: generating new code, writing functions, or creating boilerplate
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- openai/gpt-4o
|
||||
```
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ When a request arrives, Plano:
|
|||
```
|
||||
1. Request arrives → "Write binary search in Python"
|
||||
2. Plano-Orchestrator classifies → route: "code_generation"
|
||||
3. Response → models: ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"]
|
||||
3. Response → models: ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"]
|
||||
```
|
||||
|
||||
No match? Plano-Orchestrator returns an empty route → client falls back to the model in the original request.
|
||||
|
|
@ -98,7 +98,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
|
|||
Response:
|
||||
```json
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "c16d1096c1af4a17abb48fb182918a88"
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
|
|||
Response (first call):
|
||||
```json
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4-20250514",
|
||||
"model": "anthropic/claude-sonnet-4-6",
|
||||
"route": "code_generation",
|
||||
"trace_id": "c16d1096c1af4a17abb48fb182918a88",
|
||||
"session_id": "my-session-123",
|
||||
|
|
@ -146,7 +146,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
|
|||
Response (pinned):
|
||||
```json
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4-20250514",
|
||||
"model": "anthropic/claude-sonnet-4-6",
|
||||
"route": "code_generation",
|
||||
"trace_id": "a1b2c3d4e5f6...",
|
||||
"session_id": "my-session-123",
|
||||
|
|
@ -233,7 +233,7 @@ kubectl rollout restart deployment/plano
|
|||
|
||||
--- 1. Code generation query (OpenAI format) ---
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "c16d1096c1af4a17abb48fb182918a88"
|
||||
}
|
||||
|
|
@ -254,14 +254,14 @@ kubectl rollout restart deployment/plano
|
|||
|
||||
--- 4. Code generation query (Anthropic format) ---
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "26be822bbdf14a3ba19fe198e55ea4a9"
|
||||
}
|
||||
|
||||
--- 7. Session pinning - first call (fresh routing decision) ---
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6",
|
||||
"session_id": "demo-session-001",
|
||||
|
|
@ -271,7 +271,7 @@ kubectl rollout restart deployment/plano
|
|||
--- 8. Session pinning - second call (same session, pinned) ---
|
||||
Notice: same model returned with "pinned": true, routing was skipped
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4-20250514",
|
||||
"model": "anthropic/claude-sonnet-4-6",
|
||||
"route": "code_generation",
|
||||
"trace_id": "a9b8c7d6e5f4a3b2c1d0e9f8a7b6c5d4",
|
||||
"session_id": "demo-session-001",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ model_providers:
|
|||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
|
|
@ -26,5 +26,5 @@ routing_preferences:
|
|||
- name: code_generation
|
||||
description: generating new code, writing functions, or creating boilerplate
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- openai/gpt-4o
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ model_providers:
|
|||
- name: complex_reasoning
|
||||
description: complex reasoning tasks, multi-step analysis, or detailed explanations
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ curl -s "$PLANO_URL/routing/v1/chat/completions" \
|
|||
{
|
||||
"name": "coding",
|
||||
"description": "code generation, writing functions, debugging",
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o", "openai/gpt-4o-mini"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini"],
|
||||
"selection_policy": {"prefer": "fastest"}
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -12,13 +12,13 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
|
|||
PROMETHEUS_METRICS = """\
|
||||
# HELP model_latency_p95_seconds P95 request latency in seconds per model
|
||||
# TYPE model_latency_p95_seconds gauge
|
||||
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-20250514"} 0.85
|
||||
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-6"} 0.85
|
||||
model_latency_p95_seconds{model_name="openai/gpt-4o"} 1.20
|
||||
model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40
|
||||
""".encode()
|
||||
|
||||
COST_DATA = {
|
||||
"anthropic/claude-sonnet-4-20250514": {
|
||||
"anthropic/claude-sonnet-4-6": {
|
||||
"input_per_million": 3.0,
|
||||
"output_per_million": 15.0,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ POST http://localhost:12000/routing/v1/messages
|
|||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Write a REST API in Go using Gin"}]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ model_providers:
|
|||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue