mirror of
https://github.com/katanemo/plano.git
synced 2026-06-29 15:49:40 +02:00
fix(ci): switch retired claude-sonnet-4-20250514 to claude-sonnet-4-6 (#975)
This commit is contained in:
parent
5d990d9609
commit
5cc4c4ee77
29 changed files with 80 additions and 82 deletions
|
|
@ -11,7 +11,7 @@ model_providers:
|
|||
default: true
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
listeners:
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ model_providers:
|
|||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
|
|
|
|||
|
|
@ -584,7 +584,7 @@ model_providers:
|
|||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
|
|
@ -601,9 +601,7 @@ model_providers:
|
|||
by_name = {entry["name"]: entry for entry in top_level}
|
||||
assert set(by_name) == {"code understanding", "code generation"}
|
||||
assert by_name["code understanding"]["models"] == ["openai/gpt-4o"]
|
||||
assert by_name["code generation"]["models"] == [
|
||||
"anthropic/claude-sonnet-4-20250514"
|
||||
]
|
||||
assert by_name["code generation"]["models"] == ["anthropic/claude-sonnet-4-6"]
|
||||
assert (
|
||||
by_name["code understanding"]["description"]
|
||||
== "understand and explain existing code snippets, functions, or libraries"
|
||||
|
|
@ -626,7 +624,7 @@ model_providers:
|
|||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
|
|
@ -641,7 +639,7 @@ model_providers:
|
|||
assert entry["name"] == "code generation"
|
||||
assert entry["models"] == [
|
||||
"openai/gpt-4o",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
]
|
||||
assert config_yaml["version"] == "v0.4.0"
|
||||
|
||||
|
|
@ -658,7 +656,7 @@ listeners:
|
|||
model_providers:
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
|
|
@ -666,7 +664,7 @@ routing_preferences:
|
|||
description: generating new code snippets or boilerplate
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
"""
|
||||
config_yaml = yaml.safe_load(plano_config)
|
||||
before = yaml.safe_dump(config_yaml, sort_keys=True)
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ model_providers:
|
|||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
listeners:
|
||||
|
|
|
|||
|
|
@ -93,19 +93,19 @@ echo ""
|
|||
echo "=== /v1/messages ==="
|
||||
|
||||
run_test "Non-streaming with PII (phone)" /v1/messages 200 '{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": "Call me at 555-867-5309 to discuss my account"}]
|
||||
}'
|
||||
|
||||
run_test "Non-streaming with PII (SSN)" /v1/messages 200 '{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": "My SSN is 123-45-6789"}]
|
||||
}'
|
||||
|
||||
run_test "No PII" /v1/messages 200 '{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 256,
|
||||
"messages": [{"role": "user", "content": "Hello, how are you?"}]
|
||||
}'
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ model_providers:
|
|||
model: openai/gpt-4o-mini
|
||||
|
||||
- access_key: $ANTHROPIC_API_KEY
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
model: anthropic/claude-sonnet-4-6
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ model_providers:
|
|||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
|
|
@ -71,7 +71,7 @@ model_aliases:
|
|||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-sonnet-4-20250514
|
||||
target: claude-sonnet-4-6
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
|
|
@ -85,7 +85,7 @@ model_aliases:
|
|||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
creative-model:
|
||||
target: claude-sonnet-4-20250514
|
||||
target: claude-sonnet-4-6
|
||||
|
||||
coding-model:
|
||||
target: us.amazon.nova-premier-v1:0
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ routing_preferences:
|
|||
- name: code_generation
|
||||
description: generating new code, writing functions, or creating boilerplate
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- openai/gpt-4o
|
||||
```
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ When a request arrives, Plano:
|
|||
```
|
||||
1. Request arrives → "Write binary search in Python"
|
||||
2. Plano-Orchestrator classifies → route: "code_generation"
|
||||
3. Response → models: ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"]
|
||||
3. Response → models: ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"]
|
||||
```
|
||||
|
||||
No match? Plano-Orchestrator returns an empty route → client falls back to the model in the original request.
|
||||
|
|
@ -98,7 +98,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
|
|||
Response:
|
||||
```json
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "c16d1096c1af4a17abb48fb182918a88"
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
|
|||
Response (first call):
|
||||
```json
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4-20250514",
|
||||
"model": "anthropic/claude-sonnet-4-6",
|
||||
"route": "code_generation",
|
||||
"trace_id": "c16d1096c1af4a17abb48fb182918a88",
|
||||
"session_id": "my-session-123",
|
||||
|
|
@ -146,7 +146,7 @@ curl http://localhost:12000/routing/v1/chat/completions \
|
|||
Response (pinned):
|
||||
```json
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4-20250514",
|
||||
"model": "anthropic/claude-sonnet-4-6",
|
||||
"route": "code_generation",
|
||||
"trace_id": "a1b2c3d4e5f6...",
|
||||
"session_id": "my-session-123",
|
||||
|
|
@ -233,7 +233,7 @@ kubectl rollout restart deployment/plano
|
|||
|
||||
--- 1. Code generation query (OpenAI format) ---
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "c16d1096c1af4a17abb48fb182918a88"
|
||||
}
|
||||
|
|
@ -254,14 +254,14 @@ kubectl rollout restart deployment/plano
|
|||
|
||||
--- 4. Code generation query (Anthropic format) ---
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "26be822bbdf14a3ba19fe198e55ea4a9"
|
||||
}
|
||||
|
||||
--- 7. Session pinning - first call (fresh routing decision) ---
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"],
|
||||
"route": "code_generation",
|
||||
"trace_id": "f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6",
|
||||
"session_id": "demo-session-001",
|
||||
|
|
@ -271,7 +271,7 @@ kubectl rollout restart deployment/plano
|
|||
--- 8. Session pinning - second call (same session, pinned) ---
|
||||
Notice: same model returned with "pinned": true, routing was skipped
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4-20250514",
|
||||
"model": "anthropic/claude-sonnet-4-6",
|
||||
"route": "code_generation",
|
||||
"trace_id": "a9b8c7d6e5f4a3b2c1d0e9f8a7b6c5d4",
|
||||
"session_id": "demo-session-001",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ model_providers:
|
|||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
|
|
@ -26,5 +26,5 @@ routing_preferences:
|
|||
- name: code_generation
|
||||
description: generating new code, writing functions, or creating boilerplate
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- openai/gpt-4o
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ model_providers:
|
|||
- name: complex_reasoning
|
||||
description: complex reasoning tasks, multi-step analysis, or detailed explanations
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ curl -s "$PLANO_URL/routing/v1/chat/completions" \
|
|||
{
|
||||
"name": "coding",
|
||||
"description": "code generation, writing functions, debugging",
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o", "openai/gpt-4o-mini"],
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini"],
|
||||
"selection_policy": {"prefer": "fastest"}
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -12,13 +12,13 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
|
|||
PROMETHEUS_METRICS = """\
|
||||
# HELP model_latency_p95_seconds P95 request latency in seconds per model
|
||||
# TYPE model_latency_p95_seconds gauge
|
||||
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-20250514"} 0.85
|
||||
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-6"} 0.85
|
||||
model_latency_p95_seconds{model_name="openai/gpt-4o"} 1.20
|
||||
model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40
|
||||
""".encode()
|
||||
|
||||
COST_DATA = {
|
||||
"anthropic/claude-sonnet-4-20250514": {
|
||||
"anthropic/claude-sonnet-4-6": {
|
||||
"input_per_million": 3.0,
|
||||
"output_per_million": 15.0,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ POST http://localhost:12000/routing/v1/messages
|
|||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Write a REST API in Go using Gin"}]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ model_providers:
|
|||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ POST /v1/chat/completions
|
|||
{
|
||||
"name": "code generation",
|
||||
"description": "generating new code snippets",
|
||||
"models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o", "openai/gpt-4o-mini"]
|
||||
"models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini"]
|
||||
},
|
||||
{
|
||||
"name": "general questions",
|
||||
|
|
@ -55,7 +55,7 @@ POST /v1/chat/completions
|
|||
```json
|
||||
{
|
||||
"models": [
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"openai/gpt-4o",
|
||||
"openai/gpt-4o-mini"
|
||||
],
|
||||
|
|
@ -100,7 +100,7 @@ Requires `version: v0.4.0` or above. Models listed under `routing_preferences` m
|
|||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
|
@ -112,7 +112,7 @@ routing_preferences:
|
|||
- name: code generation
|
||||
description: generating new code snippets or boilerplate
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- openai/gpt-4o
|
||||
|
||||
- name: general questions
|
||||
|
|
@ -149,7 +149,7 @@ Response when pinned:
|
|||
|
||||
```json
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514"],
|
||||
"models": ["anthropic/claude-sonnet-4-6"],
|
||||
"route": "code generation",
|
||||
"trace_id": "...",
|
||||
"session_id": "a1b2c3d4-5678-...",
|
||||
|
|
|
|||
|
|
@ -179,14 +179,14 @@ Anthropic
|
|||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_PROD_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
|
||||
DeepSeek
|
||||
~~~~~~~~
|
||||
|
|
@ -823,7 +823,7 @@ You can configure specific models with custom settings even when using wildcards
|
|||
|
||||
# Override specific model with custom settings
|
||||
# This model will NOT be included in the wildcard expansion above
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_PROD_API_KEY
|
||||
|
||||
# Another specific override
|
||||
|
|
@ -834,7 +834,7 @@ You can configure specific models with custom settings even when using wildcards
|
|||
- name: code_generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
|
||||
**Custom Provider Wildcards:**
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ model_providers:
|
|||
default: true
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# State storage configuration for v1/responses API
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ Plano translates requests between its internal format and each provider's API. T
|
|||
| Model prefix | Wire format | Example |
|
||||
|---|---|---|
|
||||
| `openai/*` | OpenAI | `openai/gpt-4o` |
|
||||
| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-20250514` |
|
||||
| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-6` |
|
||||
| `gemini/*` | Google Gemini | `gemini/gemini-2.0-flash` |
|
||||
| `mistral/*` | Mistral | `mistral/mistral-large-latest` |
|
||||
| `groq/*` | Groq | `groq/llama-3.3-70b-versatile` |
|
||||
|
|
@ -199,7 +199,7 @@ model_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: gemini/gemini-2.0-flash
|
||||
|
|
@ -262,7 +262,7 @@ model_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
state_storage:
|
||||
|
|
@ -431,7 +431,7 @@ model_providers:
|
|||
default: true
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
model_aliases:
|
||||
|
|
@ -442,7 +442,7 @@ model_aliases:
|
|||
target: gpt-4o # High capability — for complex reasoning
|
||||
|
||||
plano.creative.v1:
|
||||
target: claude-sonnet-4-20250514 # Strong creative writing and analysis
|
||||
target: claude-sonnet-4-6 # Strong creative writing and analysis
|
||||
|
||||
plano.v1:
|
||||
target: gpt-4o # Default production alias
|
||||
|
|
@ -1419,7 +1419,7 @@ listeners:
|
|||
port: 12000
|
||||
|
||||
model_providers:
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
default: true
|
||||
|
||||
|
|
@ -1432,7 +1432,7 @@ routing_preferences:
|
|||
Writing code, debugging, code review, explaining concepts,
|
||||
answering programming questions, general development tasks.
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- anthropic/claude-opus-4-6
|
||||
- name: complex architecture
|
||||
description: >
|
||||
|
|
@ -1440,11 +1440,11 @@ routing_preferences:
|
|||
architectural decisions, performance optimization, security audits.
|
||||
models:
|
||||
- anthropic/claude-opus-4-6
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
|
||||
model_aliases:
|
||||
claude.fast.v1:
|
||||
target: claude-sonnet-4-20250514
|
||||
target: claude-sonnet-4-6
|
||||
claude.smart.v1:
|
||||
target: claude-opus-4-6
|
||||
|
||||
|
|
@ -1838,7 +1838,7 @@ model_providers:
|
|||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# --- Shared routing_preferences (top-level, v0.4.0+) ---
|
||||
|
|
@ -1851,11 +1851,11 @@ routing_preferences:
|
|||
description: Multi-step analysis, code generation, research synthesis
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- name: long documents
|
||||
description: Summarizing or analyzing very long documents, PDFs, transcripts
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- openai/gpt-4o
|
||||
|
||||
# --- Listener 1: OpenAI-compatible API gateway ---
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ model_providers:
|
|||
- name: complex reasoning
|
||||
description: Multi-step analysis, code generation, research synthesis
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: long documents
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ listeners:
|
|||
port: 12000
|
||||
|
||||
model_providers:
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
|
|
@ -62,7 +62,7 @@ model_providers:
|
|||
|
||||
model_aliases:
|
||||
claude.fast.v1:
|
||||
target: claude-sonnet-4-20250514
|
||||
target: claude-sonnet-4-6
|
||||
claude.smart.v1:
|
||||
target: claude-opus-4-6
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Plano translates requests between its internal format and each provider's API. T
|
|||
| Model prefix | Wire format | Example |
|
||||
|---|---|---|
|
||||
| `openai/*` | OpenAI | `openai/gpt-4o` |
|
||||
| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-20250514` |
|
||||
| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-6` |
|
||||
| `gemini/*` | Google Gemini | `gemini/gemini-2.0-flash` |
|
||||
| `mistral/*` | Mistral | `mistral/mistral-large-latest` |
|
||||
| `groq/*` | Groq | `groq/llama-3.3-70b-versatile` |
|
||||
|
|
@ -42,7 +42,7 @@ model_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: gemini/gemini-2.0-flash
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ model_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
state_storage:
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ model_providers:
|
|||
default: true
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
model_aliases:
|
||||
|
|
@ -58,7 +58,7 @@ model_aliases:
|
|||
target: gpt-4o # High capability — for complex reasoning
|
||||
|
||||
plano.creative.v1:
|
||||
target: claude-sonnet-4-20250514 # Strong creative writing and analysis
|
||||
target: claude-sonnet-4-6 # Strong creative writing and analysis
|
||||
|
||||
plano.v1:
|
||||
target: gpt-4o # Default production alias
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@
|
|||
"testCase": {
|
||||
"description": "Detect and fix: \"Register Model Providers with Correct Format Identifiers\"",
|
||||
"input": "model_providers:\n - model: gpt-4o # Missing openai/ prefix — Plano cannot route this\n access_key: $OPENAI_API_KEY\n\n - model: claude-3-5-sonnet # Missing anthropic/ prefix\n access_key: $ANTHROPIC_API_KEY",
|
||||
"expected": "model_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-20250514\n access_key: $ANTHROPIC_API_KEY\n\n - model: gemini/gemini-2.0-flash\n access_key: $GOOGLE_API_KEY\n\nmodel_providers:\n - model: custom/llama3\n base_url: http://host.docker.internal:11434/v1 # Ollama endpoint\n provider_interface: openai # Ollama speaks OpenAI format\n default: true",
|
||||
"expected": "model_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-6\n access_key: $ANTHROPIC_API_KEY\n\n - model: gemini/gemini-2.0-flash\n access_key: $GOOGLE_API_KEY\n\nmodel_providers:\n - model: custom/llama3\n base_url: http://host.docker.internal:11434/v1 # Ollama endpoint\n provider_interface: openai # Ollama speaks OpenAI format\n default: true",
|
||||
"evaluationPrompt": "Given the following Plano config or CLI usage, identify if it violates the rule \"Register Model Providers with Correct Format Identifiers\" and explain how to fix it."
|
||||
}
|
||||
},
|
||||
|
|
@ -112,7 +112,7 @@
|
|||
"testCase": {
|
||||
"description": "Detect and fix: \"Use Environment Variable Substitution for All Secrets\"",
|
||||
"input": "version: v0.3.0\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: abcdefghijklmnopqrstuvwxyz... # Hardcoded — never do this\n\nstate_storage:\n type: postgres\n connection_string: \"postgresql://admin:mysecretpassword@prod-db:5432/plano\"\n\nprompt_targets:\n - name: get_data\n endpoint:\n name: my_api\n http_headers:\n Authorization: \"Bearer abcdefghijklmnopqrstuvwxyz\" # Hardcoded token",
|
||||
"expected": "version: v0.3.0\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-20250514\n access_key: $ANTHROPIC_API_KEY\n\nstate_storage:\n type: postgres\n connection_string: \"postgresql://${DB_USER}:${DB_PASS}@${DB_HOST}:5432/${DB_NAME}\"\n\nprompt_targets:\n - name: get_data\n endpoint:\n name: my_api\n http_headers:\n Authorization: \"Bearer $MY_API_TOKEN\"\n\n# .env — add to .gitignore\nOPENAI_API_KEY=abcdefghijklmnopqrstuvwxyz...\nANTHROPIC_API_KEY=abcdefghijklmnopqrstuvwxyz...\nDB_USER=plano\nDB_PASS=secure-password\nDB_HOST=localhost\nMY_API_TOKEN=abcdefghijklmnopqrstuvwxyz...",
|
||||
"expected": "version: v0.3.0\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-6\n access_key: $ANTHROPIC_API_KEY\n\nstate_storage:\n type: postgres\n connection_string: \"postgresql://${DB_USER}:${DB_PASS}@${DB_HOST}:5432/${DB_NAME}\"\n\nprompt_targets:\n - name: get_data\n endpoint:\n name: my_api\n http_headers:\n Authorization: \"Bearer $MY_API_TOKEN\"\n\n# .env — add to .gitignore\nOPENAI_API_KEY=abcdefghijklmnopqrstuvwxyz...\nANTHROPIC_API_KEY=abcdefghijklmnopqrstuvwxyz...\nDB_USER=plano\nDB_PASS=secure-password\nDB_HOST=localhost\nMY_API_TOKEN=abcdefghijklmnopqrstuvwxyz...",
|
||||
"evaluationPrompt": "Given the following Plano config or CLI usage, identify if it violates the rule \"Use Environment Variable Substitution for All Secrets\" and explain how to fix it."
|
||||
}
|
||||
},
|
||||
|
|
@ -288,7 +288,7 @@
|
|||
"testCase": {
|
||||
"description": "Detect and fix: \"Use Model Aliases for Semantic, Stable Model References\"",
|
||||
"input": "# config.yaml — no aliases defined\nversion: v0.3.0\n\nlisteners:\n - type: model\n name: model_listener\n port: 12000\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n# Client code — brittle, must be updated when model changes\nclient.chat.completions.create(model=\"gpt-4o\", ...)",
|
||||
"expected": "version: v0.3.0\n\nlisteners:\n - type: model\n name: model_listener\n port: 12000\n\nmodel_providers:\n - model: openai/gpt-4o-mini\n access_key: $OPENAI_API_KEY\n default: true\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n - model: anthropic/claude-sonnet-4-20250514\n access_key: $ANTHROPIC_API_KEY\n\nmodel_aliases:\n plano.fast.v1:\n target: gpt-4o-mini # Cheap, fast — for high-volume tasks\n\n plano.smart.v1:\n target: gpt-4o # High capability — for complex reasoning\n\n plano.creative.v1:\n target: claude-sonnet-4-20250514 # Strong creative writing and analysis\n\n plano.v1:\n target: gpt-4o # Default production alias\n\n# Client code — stable, alias is the contract\nclient.chat.completions.create(model=\"plano.smart.v1\", ...)",
|
||||
"expected": "version: v0.3.0\n\nlisteners:\n - type: model\n name: model_listener\n port: 12000\n\nmodel_providers:\n - model: openai/gpt-4o-mini\n access_key: $OPENAI_API_KEY\n default: true\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n - model: anthropic/claude-sonnet-4-6\n access_key: $ANTHROPIC_API_KEY\n\nmodel_aliases:\n plano.fast.v1:\n target: gpt-4o-mini # Cheap, fast — for high-volume tasks\n\n plano.smart.v1:\n target: gpt-4o # High capability — for complex reasoning\n\n plano.creative.v1:\n target: claude-sonnet-4-6 # Strong creative writing and analysis\n\n plano.v1:\n target: gpt-4o # Default production alias\n\n# Client code — stable, alias is the contract\nclient.chat.completions.create(model=\"plano.smart.v1\", ...)",
|
||||
"evaluationPrompt": "Given the following Plano config or CLI usage, identify if it violates the rule \"Use Model Aliases for Semantic, Stable Model References\" and explain how to fix it."
|
||||
}
|
||||
},
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ llm_providers:
|
|||
default: true
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# State storage configuration for v1/responses API
|
||||
|
|
|
|||
|
|
@ -440,7 +440,7 @@ def test_anthropic_thinking_mode_streaming():
|
|||
text_delta_seen = False
|
||||
|
||||
with client.messages.stream(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=2048,
|
||||
thinking={"type": "enabled", "budget_tokens": 1024}, # <- idiomatic
|
||||
messages=[{"role": "user", "content": "Explain briefly what 2+2 equals"}],
|
||||
|
|
|
|||
|
|
@ -489,7 +489,7 @@ def test_openai_responses_api_non_streaming_upstream_anthropic():
|
|||
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
|
||||
|
||||
resp = client.responses.create(
|
||||
model="claude-sonnet-4-20250514", input="Hello, translate this via grok alias"
|
||||
model="claude-sonnet-4-6", input="Hello, translate this via grok alias"
|
||||
)
|
||||
|
||||
# Print the response content - handle both responses format and chat completions format
|
||||
|
|
@ -509,7 +509,7 @@ def test_openai_responses_api_with_streaming_upstream_anthropic():
|
|||
|
||||
# Simple streaming responses API request using a direct model (pass-through)
|
||||
stream = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="Write a short haiku about coding",
|
||||
stream=True,
|
||||
)
|
||||
|
|
@ -566,7 +566,7 @@ def test_openai_responses_api_non_streaming_with_tools_upstream_anthropic():
|
|||
]
|
||||
|
||||
resp = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="Call the echo tool",
|
||||
tools=tools,
|
||||
)
|
||||
|
|
@ -598,7 +598,7 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic():
|
|||
]
|
||||
|
||||
stream = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="Call the echo tool with hello_world",
|
||||
tools=tools,
|
||||
stream=True,
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ def test_conversation_state_management_two_turn():
|
|||
# Turn 1: Send initial message to Anthropic (non-OpenAI model)
|
||||
logger.info("\n[TURN 1] Sending initial message...")
|
||||
resp1 = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="My name is Alice and I like pizza.",
|
||||
)
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ def test_conversation_state_management_two_turn():
|
|||
f"\n[TURN 2] Sending follow-up with previous_response_id={response_id_1}"
|
||||
)
|
||||
resp2 = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="Please list all the messages you have received in our conversation, numbering each one.",
|
||||
previous_response_id=response_id_1,
|
||||
)
|
||||
|
|
@ -121,7 +121,7 @@ def test_conversation_state_management_two_turn_streaming():
|
|||
# Turn 1: Send initial streaming message to Anthropic (non-OpenAI model)
|
||||
logger.info("\n[TURN 1] Sending initial streaming message...")
|
||||
stream1 = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="My name is Alice and I like pizza.",
|
||||
stream=True,
|
||||
)
|
||||
|
|
@ -154,7 +154,7 @@ def test_conversation_state_management_two_turn_streaming():
|
|||
f"\n[TURN 2] Sending follow-up streaming request with previous_response_id={response_id_1}"
|
||||
)
|
||||
stream2 = client.responses.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
input="Please list all the messages you have received in our conversation, numbering each one.",
|
||||
previous_response_id=response_id_1,
|
||||
stream=True,
|
||||
|
|
|
|||
|
|
@ -395,7 +395,7 @@ def test_claude_v1_messages_api():
|
|||
)
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-20250514", # Use working model from smoke test
|
||||
model="claude-sonnet-4-6", # Use working model from smoke test
|
||||
max_tokens=50,
|
||||
messages=[
|
||||
{
|
||||
|
|
@ -414,7 +414,7 @@ def test_claude_v1_messages_api_streaming():
|
|||
client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
|
||||
|
||||
with client.messages.stream(
|
||||
model="claude-sonnet-4-20250514",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=50,
|
||||
messages=[
|
||||
{
|
||||
|
|
@ -525,7 +525,7 @@ def test_openai_gpt4o_mini_v1_messages_api_streaming():
|
|||
|
||||
|
||||
def test_openai_client_with_claude_model_streaming():
|
||||
"""Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-20250514)
|
||||
"""Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-6)
|
||||
This tests the transformation: Anthropic upstream -> OpenAI client format with proper chunk handling
|
||||
"""
|
||||
# Get the base URL from the LLM gateway endpoint
|
||||
|
|
@ -537,7 +537,7 @@ def test_openai_client_with_claude_model_streaming():
|
|||
)
|
||||
|
||||
stream = client.chat.completions.create(
|
||||
model="claude-sonnet-4-20250514", # Claude model via OpenAI client
|
||||
model="claude-sonnet-4-6", # Claude model via OpenAI client
|
||||
max_tokens=50,
|
||||
messages=[
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue