From 5cc4c4ee775580ff4322ca2b4845b47f13b7394b Mon Sep 17 00:00:00 2001 From: Musa Date: Wed, 24 Jun 2026 10:13:37 -0700 Subject: [PATCH] fix(ci): switch retired claude-sonnet-4-20250514 to claude-sonnet-4-6 (#975) --- .../conversational_state_v1_responses.yaml | 2 +- .../templates/preference_aware_routing.yaml | 2 +- cli/test/test_config_generator.py | 14 +++++------ .../filter_chains/pii_anonymizer/config.yaml | 2 +- demos/filter_chains/pii_anonymizer/test.sh | 6 ++--- .../weather_forecast/config.yaml | 2 +- .../config_with_aliases.yaml | 6 ++--- .../model_routing_service/README.md | 18 +++++++------- .../model_routing_service/config.yaml | 4 ++-- .../model_routing_service/config_k8s.yaml | 2 +- .../llm_routing/model_routing_service/demo.sh | 2 +- .../model_routing_service/metrics_server.py | 4 ++-- .../model_routing_service/test.rest | 2 +- .../preference_based_routing/config.yaml | 2 +- docs/routing-api.md | 10 ++++---- .../llm_providers/supported_providers.rst | 8 +++---- .../plano_config_state_storage_example.yaml | 2 +- skills/AGENTS.md | 24 +++++++++---------- skills/rules/advanced-multi-listener.md | 2 +- skills/rules/cli-agent.md | 4 ++-- skills/rules/config-providers.md | 4 ++-- skills/rules/config-secrets.md | 2 +- skills/rules/routing-aliases.md | 4 ++-- skills/test-cases.json | 6 ++--- .../e2e/config_memory_state_v1_responses.yaml | 2 +- tests/e2e/test_model_alias_routing.py | 2 +- tests/e2e/test_openai_responses_api_client.py | 8 +++---- ..._openai_responses_api_client_with_state.py | 8 +++---- tests/e2e/test_prompt_gateway.py | 8 +++---- 29 files changed, 80 insertions(+), 82 deletions(-) diff --git a/cli/planoai/templates/conversational_state_v1_responses.yaml b/cli/planoai/templates/conversational_state_v1_responses.yaml index 403278a9..11fb7477 100644 --- a/cli/planoai/templates/conversational_state_v1_responses.yaml +++ b/cli/planoai/templates/conversational_state_v1_responses.yaml @@ -11,7 +11,7 @@ model_providers: default: true # Anthropic Models - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY listeners: diff --git a/cli/planoai/templates/preference_aware_routing.yaml b/cli/planoai/templates/preference_aware_routing.yaml index e38b3881..1fcb6bf4 100644 --- a/cli/planoai/templates/preference_aware_routing.yaml +++ b/cli/planoai/templates/preference_aware_routing.yaml @@ -12,7 +12,7 @@ model_providers: - name: code understanding description: understand and explain existing code snippets, functions, or libraries - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: - name: code generation diff --git a/cli/test/test_config_generator.py b/cli/test/test_config_generator.py index 9aade29e..0c5dc969 100644 --- a/cli/test/test_config_generator.py +++ b/cli/test/test_config_generator.py @@ -584,7 +584,7 @@ model_providers: - name: code understanding description: understand and explain existing code snippets, functions, or libraries - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: - name: code generation @@ -601,9 +601,7 @@ model_providers: by_name = {entry["name"]: entry for entry in top_level} assert set(by_name) == {"code understanding", "code generation"} assert by_name["code understanding"]["models"] == ["openai/gpt-4o"] - assert by_name["code generation"]["models"] == [ - "anthropic/claude-sonnet-4-20250514" - ] + assert by_name["code generation"]["models"] == ["anthropic/claude-sonnet-4-6"] assert ( by_name["code understanding"]["description"] == "understand and explain existing code snippets, functions, or libraries" @@ -626,7 +624,7 @@ model_providers: - name: code generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: - name: code generation @@ -641,7 +639,7 @@ model_providers: assert entry["name"] == "code generation" assert entry["models"] == [ "openai/gpt-4o", - "anthropic/claude-sonnet-4-20250514", + "anthropic/claude-sonnet-4-6", ] assert config_yaml["version"] == "v0.4.0" @@ -658,7 +656,7 @@ listeners: model_providers: - model: openai/gpt-4o access_key: $OPENAI_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: @@ -666,7 +664,7 @@ routing_preferences: description: generating new code snippets or boilerplate models: - openai/gpt-4o - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 """ config_yaml = yaml.safe_load(plano_config) before = yaml.safe_dump(config_yaml, sort_keys=True) diff --git a/demos/filter_chains/pii_anonymizer/config.yaml b/demos/filter_chains/pii_anonymizer/config.yaml index b183379f..9921c9aa 100644 --- a/demos/filter_chains/pii_anonymizer/config.yaml +++ b/demos/filter_chains/pii_anonymizer/config.yaml @@ -12,7 +12,7 @@ model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY listeners: diff --git a/demos/filter_chains/pii_anonymizer/test.sh b/demos/filter_chains/pii_anonymizer/test.sh index a9019b78..83a2e9cf 100755 --- a/demos/filter_chains/pii_anonymizer/test.sh +++ b/demos/filter_chains/pii_anonymizer/test.sh @@ -93,19 +93,19 @@ echo "" echo "=== /v1/messages ===" run_test "Non-streaming with PII (phone)" /v1/messages 200 '{ - "model": "claude-sonnet-4-20250514", + "model": "claude-sonnet-4-6", "max_tokens": 256, "messages": [{"role": "user", "content": "Call me at 555-867-5309 to discuss my account"}] }' run_test "Non-streaming with PII (SSN)" /v1/messages 200 '{ - "model": "claude-sonnet-4-20250514", + "model": "claude-sonnet-4-6", "max_tokens": 256, "messages": [{"role": "user", "content": "My SSN is 123-45-6789"}] }' run_test "No PII" /v1/messages 200 '{ - "model": "claude-sonnet-4-20250514", + "model": "claude-sonnet-4-6", "max_tokens": 256, "messages": [{"role": "user", "content": "Hello, how are you?"}] }' diff --git a/demos/getting_started/weather_forecast/config.yaml b/demos/getting_started/weather_forecast/config.yaml index 65048912..b5983f42 100644 --- a/demos/getting_started/weather_forecast/config.yaml +++ b/demos/getting_started/weather_forecast/config.yaml @@ -30,7 +30,7 @@ model_providers: model: openai/gpt-4o-mini - access_key: $ANTHROPIC_API_KEY - model: anthropic/claude-sonnet-4-20250514 + model: anthropic/claude-sonnet-4-6 system_prompt: | You are a helpful assistant. diff --git a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml index f46359cc..bb873582 100644 --- a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml +++ b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml @@ -28,7 +28,7 @@ model_providers: - model: anthropic/* access_key: $ANTHROPIC_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY - model: anthropic/claude-3-haiku-20240307 @@ -71,7 +71,7 @@ model_aliases: # Alias for creative tasks -> Claude model arch.creative.v1: - target: claude-sonnet-4-20250514 + target: claude-sonnet-4-6 # Alias for quick responses -> fast model arch.fast.v1: @@ -85,7 +85,7 @@ model_aliases: target: gpt-5-mini-2025-08-07 creative-model: - target: claude-sonnet-4-20250514 + target: claude-sonnet-4-6 coding-model: target: us.amazon.nova-premier-v1:0 diff --git a/demos/llm_routing/model_routing_service/README.md b/demos/llm_routing/model_routing_service/README.md index eaec32c7..e7064969 100644 --- a/demos/llm_routing/model_routing_service/README.md +++ b/demos/llm_routing/model_routing_service/README.md @@ -33,7 +33,7 @@ routing_preferences: - name: code_generation description: generating new code, writing functions, or creating boilerplate models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 - openai/gpt-4o ``` @@ -46,7 +46,7 @@ When a request arrives, Plano: ``` 1. Request arrives → "Write binary search in Python" 2. Plano-Orchestrator classifies → route: "code_generation" -3. Response → models: ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"] +3. Response → models: ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"] ``` No match? Plano-Orchestrator returns an empty route → client falls back to the model in the original request. @@ -98,7 +98,7 @@ curl http://localhost:12000/routing/v1/chat/completions \ Response: ```json { - "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"], + "models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"], "route": "code_generation", "trace_id": "c16d1096c1af4a17abb48fb182918a88" } @@ -124,7 +124,7 @@ curl http://localhost:12000/routing/v1/chat/completions \ Response (first call): ```json { - "model": "anthropic/claude-sonnet-4-20250514", + "model": "anthropic/claude-sonnet-4-6", "route": "code_generation", "trace_id": "c16d1096c1af4a17abb48fb182918a88", "session_id": "my-session-123", @@ -146,7 +146,7 @@ curl http://localhost:12000/routing/v1/chat/completions \ Response (pinned): ```json { - "model": "anthropic/claude-sonnet-4-20250514", + "model": "anthropic/claude-sonnet-4-6", "route": "code_generation", "trace_id": "a1b2c3d4e5f6...", "session_id": "my-session-123", @@ -233,7 +233,7 @@ kubectl rollout restart deployment/plano --- 1. Code generation query (OpenAI format) --- { - "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"], + "models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"], "route": "code_generation", "trace_id": "c16d1096c1af4a17abb48fb182918a88" } @@ -254,14 +254,14 @@ kubectl rollout restart deployment/plano --- 4. Code generation query (Anthropic format) --- { - "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"], + "models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"], "route": "code_generation", "trace_id": "26be822bbdf14a3ba19fe198e55ea4a9" } --- 7. Session pinning - first call (fresh routing decision) --- { - "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"], + "models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o"], "route": "code_generation", "trace_id": "f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6", "session_id": "demo-session-001", @@ -271,7 +271,7 @@ kubectl rollout restart deployment/plano --- 8. Session pinning - second call (same session, pinned) --- Notice: same model returned with "pinned": true, routing was skipped { - "model": "anthropic/claude-sonnet-4-20250514", + "model": "anthropic/claude-sonnet-4-6", "route": "code_generation", "trace_id": "a9b8c7d6e5f4a3b2c1d0e9f8a7b6c5d4", "session_id": "demo-session-001", diff --git a/demos/llm_routing/model_routing_service/config.yaml b/demos/llm_routing/model_routing_service/config.yaml index 0bcf658d..0b4b3a21 100644 --- a/demos/llm_routing/model_routing_service/config.yaml +++ b/demos/llm_routing/model_routing_service/config.yaml @@ -13,7 +13,7 @@ model_providers: - model: openai/gpt-4o access_key: $OPENAI_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: @@ -26,5 +26,5 @@ routing_preferences: - name: code_generation description: generating new code, writing functions, or creating boilerplate models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 - openai/gpt-4o diff --git a/demos/llm_routing/model_routing_service/config_k8s.yaml b/demos/llm_routing/model_routing_service/config_k8s.yaml index 49f452a9..1f3959af 100644 --- a/demos/llm_routing/model_routing_service/config_k8s.yaml +++ b/demos/llm_routing/model_routing_service/config_k8s.yaml @@ -23,7 +23,7 @@ model_providers: - name: complex_reasoning description: complex reasoning tasks, multi-step analysis, or detailed explanations - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: - name: code_generation diff --git a/demos/llm_routing/model_routing_service/demo.sh b/demos/llm_routing/model_routing_service/demo.sh index dafd60b3..1a3ad23b 100755 --- a/demos/llm_routing/model_routing_service/demo.sh +++ b/demos/llm_routing/model_routing_service/demo.sh @@ -102,7 +102,7 @@ curl -s "$PLANO_URL/routing/v1/chat/completions" \ { "name": "coding", "description": "code generation, writing functions, debugging", - "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o", "openai/gpt-4o-mini"], + "models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini"], "selection_policy": {"prefer": "fastest"} } ] diff --git a/demos/llm_routing/model_routing_service/metrics_server.py b/demos/llm_routing/model_routing_service/metrics_server.py index b37f97fb..a00fb81c 100644 --- a/demos/llm_routing/model_routing_service/metrics_server.py +++ b/demos/llm_routing/model_routing_service/metrics_server.py @@ -12,13 +12,13 @@ from http.server import HTTPServer, BaseHTTPRequestHandler PROMETHEUS_METRICS = """\ # HELP model_latency_p95_seconds P95 request latency in seconds per model # TYPE model_latency_p95_seconds gauge -model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-20250514"} 0.85 +model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-6"} 0.85 model_latency_p95_seconds{model_name="openai/gpt-4o"} 1.20 model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40 """.encode() COST_DATA = { - "anthropic/claude-sonnet-4-20250514": { + "anthropic/claude-sonnet-4-6": { "input_per_million": 3.0, "output_per_million": 15.0, }, diff --git a/demos/llm_routing/model_routing_service/test.rest b/demos/llm_routing/model_routing_service/test.rest index b41d75f2..b780d0c0 100644 --- a/demos/llm_routing/model_routing_service/test.rest +++ b/demos/llm_routing/model_routing_service/test.rest @@ -30,7 +30,7 @@ POST http://localhost:12000/routing/v1/messages Content-Type: application/json { - "model": "claude-sonnet-4-20250514", + "model": "claude-sonnet-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "Write a REST API in Go using Gin"}] } diff --git a/demos/llm_routing/preference_based_routing/config.yaml b/demos/llm_routing/preference_based_routing/config.yaml index 38e8920a..4c82f869 100644 --- a/demos/llm_routing/preference_based_routing/config.yaml +++ b/demos/llm_routing/preference_based_routing/config.yaml @@ -17,7 +17,7 @@ model_providers: - name: code understanding description: understand and explain existing code snippets, functions, or libraries - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: - name: code generation diff --git a/docs/routing-api.md b/docs/routing-api.md index 4d1d6a8e..8bbf7ce9 100644 --- a/docs/routing-api.md +++ b/docs/routing-api.md @@ -21,7 +21,7 @@ POST /v1/chat/completions { "name": "code generation", "description": "generating new code snippets", - "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o", "openai/gpt-4o-mini"] + "models": ["anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini"] }, { "name": "general questions", @@ -55,7 +55,7 @@ POST /v1/chat/completions ```json { "models": [ - "anthropic/claude-sonnet-4-20250514", + "anthropic/claude-sonnet-4-6", "openai/gpt-4o", "openai/gpt-4o-mini" ], @@ -100,7 +100,7 @@ Requires `version: v0.4.0` or above. Models listed under `routing_preferences` m version: v0.4.0 model_providers: - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY - model: openai/gpt-4o access_key: $OPENAI_API_KEY @@ -112,7 +112,7 @@ routing_preferences: - name: code generation description: generating new code snippets or boilerplate models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 - openai/gpt-4o - name: general questions @@ -149,7 +149,7 @@ Response when pinned: ```json { - "models": ["anthropic/claude-sonnet-4-20250514"], + "models": ["anthropic/claude-sonnet-4-6"], "route": "code generation", "trace_id": "...", "session_id": "a1b2c3d4-5678-...", diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index d95340f4..43d5e42d 100644 --- a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -179,14 +179,14 @@ Anthropic - model: anthropic/* access_key: $ANTHROPIC_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_PROD_API_KEY routing_preferences: - name: code_generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 DeepSeek ~~~~~~~~ @@ -823,7 +823,7 @@ You can configure specific models with custom settings even when using wildcards # Override specific model with custom settings # This model will NOT be included in the wildcard expansion above - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_PROD_API_KEY # Another specific override @@ -834,7 +834,7 @@ You can configure specific models with custom settings even when using wildcards - name: code_generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 **Custom Provider Wildcards:** diff --git a/docs/source/resources/includes/plano_config_state_storage_example.yaml b/docs/source/resources/includes/plano_config_state_storage_example.yaml index 81a8d3a9..70297447 100644 --- a/docs/source/resources/includes/plano_config_state_storage_example.yaml +++ b/docs/source/resources/includes/plano_config_state_storage_example.yaml @@ -13,7 +13,7 @@ model_providers: default: true # Anthropic Models - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY # State storage configuration for v1/responses API diff --git a/skills/AGENTS.md b/skills/AGENTS.md index 2c0e7208..6aa08c5f 100644 --- a/skills/AGENTS.md +++ b/skills/AGENTS.md @@ -171,7 +171,7 @@ Plano translates requests between its internal format and each provider's API. T | Model prefix | Wire format | Example | |---|---|---| | `openai/*` | OpenAI | `openai/gpt-4o` | -| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-20250514` | +| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-6` | | `gemini/*` | Google Gemini | `gemini/gemini-2.0-flash` | | `mistral/*` | Mistral | `mistral/mistral-large-latest` | | `groq/*` | Groq | `groq/llama-3.3-70b-versatile` | @@ -199,7 +199,7 @@ model_providers: access_key: $OPENAI_API_KEY default: true - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY - model: gemini/gemini-2.0-flash @@ -262,7 +262,7 @@ model_providers: access_key: $OPENAI_API_KEY default: true - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY state_storage: @@ -431,7 +431,7 @@ model_providers: default: true - model: openai/gpt-4o access_key: $OPENAI_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY model_aliases: @@ -442,7 +442,7 @@ model_aliases: target: gpt-4o # High capability — for complex reasoning plano.creative.v1: - target: claude-sonnet-4-20250514 # Strong creative writing and analysis + target: claude-sonnet-4-6 # Strong creative writing and analysis plano.v1: target: gpt-4o # Default production alias @@ -1419,7 +1419,7 @@ listeners: port: 12000 model_providers: - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY default: true @@ -1432,7 +1432,7 @@ routing_preferences: Writing code, debugging, code review, explaining concepts, answering programming questions, general development tasks. models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 - anthropic/claude-opus-4-6 - name: complex architecture description: > @@ -1440,11 +1440,11 @@ routing_preferences: architectural decisions, performance optimization, security audits. models: - anthropic/claude-opus-4-6 - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 model_aliases: claude.fast.v1: - target: claude-sonnet-4-20250514 + target: claude-sonnet-4-6 claude.smart.v1: target: claude-opus-4-6 @@ -1838,7 +1838,7 @@ model_providers: - model: openai/gpt-4o access_key: $OPENAI_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY # --- Shared routing_preferences (top-level, v0.4.0+) --- @@ -1851,11 +1851,11 @@ routing_preferences: description: Multi-step analysis, code generation, research synthesis models: - openai/gpt-4o - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 - name: long documents description: Summarizing or analyzing very long documents, PDFs, transcripts models: - - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4-6 - openai/gpt-4o # --- Listener 1: OpenAI-compatible API gateway --- diff --git a/skills/rules/advanced-multi-listener.md b/skills/rules/advanced-multi-listener.md index 81c8d4d9..764f3462 100644 --- a/skills/rules/advanced-multi-listener.md +++ b/skills/rules/advanced-multi-listener.md @@ -42,7 +42,7 @@ model_providers: - name: complex reasoning description: Multi-step analysis, code generation, research synthesis - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY routing_preferences: - name: long documents diff --git a/skills/rules/cli-agent.md b/skills/rules/cli-agent.md index e311e99e..e123765f 100644 --- a/skills/rules/cli-agent.md +++ b/skills/rules/cli-agent.md @@ -43,7 +43,7 @@ listeners: port: 12000 model_providers: - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY default: true routing_preferences: @@ -62,7 +62,7 @@ model_providers: model_aliases: claude.fast.v1: - target: claude-sonnet-4-20250514 + target: claude-sonnet-4-6 claude.smart.v1: target: claude-opus-4-6 diff --git a/skills/rules/config-providers.md b/skills/rules/config-providers.md index 30476cd5..a8f62df2 100644 --- a/skills/rules/config-providers.md +++ b/skills/rules/config-providers.md @@ -14,7 +14,7 @@ Plano translates requests between its internal format and each provider's API. T | Model prefix | Wire format | Example | |---|---|---| | `openai/*` | OpenAI | `openai/gpt-4o` | -| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-20250514` | +| `anthropic/*` | Anthropic | `anthropic/claude-sonnet-4-6` | | `gemini/*` | Google Gemini | `gemini/gemini-2.0-flash` | | `mistral/*` | Mistral | `mistral/mistral-large-latest` | | `groq/*` | Groq | `groq/llama-3.3-70b-versatile` | @@ -42,7 +42,7 @@ model_providers: access_key: $OPENAI_API_KEY default: true - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY - model: gemini/gemini-2.0-flash diff --git a/skills/rules/config-secrets.md b/skills/rules/config-secrets.md index 5f585c87..bb20c855 100644 --- a/skills/rules/config-secrets.md +++ b/skills/rules/config-secrets.md @@ -40,7 +40,7 @@ model_providers: access_key: $OPENAI_API_KEY default: true - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY state_storage: diff --git a/skills/rules/routing-aliases.md b/skills/rules/routing-aliases.md index 91f0b31a..2630c12b 100644 --- a/skills/rules/routing-aliases.md +++ b/skills/rules/routing-aliases.md @@ -47,7 +47,7 @@ model_providers: default: true - model: openai/gpt-4o access_key: $OPENAI_API_KEY - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY model_aliases: @@ -58,7 +58,7 @@ model_aliases: target: gpt-4o # High capability — for complex reasoning plano.creative.v1: - target: claude-sonnet-4-20250514 # Strong creative writing and analysis + target: claude-sonnet-4-6 # Strong creative writing and analysis plano.v1: target: gpt-4o # Default production alias diff --git a/skills/test-cases.json b/skills/test-cases.json index c8bcfe33..eec7e010 100644 --- a/skills/test-cases.json +++ b/skills/test-cases.json @@ -92,7 +92,7 @@ "testCase": { "description": "Detect and fix: \"Register Model Providers with Correct Format Identifiers\"", "input": "model_providers:\n - model: gpt-4o # Missing openai/ prefix — Plano cannot route this\n access_key: $OPENAI_API_KEY\n\n - model: claude-3-5-sonnet # Missing anthropic/ prefix\n access_key: $ANTHROPIC_API_KEY", - "expected": "model_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-20250514\n access_key: $ANTHROPIC_API_KEY\n\n - model: gemini/gemini-2.0-flash\n access_key: $GOOGLE_API_KEY\n\nmodel_providers:\n - model: custom/llama3\n base_url: http://host.docker.internal:11434/v1 # Ollama endpoint\n provider_interface: openai # Ollama speaks OpenAI format\n default: true", + "expected": "model_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-6\n access_key: $ANTHROPIC_API_KEY\n\n - model: gemini/gemini-2.0-flash\n access_key: $GOOGLE_API_KEY\n\nmodel_providers:\n - model: custom/llama3\n base_url: http://host.docker.internal:11434/v1 # Ollama endpoint\n provider_interface: openai # Ollama speaks OpenAI format\n default: true", "evaluationPrompt": "Given the following Plano config or CLI usage, identify if it violates the rule \"Register Model Providers with Correct Format Identifiers\" and explain how to fix it." } }, @@ -112,7 +112,7 @@ "testCase": { "description": "Detect and fix: \"Use Environment Variable Substitution for All Secrets\"", "input": "version: v0.3.0\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: abcdefghijklmnopqrstuvwxyz... # Hardcoded — never do this\n\nstate_storage:\n type: postgres\n connection_string: \"postgresql://admin:mysecretpassword@prod-db:5432/plano\"\n\nprompt_targets:\n - name: get_data\n endpoint:\n name: my_api\n http_headers:\n Authorization: \"Bearer abcdefghijklmnopqrstuvwxyz\" # Hardcoded token", - "expected": "version: v0.3.0\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-20250514\n access_key: $ANTHROPIC_API_KEY\n\nstate_storage:\n type: postgres\n connection_string: \"postgresql://${DB_USER}:${DB_PASS}@${DB_HOST}:5432/${DB_NAME}\"\n\nprompt_targets:\n - name: get_data\n endpoint:\n name: my_api\n http_headers:\n Authorization: \"Bearer $MY_API_TOKEN\"\n\n# .env — add to .gitignore\nOPENAI_API_KEY=abcdefghijklmnopqrstuvwxyz...\nANTHROPIC_API_KEY=abcdefghijklmnopqrstuvwxyz...\nDB_USER=plano\nDB_PASS=secure-password\nDB_HOST=localhost\nMY_API_TOKEN=abcdefghijklmnopqrstuvwxyz...", + "expected": "version: v0.3.0\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n - model: anthropic/claude-sonnet-4-6\n access_key: $ANTHROPIC_API_KEY\n\nstate_storage:\n type: postgres\n connection_string: \"postgresql://${DB_USER}:${DB_PASS}@${DB_HOST}:5432/${DB_NAME}\"\n\nprompt_targets:\n - name: get_data\n endpoint:\n name: my_api\n http_headers:\n Authorization: \"Bearer $MY_API_TOKEN\"\n\n# .env — add to .gitignore\nOPENAI_API_KEY=abcdefghijklmnopqrstuvwxyz...\nANTHROPIC_API_KEY=abcdefghijklmnopqrstuvwxyz...\nDB_USER=plano\nDB_PASS=secure-password\nDB_HOST=localhost\nMY_API_TOKEN=abcdefghijklmnopqrstuvwxyz...", "evaluationPrompt": "Given the following Plano config or CLI usage, identify if it violates the rule \"Use Environment Variable Substitution for All Secrets\" and explain how to fix it." } }, @@ -288,7 +288,7 @@ "testCase": { "description": "Detect and fix: \"Use Model Aliases for Semantic, Stable Model References\"", "input": "# config.yaml — no aliases defined\nversion: v0.3.0\n\nlisteners:\n - type: model\n name: model_listener\n port: 12000\n\nmodel_providers:\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n default: true\n\n# Client code — brittle, must be updated when model changes\nclient.chat.completions.create(model=\"gpt-4o\", ...)", - "expected": "version: v0.3.0\n\nlisteners:\n - type: model\n name: model_listener\n port: 12000\n\nmodel_providers:\n - model: openai/gpt-4o-mini\n access_key: $OPENAI_API_KEY\n default: true\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n - model: anthropic/claude-sonnet-4-20250514\n access_key: $ANTHROPIC_API_KEY\n\nmodel_aliases:\n plano.fast.v1:\n target: gpt-4o-mini # Cheap, fast — for high-volume tasks\n\n plano.smart.v1:\n target: gpt-4o # High capability — for complex reasoning\n\n plano.creative.v1:\n target: claude-sonnet-4-20250514 # Strong creative writing and analysis\n\n plano.v1:\n target: gpt-4o # Default production alias\n\n# Client code — stable, alias is the contract\nclient.chat.completions.create(model=\"plano.smart.v1\", ...)", + "expected": "version: v0.3.0\n\nlisteners:\n - type: model\n name: model_listener\n port: 12000\n\nmodel_providers:\n - model: openai/gpt-4o-mini\n access_key: $OPENAI_API_KEY\n default: true\n - model: openai/gpt-4o\n access_key: $OPENAI_API_KEY\n - model: anthropic/claude-sonnet-4-6\n access_key: $ANTHROPIC_API_KEY\n\nmodel_aliases:\n plano.fast.v1:\n target: gpt-4o-mini # Cheap, fast — for high-volume tasks\n\n plano.smart.v1:\n target: gpt-4o # High capability — for complex reasoning\n\n plano.creative.v1:\n target: claude-sonnet-4-6 # Strong creative writing and analysis\n\n plano.v1:\n target: gpt-4o # Default production alias\n\n# Client code — stable, alias is the contract\nclient.chat.completions.create(model=\"plano.smart.v1\", ...)", "evaluationPrompt": "Given the following Plano config or CLI usage, identify if it violates the rule \"Use Model Aliases for Semantic, Stable Model References\" and explain how to fix it." } }, diff --git a/tests/e2e/config_memory_state_v1_responses.yaml b/tests/e2e/config_memory_state_v1_responses.yaml index afc40910..b0977062 100644 --- a/tests/e2e/config_memory_state_v1_responses.yaml +++ b/tests/e2e/config_memory_state_v1_responses.yaml @@ -15,7 +15,7 @@ llm_providers: default: true # Anthropic Models - - model: anthropic/claude-sonnet-4-20250514 + - model: anthropic/claude-sonnet-4-6 access_key: $ANTHROPIC_API_KEY # State storage configuration for v1/responses API diff --git a/tests/e2e/test_model_alias_routing.py b/tests/e2e/test_model_alias_routing.py index f9e695a5..deb86003 100644 --- a/tests/e2e/test_model_alias_routing.py +++ b/tests/e2e/test_model_alias_routing.py @@ -440,7 +440,7 @@ def test_anthropic_thinking_mode_streaming(): text_delta_seen = False with client.messages.stream( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", max_tokens=2048, thinking={"type": "enabled", "budget_tokens": 1024}, # <- idiomatic messages=[{"role": "user", "content": "Explain briefly what 2+2 equals"}], diff --git a/tests/e2e/test_openai_responses_api_client.py b/tests/e2e/test_openai_responses_api_client.py index 6e110e0d..d4fafb5b 100644 --- a/tests/e2e/test_openai_responses_api_client.py +++ b/tests/e2e/test_openai_responses_api_client.py @@ -489,7 +489,7 @@ def test_openai_responses_api_non_streaming_upstream_anthropic(): client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1") resp = client.responses.create( - model="claude-sonnet-4-20250514", input="Hello, translate this via grok alias" + model="claude-sonnet-4-6", input="Hello, translate this via grok alias" ) # Print the response content - handle both responses format and chat completions format @@ -509,7 +509,7 @@ def test_openai_responses_api_with_streaming_upstream_anthropic(): # Simple streaming responses API request using a direct model (pass-through) stream = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="Write a short haiku about coding", stream=True, ) @@ -566,7 +566,7 @@ def test_openai_responses_api_non_streaming_with_tools_upstream_anthropic(): ] resp = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="Call the echo tool", tools=tools, ) @@ -598,7 +598,7 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic(): ] stream = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="Call the echo tool with hello_world", tools=tools, stream=True, diff --git a/tests/e2e/test_openai_responses_api_client_with_state.py b/tests/e2e/test_openai_responses_api_client_with_state.py index c23307e6..6fead76b 100644 --- a/tests/e2e/test_openai_responses_api_client_with_state.py +++ b/tests/e2e/test_openai_responses_api_client_with_state.py @@ -35,7 +35,7 @@ def test_conversation_state_management_two_turn(): # Turn 1: Send initial message to Anthropic (non-OpenAI model) logger.info("\n[TURN 1] Sending initial message...") resp1 = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="My name is Alice and I like pizza.", ) @@ -53,7 +53,7 @@ def test_conversation_state_management_two_turn(): f"\n[TURN 2] Sending follow-up with previous_response_id={response_id_1}" ) resp2 = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="Please list all the messages you have received in our conversation, numbering each one.", previous_response_id=response_id_1, ) @@ -121,7 +121,7 @@ def test_conversation_state_management_two_turn_streaming(): # Turn 1: Send initial streaming message to Anthropic (non-OpenAI model) logger.info("\n[TURN 1] Sending initial streaming message...") stream1 = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="My name is Alice and I like pizza.", stream=True, ) @@ -154,7 +154,7 @@ def test_conversation_state_management_two_turn_streaming(): f"\n[TURN 2] Sending follow-up streaming request with previous_response_id={response_id_1}" ) stream2 = client.responses.create( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", input="Please list all the messages you have received in our conversation, numbering each one.", previous_response_id=response_id_1, stream=True, diff --git a/tests/e2e/test_prompt_gateway.py b/tests/e2e/test_prompt_gateway.py index 9c89059c..d91483af 100644 --- a/tests/e2e/test_prompt_gateway.py +++ b/tests/e2e/test_prompt_gateway.py @@ -395,7 +395,7 @@ def test_claude_v1_messages_api(): ) message = client.messages.create( - model="claude-sonnet-4-20250514", # Use working model from smoke test + model="claude-sonnet-4-6", # Use working model from smoke test max_tokens=50, messages=[ { @@ -414,7 +414,7 @@ def test_claude_v1_messages_api_streaming(): client = anthropic.Anthropic(api_key="test-key", base_url=base_url) with client.messages.stream( - model="claude-sonnet-4-20250514", + model="claude-sonnet-4-6", max_tokens=50, messages=[ { @@ -525,7 +525,7 @@ def test_openai_gpt4o_mini_v1_messages_api_streaming(): def test_openai_client_with_claude_model_streaming(): - """Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-20250514) + """Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-6) This tests the transformation: Anthropic upstream -> OpenAI client format with proper chunk handling """ # Get the base URL from the LLM gateway endpoint @@ -537,7 +537,7 @@ def test_openai_client_with_claude_model_streaming(): ) stream = client.chat.completions.create( - model="claude-sonnet-4-20250514", # Claude model via OpenAI client + model="claude-sonnet-4-6", # Claude model via OpenAI client max_tokens=50, messages=[ {