fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level

Lift inline routing_preferences from each model_provider into the top-level routing_preferences list (with merged models[]), bump version to v0.4.0, and emit a deprecation warning. Existing v0.3.0 demo configs (Claude Code, Codex, preference_based_routing, etc.) keep working unchanged. The schema flags the inline shape as deprecated but still accepts it. Docs and skills are updated to the canonical top-level multi-model form.
2026-05-30 14:25:15 +02:00 · 2026-04-24 11:28:22 -07:00 · 2026-04-24 11:28:22 -07:00 · dde90cae82
commit dde90cae82
parent b81eb7266c
11 changed files with 693 additions and 224 deletions
--- a/docs/source/resources/includes/plano_config_full_reference.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference.yaml
@@ -1,5 +1,5 @@
 # Plano Gateway configuration version
-version: v0.3.0
+version: v0.4.0

 # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
 agents:
@@ -32,17 +32,8 @@ model_providers:
  - model: mistral/ministral-3b-latest
    access_key: $MISTRAL_API_KEY

-  # routing_preferences: tags a model with named capabilities so Plano's LLM router
-  # can select the best model for each request based on intent. Requires the
-  # Plano-Orchestrator model (or equivalent) to be configured in overrides.llm_routing_model.
-  # Each preference has a name (short label) and a description (used for intent matching).
  - model: groq/llama-3.3-70b-versatile
    access_key: $GROQ_API_KEY
-    routing_preferences:
-      - name: code generation
-        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
-      - name: code review
-        description: reviewing, analyzing, and suggesting improvements to existing code

  # passthrough_auth: forwards the client's Authorization header upstream instead of
  # using the configured access_key. Useful for LiteLLM or similar proxy setups.
@@ -64,6 +55,29 @@ model_aliases:
  smart-llm:
    target: gpt-4o

+# routing_preferences: top-level list mapping each named task category to an
+# ordered pool of candidate models. Plano's LLM router matches incoming requests
+# against each entry's description and returns an ordered list of models; the
+# client uses models[0] as primary and retries models[1], models[2], ... on 429/5xx.
+# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent).
+# Each model in `models` must be declared in model_providers above.
+# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router
+# reorder candidates using live cost/latency data from model_metrics_sources.
+routing_preferences:
+  - name: code generation
+    description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+    models:
+      - anthropic/claude-sonnet-4-0
+      - openai/gpt-4o
+      - groq/llama-3.3-70b-versatile
+  - name: code review
+    description: reviewing, analyzing, and suggesting improvements to existing code
+    models:
+      - anthropic/claude-sonnet-4-0
+      - groq/llama-3.3-70b-versatile
+    selection_policy:
+      prefer: cheapest
+
 # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
  # Agent listener for routing requests to multiple agents