diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index b372810d..cb07767e 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -58,6 +58,110 @@ def get_endpoint_and_port(endpoint, protocol): return endpoint, port +def migrate_inline_routing_preferences(config_yaml): + """Lift v0.3.0-style inline ``routing_preferences`` under each + ``model_providers`` entry to the v0.4.0 top-level ``routing_preferences`` + list with ``models: [...]``. + + This function is a no-op for configs whose ``version`` is already + ``v0.4.0`` or newer — those are assumed to be on the canonical + top-level shape and are passed through untouched. + + For older configs, the version is bumped to ``v0.4.0`` up front so + brightstaff's v0.4.0 gate for top-level ``routing_preferences`` + accepts the rendered config, then inline preferences under each + provider are lifted into the top-level list. Preferences with the + same ``name`` across multiple providers are merged into a single + top-level entry whose ``models`` list contains every provider's + full ``provider/model`` string in declaration order. The first + ``description`` encountered wins; conflicts are warned, not errored, + so existing v0.3.0 configs keep compiling. Any top-level preference + already defined by the user is preserved as-is. 
+ """ + current_version = str(config_yaml.get("version", "")) + if _version_tuple(current_version) >= (0, 4, 0): + return + + config_yaml["version"] = "v0.4.0" + + model_providers = config_yaml.get("model_providers") or [] + if not model_providers: + return + + migrated = {} + for model_provider in model_providers: + inline_prefs = model_provider.get("routing_preferences") + if not inline_prefs: + continue + + full_model_name = model_provider.get("model") + if not full_model_name: + continue + + if "/" in full_model_name and full_model_name.split("/")[-1].strip() == "*": + raise Exception( + f"Model {full_model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards." + ) + + for pref in inline_prefs: + name = pref.get("name") + description = pref.get("description", "") + if not name: + continue + if name in migrated: + entry = migrated[name] + if description and description != entry["description"]: + print( + f"WARNING: routing preference '{name}' has conflicting descriptions across providers; keeping the first one." + ) + if full_model_name not in entry["models"]: + entry["models"].append(full_model_name) + else: + migrated[name] = { + "name": name, + "description": description, + "models": [full_model_name], + } + + if not migrated: + return + + for model_provider in model_providers: + if "routing_preferences" in model_provider: + del model_provider["routing_preferences"] + + existing_top_level = config_yaml.get("routing_preferences") or [] + existing_names = {entry.get("name") for entry in existing_top_level} + merged = list(existing_top_level) + for name, entry in migrated.items(): + if name in existing_names: + continue + merged.append(entry) + config_yaml["routing_preferences"] = merged + + print( + "WARNING: inline routing_preferences under model_providers is deprecated " + "and has been auto-migrated to top-level routing_preferences. Update your " + "config to v0.4.0 top-level form. 
See docs/routing-api.md" + ) + + +def _version_tuple(version_string): + stripped = version_string.strip().lstrip("vV") + if not stripped: + return (0, 0, 0) + parts = stripped.split("-", 1)[0].split(".") + out = [] + for part in parts[:3]: + try: + out.append(int(part)) + except ValueError: + out.append(0) + while len(out) < 3: + out.append(0) + return tuple(out) + + def validate_and_render_schema(): ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" @@ -101,6 +205,8 @@ def validate_and_render_schema(): config_yaml["model_providers"] = config_yaml["llm_providers"] del config_yaml["llm_providers"] + migrate_inline_routing_preferences(config_yaml) + listeners, llm_gateway, prompt_gateway = convert_legacy_listeners( config_yaml.get("listeners"), config_yaml.get("model_providers") ) @@ -200,7 +306,16 @@ def validate_and_render_schema(): model_provider_name_set = set() llms_with_usage = [] model_name_keys = set() - model_usage_name_keys = set() + + top_level_preferences = config_yaml.get("routing_preferences") or [] + seen_pref_names = set() + for pref in top_level_preferences: + pref_name = pref.get("name") + if pref_name in seen_pref_names: + raise Exception( + f'Duplicate routing preference name "{pref_name}", please provide unique name for each routing preference' + ) + seen_pref_names.add(pref_name) print("listeners: ", listeners) @@ -259,10 +374,6 @@ def validate_and_render_schema(): raise Exception( f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards." ) - if model_provider.get("routing_preferences"): - raise Exception( - f"Model {model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards." 
- ) # Validate azure_openai and ollama provider requires base_url if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get( @@ -311,13 +422,6 @@ def validate_and_render_schema(): ) model_name_keys.add(model_id) - for routing_preference in model_provider.get("routing_preferences", []): - if routing_preference.get("name") in model_usage_name_keys: - raise Exception( - f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference' - ) - model_usage_name_keys.add(routing_preference.get("name")) - # Warn if both passthrough_auth and access_key are configured if model_provider.get("passthrough_auth") and model_provider.get( "access_key" @@ -405,7 +509,7 @@ def validate_and_render_schema(): router_model_id = ( router_model.split("/", 1)[1] if "/" in router_model else router_model ) - if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set: + if len(seen_pref_names) > 0 and router_model_id not in model_name_set: updated_model_providers.append( { "name": "plano-orchestrator", diff --git a/cli/test/test_config_generator.py b/cli/test/test_config_generator.py index 3aec2390..77b5b480 100644 --- a/cli/test/test_config_generator.py +++ b/cli/test/test_config_generator.py @@ -1,7 +1,11 @@ import json import pytest +import yaml from unittest import mock -from planoai.config_generator import validate_and_render_schema +from planoai.config_generator import ( + validate_and_render_schema, + migrate_inline_routing_preferences, +) @pytest.fixture(autouse=True) @@ -295,32 +299,30 @@ model_providers: "id": "duplicate_routeing_preference_name", "expected_error": "Duplicate routing preference name", "plano_config": """ -version: v0.1.0 +version: v0.4.0 listeners: - egress_traffic: - address: 0.0.0.0 + - name: llm + type: model port: 12000 - message_format: openai - timeout: 30s - -llm_providers: +model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - 
model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code understanding - description: understand and explain existing code snippets, functions, or libraries - - model: openai/gpt-4.1 - access_key: $OPENAI_API_KEY - routing_preferences: - - name: code understanding - description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + models: + - openai/gpt-4o + - name: code understanding + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - openai/gpt-4o-mini tracing: random_sampling: 100 @@ -501,3 +503,238 @@ def test_convert_legacy_llm_providers_no_prompt_gateway(): "port": 12000, "timeout": "30s", } + + +def test_inline_routing_preferences_migrated_to_top_level(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert config_yaml["version"] == "v0.4.0" + for provider in config_yaml["model_providers"]: + assert "routing_preferences" not in provider + + top_level = config_yaml["routing_preferences"] + by_name = {entry["name"]: entry for entry in top_level} + assert set(by_name) == {"code understanding", "code 
generation"} + assert by_name["code understanding"]["models"] == ["openai/gpt-4o"] + assert by_name["code generation"]["models"] == [ + "anthropic/claude-sonnet-4-20250514" + ] + assert ( + by_name["code understanding"]["description"] + == "understand and explain existing code snippets, functions, or libraries" + ) + + +def test_inline_same_name_across_providers_merges_models(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + top_level = config_yaml["routing_preferences"] + assert len(top_level) == 1 + entry = top_level[0] + assert entry["name"] == "code generation" + assert entry["models"] == [ + "openai/gpt-4o", + "anthropic/claude-sonnet-4-20250514", + ] + assert config_yaml["version"] == "v0.4.0" + + +def test_existing_top_level_routing_preferences_preserved(): + plano_config = """ +version: v0.4.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: code generation + description: generating new code snippets or boilerplate + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-20250514 +""" + config_yaml = yaml.safe_load(plano_config) + before = yaml.safe_dump(config_yaml, sort_keys=True) + migrate_inline_routing_preferences(config_yaml) 
+ after = yaml.safe_dump(config_yaml, sort_keys=True) + + assert before == after + + +def test_existing_top_level_wins_over_inline_migration(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: inline description should lose + +routing_preferences: + - name: code generation + description: user-defined top-level description wins + models: + - openai/gpt-4o +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + top_level = config_yaml["routing_preferences"] + assert len(top_level) == 1 + entry = top_level[0] + assert entry["description"] == "user-defined top-level description wins" + assert entry["models"] == ["openai/gpt-4o"] + + +def test_wildcard_with_inline_routing_preferences_errors(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openrouter/* + base_url: https://openrouter.ai/api/v1 + passthrough_auth: true + routing_preferences: + - name: code generation + description: generating code +""" + config_yaml = yaml.safe_load(plano_config) + with pytest.raises(Exception) as excinfo: + migrate_inline_routing_preferences(config_yaml) + assert "wildcard" in str(excinfo.value).lower() + + +def test_migration_bumps_version_even_without_inline_preferences(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert "routing_preferences" not in config_yaml + assert config_yaml["version"] == "v0.4.0" + + +def test_migration_is_noop_on_v040_config_with_stray_inline_preferences(): + # v0.4.0 configs are assumed to be on 
the canonical top-level shape. + # The migration intentionally does not rescue stray inline preferences + # at v0.4.0+ so that the deprecation boundary is a clean version gate. + plano_config = """ +version: v0.4.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert config_yaml["version"] == "v0.4.0" + assert "routing_preferences" not in config_yaml + assert config_yaml["model_providers"][0]["routing_preferences"] == [ + {"name": "code generation", "description": "generating new code"} + ] + + +def test_migration_does_not_downgrade_newer_versions(): + plano_config = """ +version: v0.5.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert config_yaml["version"] == "v0.5.0" diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 2f9eea63..9560b437 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -201,6 +201,7 @@ properties: description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." routing_preferences: type: array + description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md." items: type: object properties: @@ -258,6 +259,7 @@ properties: description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." 
routing_preferences: type: array + description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md." items: type: object properties: diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 86aa331d..1275d77d 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -656,7 +656,7 @@ mod test { .expect("reference config file not found"); let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap(); - assert_eq!(config.version, "v0.3.0"); + assert_eq!(config.version, "v0.4.0"); if let Some(prompt_targets) = &config.prompt_targets { assert!( diff --git a/demos/llm_routing/claude_code_router/config.yaml b/demos/llm_routing/claude_code_router/config.yaml index e72aa73a..6235b6c6 100644 --- a/demos/llm_routing/claude_code_router/config.yaml +++ b/demos/llm_routing/claude_code_router/config.yaml @@ -19,7 +19,7 @@ model_providers: - name: code understanding description: understand and explain existing code snippets, functions, or libraries # Anthropic Models - - model: anthropic/claude-sonnet-4-5 + - model: anthropic/claude-sonnet-4-6 default: true access_key: $ANTHROPIC_API_KEY diff --git a/docs/routing-api.md b/docs/routing-api.md index c2b9c63f..4d1d6a8e 100644 --- a/docs/routing-api.md +++ b/docs/routing-api.md @@ -34,11 +34,13 @@ POST /v1/chat/completions ### `routing_preferences` fields -| Field | Type | Required | Description | -|---|---|---|---| -| `name` | string | yes | Route identifier. Must match the LLM router's route classification. | -| `description` | string | yes | Natural language description used by the router to match user intent. | -| `models` | string[] | yes | Ordered candidate pool. At least one entry required. 
Must be declared in `model_providers`. | + ### Notes @@ -64,11 +66,13 @@ POST /v1/chat/completions ### Fields -| Field | Type | Description | -|---|---|---| -| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. | -| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. | -| `trace_id` | string | Trace ID for distributed tracing and observability. | + +| Field | Type | Description | +| ---------- | ------------- | ------------------------------------------------------------------------------------------------------- | +| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. | +| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. | +| `trace_id` | string | Trace ID for distributed tracing and observability. | + --- @@ -142,6 +146,7 @@ X-Model-Affinity: a1b2c3d4-5678-... ``` Response when pinned: + ```json { "models": ["anthropic/claude-sonnet-4-20250514"], @@ -155,6 +160,7 @@ Response when pinned: Without the header, routing runs fresh every time (no breaking change). 
Configure TTL and cache size: + ```yaml routing: session_ttl_seconds: 600 # default: 10 min @@ -165,7 +171,8 @@ routing: ## Version Requirements -| Version | Top-level `routing_preferences` | -|---|---| + +| Version | Top-level `routing_preferences` | +| ---------- | -------------------------------------- | | `< v0.4.0` | Not allowed — startup error if present | -| `v0.4.0+` | Supported (required for model routing) | +| `v0.4.0+` | Supported (required for model routing) | diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index 87163d3b..60f468e0 100644 --- a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -158,7 +158,9 @@ Anthropic .. code-block:: yaml - llm_providers: + version: v0.4.0 + + model_providers: # Configure all Anthropic models with wildcard - model: anthropic/* access_key: $ANTHROPIC_API_KEY @@ -179,8 +181,12 @@ Anthropic - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_PROD_API_KEY - routing_preferences: - - name: code_generation + + routing_preferences: + - name: code_generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - anthropic/claude-sonnet-4-20250514 DeepSeek ~~~~~~~~ @@ -798,7 +804,9 @@ You can configure specific models with custom settings even when using wildcards .. 
code-block:: yaml - llm_providers: + version: v0.4.0 + + model_providers: # Expand to all Anthropic models - model: anthropic/* access_key: $ANTHROPIC_API_KEY @@ -807,14 +815,17 @@ You can configure specific models with custom settings even when using wildcards # This model will NOT be included in the wildcard expansion above - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_PROD_API_KEY - routing_preferences: - - name: code_generation - priority: 1 # Another specific override - model: anthropic/claude-3-haiku-20240307 access_key: $ANTHROPIC_DEV_API_KEY + routing_preferences: + - name: code_generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - anthropic/claude-sonnet-4-20250514 + **Custom Provider Wildcards:** For providers not in Plano's registry, wildcards enable dynamic model routing: @@ -856,24 +867,36 @@ Mark one model as the default for fallback scenarios: Routing Preferences ~~~~~~~~~~~~~~~~~~~ -Configure routing preferences for dynamic model selection: +Starting in ``v0.4.0``, configure routing preferences at the top level of the config. Each preference declares an ordered ``models`` candidate pool; the first entry is primary and the rest are fallbacks the client tries on ``429``/``5xx`` errors. Multiple providers can serve the same route — just list them all under ``models``. See :doc:`/guides/llm_router` for the full routing model. .. 
code-block:: yaml - llm_providers: + version: v0.4.0 + + model_providers: - model: openai/gpt-5.2 access_key: $OPENAI_API_KEY - routing_preferences: - - name: complex_reasoning - description: deep analysis, mathematical problem solving, and logical reasoning - - name: code_review - description: reviewing and analyzing existing code for bugs and improvements - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative_writing - description: creative content generation, storytelling, and writing assistance + + routing_preferences: + - name: complex_reasoning + description: deep analysis, mathematical problem solving, and logical reasoning + models: + - openai/gpt-5.2 + - anthropic/claude-sonnet-4-5 + - name: code_review + description: reviewing and analyzing existing code for bugs and improvements + models: + - openai/gpt-5.2 + - name: creative_writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 + +.. note:: + ``v0.3.0`` configs that declare ``routing_preferences`` inline under each ``model_provider`` are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update to the form above to silence the warning and gain the multi-model fallback behavior. .. _passthrough_auth: diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 5539dddc..b66c01f2 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -147,38 +147,53 @@ Plano-Orchestrator analyzes each prompt to infer domain and action, then applies Configuration ^^^^^^^^^^^^^ -To configure preference-aligned dynamic routing, define routing preferences that map domains and actions to specific models: +To configure preference-aligned dynamic routing, declare a top-level ``routing_preferences`` list and attach an ordered ``models`` candidate pool to each route. 
Starting in ``v0.4.0``, ``routing_preferences`` lives at the root of the config (not inline under ``model_providers``), which lets multiple models serve the same route — the first entry in ``models`` is primary, the rest are fallbacks that the client tries on ``429``/``5xx`` errors. .. code-block:: yaml :caption: Preference-Aligned Dynamic Routing Configuration + version: v0.4.0 + listeners: - egress_traffic: + - name: egress_traffic + type: model address: 0.0.0.0 port: 12000 - message_format: openai timeout: 30s - llm_providers: + model_providers: - model: openai/gpt-5.2 access_key: $OPENAI_API_KEY default: true - model: openai/gpt-5 access_key: $OPENAI_API_KEY - routing_preferences: - - name: code understanding - description: understand and explain existing code snippets, functions, or libraries - - name: complex reasoning - description: deep analysis, mathematical problem solving, and logical reasoning - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative writing - description: creative content generation, storytelling, and writing assistance - - name: code generation - description: generating new code snippets, functions, or boilerplate based on user prompts + + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + models: + - openai/gpt-5 + - anthropic/claude-sonnet-4-5 + - name: complex reasoning + description: deep analysis, mathematical problem solving, and logical reasoning + models: + - openai/gpt-5 + - name: creative writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-5 + +.. 
note:: + Configs still using the ``v0.3.0`` inline style (``routing_preferences`` nested under each ``model_provider``) are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update your config to the form above to silence the warning. Client usage ^^^^^^^^^^^^ @@ -253,6 +268,8 @@ Using Ollama (recommended for local development) .. code-block:: yaml + version: v0.4.0 + overrides: llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M @@ -266,9 +283,12 @@ Using Ollama (recommended for local development) - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative writing - description: creative content generation, storytelling, and writing assistance + + routing_preferences: + - name: creative writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 4. **Verify the model is running** @@ -322,6 +342,8 @@ vLLM provides higher throughput and GPU optimizations suitable for production de .. code-block:: yaml + version: v0.4.0 + overrides: llm_routing_model: plano/Plano-Orchestrator @@ -335,9 +357,12 @@ vLLM provides higher throughput and GPU optimizations suitable for production de - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative writing - description: creative content generation, storytelling, and writing assistance + + routing_preferences: + - name: creative writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 5. **Verify the server is running** @@ -468,22 +493,30 @@ You can combine static model selection with dynamic routing preferences for maxi .. 
code-block:: yaml :caption: Hybrid Routing Configuration - llm_providers: + version: v0.4.0 + + model_providers: - model: openai/gpt-5.2 access_key: $OPENAI_API_KEY default: true - model: openai/gpt-5 access_key: $OPENAI_API_KEY - routing_preferences: - - name: complex_reasoning - description: deep analysis and complex problem solving - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative_tasks - description: creative writing and content generation + + routing_preferences: + - name: complex_reasoning + description: deep analysis and complex problem solving + models: + - openai/gpt-5 + - anthropic/claude-sonnet-4-5 + - name: creative_tasks + description: creative writing and content generation + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-5 model_aliases: # Model aliases - friendly names that map to actual provider names diff --git a/docs/source/resources/includes/plano_config_full_reference.yaml b/docs/source/resources/includes/plano_config_full_reference.yaml index 808d0a98..99eb4510 100644 --- a/docs/source/resources/includes/plano_config_full_reference.yaml +++ b/docs/source/resources/includes/plano_config_full_reference.yaml @@ -1,5 +1,5 @@ # Plano Gateway configuration version -version: v0.3.0 +version: v0.4.0 # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions) agents: @@ -32,17 +32,8 @@ model_providers: - model: mistral/ministral-3b-latest access_key: $MISTRAL_API_KEY - # routing_preferences: tags a model with named capabilities so Plano's LLM router - # can select the best model for each request based on intent. Requires the - # Plano-Orchestrator model (or equivalent) to be configured in overrides.llm_routing_model. - # Each preference has a name (short label) and a description (used for intent matching). 
- model: groq/llama-3.3-70b-versatile access_key: $GROQ_API_KEY - routing_preferences: - - name: code generation - description: generating new code snippets, functions, or boilerplate based on user prompts or requirements - - name: code review - description: reviewing, analyzing, and suggesting improvements to existing code # passthrough_auth: forwards the client's Authorization header upstream instead of # using the configured access_key. Useful for LiteLLM or similar proxy setups. @@ -64,6 +55,29 @@ model_aliases: smart-llm: target: gpt-4o +# routing_preferences: top-level list that tags named task categories with an +# ordered pool of candidate models. Plano's LLM router matches incoming requests +# against these descriptions and returns an ordered list of models; the client +# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx. +# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent). +# Each model in `models` must be declared in model_providers above. +# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router +# reorder candidates using live cost/latency data from model_metrics_sources. 
+routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - anthropic/claude-sonnet-4-0 + - openai/gpt-4o + - groq/llama-3.3-70b-versatile + - name: code review + description: reviewing, analyzing, and suggesting improvements to existing code + models: + - anthropic/claude-sonnet-4-0 + - groq/llama-3.3-70b-versatile + selection_policy: + prefer: cheapest + # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access listeners: # Agent listener for routing requests to multiple agents diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml index a0603221..e2ab9110 100644 --- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml @@ -69,12 +69,6 @@ listeners: model: llama-3.3-70b-versatile name: groq/llama-3.3-70b-versatile provider_interface: groq - routing_preferences: - - description: generating new code snippets, functions, or boilerplate based on - user prompts or requirements - name: code generation - - description: reviewing, analyzing, and suggesting improvements to existing code - name: code review - base_url: https://litellm.example.com cluster_name: openai_litellm.example.com endpoint: litellm.example.com @@ -131,12 +125,6 @@ model_providers: model: llama-3.3-70b-versatile name: groq/llama-3.3-70b-versatile provider_interface: groq - routing_preferences: - - description: generating new code snippets, functions, or boilerplate based on - user prompts or requirements - name: code generation - - description: reviewing, analyzing, and suggesting improvements to existing code - name: code review - base_url: https://litellm.example.com cluster_name: openai_litellm.example.com endpoint: litellm.example.com @@ -221,6 +209,21 @@ 
routing: type: memory session_max_entries: 10000 session_ttl_seconds: 600 +routing_preferences: +- description: generating new code snippets, functions, or boilerplate based on user + prompts or requirements + models: + - anthropic/claude-sonnet-4-0 + - openai/gpt-4o + - groq/llama-3.3-70b-versatile + name: code generation +- description: reviewing, analyzing, and suggesting improvements to existing code + models: + - anthropic/claude-sonnet-4-0 + - groq/llama-3.3-70b-versatile + name: code review + selection_policy: + prefer: cheapest state_storage: type: memory system_prompt: 'You are a helpful assistant. Always respond concisely and accurately. @@ -237,4 +240,4 @@ tracing: environment: production service.team: platform trace_arch_internal: false -version: v0.3.0 +version: v0.4.0 diff --git a/skills/AGENTS.md b/skills/AGENTS.md index 61fd7228..dab3144b 100644 --- a/skills/AGENTS.md +++ b/skills/AGENTS.md @@ -312,20 +312,24 @@ When a request does not match any routing preference, Plano forwards it to the ` **Incorrect (no default provider set):** ```yaml -version: v0.3.0 +version: v0.4.0 model_providers: - model: openai/gpt-4o-mini # No default: true anywhere access_key: $OPENAI_API_KEY - routing_preferences: - - name: summarization - description: Summarizing documents and extracting key points - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: Writing new functions and implementing algorithms + +routing_preferences: + - name: summarization + description: Summarizing documents and extracting key points + models: + - openai/gpt-4o-mini + - name: code_generation + description: Writing new functions and implementing algorithms + models: + - openai/gpt-4o ``` **Incorrect (multiple defaults — ambiguous):** @@ -344,25 +348,35 @@ model_providers: **Correct (exactly one default, covering unmatched requests):** ```yaml -version: v0.3.0 +version: v0.4.0 model_providers: - model: openai/gpt-4o-mini access_key: 
$OPENAI_API_KEY default: true # Handles general/unclassified requests - routing_preferences: - - name: summarization - description: Summarizing documents, articles, and meeting notes - - name: classification - description: Categorizing inputs, labeling, and intent detection - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: Writing, debugging, and reviewing code - - name: complex_reasoning - description: Multi-step math, logical analysis, research synthesis + +routing_preferences: + - name: summarization + description: Summarizing documents, articles, and meeting notes + models: + - openai/gpt-4o-mini + - openai/gpt-4o + - name: classification + description: Categorizing inputs, labeling, and intent detection + models: + - openai/gpt-4o-mini + - name: code_generation + description: Writing, debugging, and reviewing code + models: + - openai/gpt-4o + - openai/gpt-4o-mini + - name: complex_reasoning + description: Multi-step math, logical analysis, research synthesis + models: + - openai/gpt-4o ``` Choose your most cost-effective capable model as the default — it handles all traffic that doesn't match specialized preferences. 
@@ -498,21 +512,27 @@ model_providers: **Combined: proxy for some models, Plano-managed for others:** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY # Plano manages this key default: true - routing_preferences: - - name: quick tasks - description: Short answers, simple lookups, fast completions - model: custom/vllm-llama base_url: http://gpu-server:8000 provider_interface: openai passthrough_auth: true # vLLM cluster handles its own auth - routing_preferences: - - name: long context - description: Processing very long documents, multi-document analysis + +routing_preferences: + - name: quick tasks + description: Short answers, simple lookups, fast completions + models: + - openai/gpt-4o-mini + - name: long context + description: Processing very long documents, multi-document analysis + models: + - custom/vllm-llama ``` Reference: https://github.com/katanemo/archgw @@ -526,67 +546,100 @@ Reference: https://github.com/katanemo/archgw ## Write Task-Specific Routing Preference Descriptions -Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy. +Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy. + +Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Listing multiple models under a single route gives you automatic provider fallback without extra client logic. 
Configs still using the legacy v0.3.0 inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below. **Incorrect (vague, overlapping descriptions):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: simple - description: easy tasks # Too vague — what is "easy"? - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: hard - description: hard tasks # Too vague — overlaps with "easy" + +routing_preferences: + - name: simple + description: easy tasks # Too vague — what is "easy"? + models: + - openai/gpt-4o-mini + - name: hard + description: hard tasks # Too vague — overlaps with "easy" + models: + - openai/gpt-4o ``` -**Correct (specific, distinct task descriptions):** +**Correct (specific, distinct task descriptions, multi-model fallbacks):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: summarization - description: > - Summarizing documents, articles, emails, or meeting transcripts. - Extracting key points, generating TL;DR sections, condensing long text. - - name: classification - description: > - Categorizing inputs, sentiment analysis, spam detection, - intent classification, labeling structured data fields. - - name: translation - description: > - Translating text between languages, localization tasks. - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: > - Writing new functions, classes, or modules from scratch. - Implementing algorithms, boilerplate generation, API integrations. - - name: code_review - description: > - Reviewing code for bugs, security vulnerabilities, performance issues. - Suggesting refactors, explaining complex code, debugging errors. 
- - name: complex_reasoning - description: > - Multi-step math problems, logical deduction, strategic planning, - research synthesis requiring chain-of-thought reasoning. + + - model: anthropic/claude-sonnet-4-5 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: summarization + description: > + Summarizing documents, articles, emails, or meeting transcripts. + Extracting key points, generating TL;DR sections, condensing long text. + models: + - openai/gpt-4o-mini + - openai/gpt-4o + - name: classification + description: > + Categorizing inputs, sentiment analysis, spam detection, + intent classification, labeling structured data fields. + models: + - openai/gpt-4o-mini + - name: translation + description: > + Translating text between languages, localization tasks. + models: + - openai/gpt-4o-mini + - anthropic/claude-sonnet-4-5 + - name: code_generation + description: > + Writing new functions, classes, or modules from scratch. + Implementing algorithms, boilerplate generation, API integrations. + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 + - name: code_review + description: > + Reviewing code for bugs, security vulnerabilities, performance issues. + Suggesting refactors, explaining complex code, debugging errors. + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-4o + - name: complex_reasoning + description: > + Multi-step math problems, logical deduction, strategic planning, + research synthesis requiring chain-of-thought reasoning. 
+ models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 ``` **Key principles for good preference descriptions:** - Use concrete action verbs: "writing", "reviewing", "translating", "summarizing" - List 3–5 specific sub-tasks or synonyms for each preference -- Ensure preferences across providers are mutually exclusive in scope +- Ensure preferences across routes are mutually exclusive in scope +- Order `models` from most preferred to least — the client falls back in order on `429`/`5xx` +- List multiple models under one route for automatic provider fallback without extra client logic +- Every model listed in `models` must be declared in `model_providers` - Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions Reference: https://github.com/katanemo/archgw @@ -1451,7 +1504,7 @@ planoai cli_agent claude --path /path/to/project **Recommended config for Claude Code routing:** ```yaml -version: v0.3.0 +version: v0.4.0 listeners: - type: model @@ -1462,19 +1515,25 @@ model_providers: - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_API_KEY default: true - routing_preferences: - - name: general coding - description: > - Writing code, debugging, code review, explaining concepts, - answering programming questions, general development tasks. - model: anthropic/claude-opus-4-6 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: complex architecture - description: > - System design, complex refactoring across many files, - architectural decisions, performance optimization, security audits. + +routing_preferences: + - name: general coding + description: > + Writing code, debugging, code review, explaining concepts, + answering programming questions, general development tasks. 
+ models: + - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-opus-4-6 + - name: complex architecture + description: > + System design, complex refactoring across many files, + architectural decisions, performance optimization, security audits. + models: + - anthropic/claude-opus-4-6 + - anthropic/claude-sonnet-4-20250514 model_aliases: claude.fast.v1: @@ -1861,28 +1920,36 @@ listeners: **Multi-listener architecture (serves all client types):** ```yaml -version: v0.3.0 +version: v0.4.0 # --- Shared model providers --- model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: quick tasks - description: Short answers, formatting, classification, simple generation - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: complex reasoning - description: Multi-step analysis, code generation, research synthesis - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: long documents - description: Summarizing or analyzing very long documents, PDFs, transcripts + +# --- Shared routing_preferences (top-level, v0.4.0+) --- +routing_preferences: + - name: quick tasks + description: Short answers, formatting, classification, simple generation + models: + - openai/gpt-4o-mini + - name: complex reasoning + description: Multi-step analysis, code generation, research synthesis + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-20250514 + - name: long documents + description: Summarizing or analyzing very long documents, PDFs, transcripts + models: + - anthropic/claude-sonnet-4-20250514 + - openai/gpt-4o # --- Listener 1: OpenAI-compatible API gateway --- # For: SDK clients, Claude Code, LangChain, etc. 
diff --git a/skills/rules/routing-preferences.md b/skills/rules/routing-preferences.md index 571a3acd..51127c5e 100644 --- a/skills/rules/routing-preferences.md +++ b/skills/rules/routing-preferences.md @@ -7,67 +7,100 @@ tags: routing, model-selection, preferences, llm-routing ## Write Task-Specific Routing Preference Descriptions -Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy. +Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy. + +Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Configs still using the legacy v0.3.0 inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below. **Incorrect (vague, overlapping descriptions):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: simple - description: easy tasks # Too vague — what is "easy"? - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: hard - description: hard tasks # Too vague — overlaps with "easy" + +routing_preferences: + - name: simple + description: easy tasks # Too vague — what is "easy"? 
+ models: + - openai/gpt-4o-mini + - name: hard + description: hard tasks # Too vague — overlaps with "easy" + models: + - openai/gpt-4o ``` -**Correct (specific, distinct task descriptions):** +**Correct (specific, distinct task descriptions, multi-model fallbacks):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: summarization - description: > - Summarizing documents, articles, emails, or meeting transcripts. - Extracting key points, generating TL;DR sections, condensing long text. - - name: classification - description: > - Categorizing inputs, sentiment analysis, spam detection, - intent classification, labeling structured data fields. - - name: translation - description: > - Translating text between languages, localization tasks. - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: > - Writing new functions, classes, or modules from scratch. - Implementing algorithms, boilerplate generation, API integrations. - - name: code_review - description: > - Reviewing code for bugs, security vulnerabilities, performance issues. - Suggesting refactors, explaining complex code, debugging errors. - - name: complex_reasoning - description: > - Multi-step math problems, logical deduction, strategic planning, - research synthesis requiring chain-of-thought reasoning. + + - model: anthropic/claude-sonnet-4-5 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: summarization + description: > + Summarizing documents, articles, emails, or meeting transcripts. + Extracting key points, generating TL;DR sections, condensing long text. + models: + - openai/gpt-4o-mini + - openai/gpt-4o + - name: classification + description: > + Categorizing inputs, sentiment analysis, spam detection, + intent classification, labeling structured data fields. 
+ models: + - openai/gpt-4o-mini + - name: translation + description: > + Translating text between languages, localization tasks. + models: + - openai/gpt-4o-mini + - anthropic/claude-sonnet-4-5 + - name: code_generation + description: > + Writing new functions, classes, or modules from scratch. + Implementing algorithms, boilerplate generation, API integrations. + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 + - name: code_review + description: > + Reviewing code for bugs, security vulnerabilities, performance issues. + Suggesting refactors, explaining complex code, debugging errors. + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-4o + - name: complex_reasoning + description: > + Multi-step math problems, logical deduction, strategic planning, + research synthesis requiring chain-of-thought reasoning. + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 ``` **Key principles for good preference descriptions:** - Use concrete action verbs: "writing", "reviewing", "translating", "summarizing" - List 3–5 specific sub-tasks or synonyms for each preference -- Ensure preferences across providers are mutually exclusive in scope +- Ensure preferences across routes are mutually exclusive in scope +- Order `models` from most preferred to least — the client will fall back in order on `429`/`5xx` +- List multiple models under one route to get automatic provider fallback without additional client logic +- Every model listed in `models` must be declared in `model_providers` - Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions -Reference: https://github.com/katanemo/archgw +Reference: [Routing API](../../docs/routing-api.md) · https://github.com/katanemo/archgw