fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level

Lift inline routing_preferences under each model_provider into the
top-level routing_preferences list with merged models[] and bump
version to v0.4.0, with a deprecation warning. Existing v0.3.0
demo configs (Claude Code, Codex, preference_based_routing, etc.)
keep working unchanged. Schema flags the inline shape as deprecated
but still accepts it. Docs and skills updated to canonical top-level
multi-model form.
This commit is contained in:
Spherrrical 2026-04-24 11:28:22 -07:00
parent b81eb7266c
commit dde90cae82
11 changed files with 693 additions and 224 deletions

View file

@ -58,6 +58,104 @@ def get_endpoint_and_port(endpoint, protocol):
return endpoint, port return endpoint, port
def migrate_inline_routing_preferences(config_yaml):
    """Lift v0.3.0-style inline ``routing_preferences`` to the top level.

    Inline ``routing_preferences`` declared under each ``model_providers``
    entry are moved into the v0.4.0 top-level ``routing_preferences`` list,
    where every entry carries an explicit ``models: [...]`` candidate pool.

    Preferences with the same ``name`` across multiple providers are merged
    into a single top-level entry whose ``models`` list contains every
    provider's full ``<provider>/<model>`` string in declaration order. The
    first non-empty ``description`` encountered wins; conflicts are warned,
    not errored, so existing v0.3.0 configs keep compiling. Any top-level
    preference already defined by the user is preserved as-is (a warning is
    printed when that causes inline models to be dropped).

    Also bumps ``version`` to ``v0.4.0`` when migration produced any entry
    and the current version is older, so brightstaff's v0.4.0 gate for
    top-level ``routing_preferences`` accepts the rendered config. A version
    that is already ``v0.4.0`` or newer is never rewritten.

    Args:
        config_yaml: Parsed config mapping. It is mutated in place.

    Returns:
        None.

    Raises:
        Exception: If a provider whose model is a wildcard
            (``<provider>/*``) declares inline ``routing_preferences``.
    """
    model_providers = config_yaml.get("model_providers") or []
    if not model_providers:
        return

    # name -> migrated top-level entry; dict order keeps declaration order.
    migrated = {}
    for model_provider in model_providers:
        inline_prefs = model_provider.get("routing_preferences")
        if not inline_prefs:
            continue
        full_model_name = model_provider.get("model")
        if not full_model_name:
            continue
        if "/" in full_model_name and full_model_name.split("/")[-1].strip() == "*":
            raise Exception(
                f"Model {full_model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
            )
        for pref in inline_prefs:
            name = pref.get("name")
            if not name:
                continue
            # ``description:`` with no value parses to None; normalise to "".
            description = pref.get("description") or ""
            entry = migrated.get(name)
            if entry is None:
                migrated[name] = {
                    "name": name,
                    "description": description,
                    "models": [full_model_name],
                }
                continue
            if not entry["description"]:
                # An empty description is not a real choice: adopt the first
                # non-empty one instead of reporting a bogus conflict.
                entry["description"] = description
            elif description and description != entry["description"]:
                print(
                    f"WARNING: routing preference '{name}' has conflicting descriptions across providers; keeping the first one."
                )
            if full_model_name not in entry["models"]:
                entry["models"].append(full_model_name)

    if not migrated:
        return

    # The inline shape has been consumed; drop it from every provider.
    for model_provider in model_providers:
        if "routing_preferences" in model_provider:
            del model_provider["routing_preferences"]

    existing_top_level = config_yaml.get("routing_preferences") or []
    existing_by_name = {entry.get("name"): entry for entry in existing_top_level}
    merged = list(existing_top_level)
    for name, entry in migrated.items():
        if name not in existing_by_name:
            merged.append(entry)
            continue
        # The user-defined top-level entry wins untouched, but tell the user
        # when that silently discards models they had declared inline.
        kept_models = existing_by_name[name].get("models") or []
        dropped = [model for model in entry["models"] if model not in kept_models]
        if dropped:
            print(
                f"WARNING: routing preference '{name}' is already defined at the top level; "
                f"ignoring inline models not listed there: {', '.join(dropped)}."
            )
    config_yaml["routing_preferences"] = merged

    # Only ever bump forward: a config already on v0.4.0+ keeps its version.
    if _version_tuple(config_yaml.get("version")) < (0, 4, 0):
        config_yaml["version"] = "v0.4.0"

    print(
        "WARNING: inline routing_preferences under model_providers is deprecated "
        "and has been auto-migrated to top-level routing_preferences. Update your "
        "config to v0.4.0 top-level form. See docs/routing-api.md"
    )


def _version_tuple(version_string):
    """Parse a version such as ``v0.4.0`` into a comparable 3-int tuple.

    A leading ``v``/``V`` is ignored, as are pre-release (``-rc1``) and build
    (``+sha``) suffixes. Each of the first three dot-separated components
    contributes its leading decimal digits, so ``v0.5rc1`` is ``(0, 5, 0)``
    rather than collapsing to ``(0, 0, 0)``. Missing or non-numeric
    components count as 0.

    Args:
        version_string: The version value; ``None`` and non-string values
            (e.g. a YAML float such as ``0.4``) are accepted.

    Returns:
        A ``(major, minor, patch)`` tuple of ints.
    """
    if version_string is None:
        return (0, 0, 0)
    stripped = str(version_string).strip().lstrip("vV")
    core = stripped.split("-", 1)[0].split("+", 1)[0]
    out = []
    for part in core.split(".")[:3]:
        digits = ""
        for char in part.strip():
            # ASCII digits only: str.isdigit() also accepts characters such
            # as superscripts that int() cannot parse.
            if char not in "0123456789":
                break
            digits += char
        out.append(int(digits) if digits else 0)
    while len(out) < 3:
        out.append(0)
    return tuple(out)
def validate_and_render_schema(): def validate_and_render_schema():
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
@ -101,6 +199,8 @@ def validate_and_render_schema():
config_yaml["model_providers"] = config_yaml["llm_providers"] config_yaml["model_providers"] = config_yaml["llm_providers"]
del config_yaml["llm_providers"] del config_yaml["llm_providers"]
migrate_inline_routing_preferences(config_yaml)
listeners, llm_gateway, prompt_gateway = convert_legacy_listeners( listeners, llm_gateway, prompt_gateway = convert_legacy_listeners(
config_yaml.get("listeners"), config_yaml.get("model_providers") config_yaml.get("listeners"), config_yaml.get("model_providers")
) )
@ -200,7 +300,16 @@ def validate_and_render_schema():
model_provider_name_set = set() model_provider_name_set = set()
llms_with_usage = [] llms_with_usage = []
model_name_keys = set() model_name_keys = set()
model_usage_name_keys = set()
top_level_preferences = config_yaml.get("routing_preferences") or []
seen_pref_names = set()
for pref in top_level_preferences:
pref_name = pref.get("name")
if pref_name in seen_pref_names:
raise Exception(
f'Duplicate routing preference name "{pref_name}", please provide unique name for each routing preference'
)
seen_pref_names.add(pref_name)
print("listeners: ", listeners) print("listeners: ", listeners)
@ -259,10 +368,6 @@ def validate_and_render_schema():
raise Exception( raise Exception(
f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards." f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards."
) )
if model_provider.get("routing_preferences"):
raise Exception(
f"Model {model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
)
# Validate azure_openai and ollama provider requires base_url # Validate azure_openai and ollama provider requires base_url
if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get( if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
@ -311,13 +416,6 @@ def validate_and_render_schema():
) )
model_name_keys.add(model_id) model_name_keys.add(model_id)
for routing_preference in model_provider.get("routing_preferences", []):
if routing_preference.get("name") in model_usage_name_keys:
raise Exception(
f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference'
)
model_usage_name_keys.add(routing_preference.get("name"))
# Warn if both passthrough_auth and access_key are configured # Warn if both passthrough_auth and access_key are configured
if model_provider.get("passthrough_auth") and model_provider.get( if model_provider.get("passthrough_auth") and model_provider.get(
"access_key" "access_key"
@ -405,7 +503,7 @@ def validate_and_render_schema():
router_model_id = ( router_model_id = (
router_model.split("/", 1)[1] if "/" in router_model else router_model router_model.split("/", 1)[1] if "/" in router_model else router_model
) )
if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set: if len(seen_pref_names) > 0 and router_model_id not in model_name_set:
updated_model_providers.append( updated_model_providers.append(
{ {
"name": "plano-orchestrator", "name": "plano-orchestrator",

View file

@ -1,7 +1,11 @@
import json import json
import pytest import pytest
import yaml
from unittest import mock from unittest import mock
from planoai.config_generator import validate_and_render_schema from planoai.config_generator import (
validate_and_render_schema,
migrate_inline_routing_preferences,
)
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
@ -295,32 +299,30 @@ model_providers:
"id": "duplicate_routeing_preference_name", "id": "duplicate_routeing_preference_name",
"expected_error": "Duplicate routing preference name", "expected_error": "Duplicate routing preference name",
"plano_config": """ "plano_config": """
version: v0.1.0 version: v0.4.0
listeners: listeners:
egress_traffic: - name: llm
address: 0.0.0.0 type: model
port: 12000 port: 12000
message_format: openai
timeout: 30s
llm_providers:
model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
- model: openai/gpt-4o - model: openai/gpt-4o
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences: routing_preferences:
- name: code understanding - name: code understanding
description: understand and explain existing code snippets, functions, or libraries description: understand and explain existing code snippets, functions, or libraries
models:
- model: openai/gpt-4.1 - openai/gpt-4o
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding - name: code understanding
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
models:
- openai/gpt-4o-mini
tracing: tracing:
random_sampling: 100 random_sampling: 100
@ -501,3 +503,190 @@ def test_convert_legacy_llm_providers_no_prompt_gateway():
"port": 12000, "port": 12000,
"timeout": "30s", "timeout": "30s",
} }
def test_inline_routing_preferences_migrated_to_top_level():
    """Distinct inline preferences on two providers are lifted to top level.

    Two providers each declare one differently named inline preference, so
    the migrated config should hold two top-level entries, each with a
    single-model ``models`` list, and the version should be bumped.
    """
    plano_config = """
version: v0.3.0
listeners:
  - type: model
    name: model_listener
    port: 12000
model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code understanding
        description: understand and explain existing code snippets, functions, or libraries
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
"""
    config_yaml = yaml.safe_load(plano_config)
    migrate_inline_routing_preferences(config_yaml)
    # Migration happened, so the version must be bumped to v0.4.0.
    assert config_yaml["version"] == "v0.4.0"
    # The inline shape is removed from every provider after migration.
    for provider in config_yaml["model_providers"]:
        assert "routing_preferences" not in provider
    top_level = config_yaml["routing_preferences"]
    # Index by name so the assertions do not depend on list order.
    by_name = {entry["name"]: entry for entry in top_level}
    assert set(by_name) == {"code understanding", "code generation"}
    assert by_name["code understanding"]["models"] == ["openai/gpt-4o"]
    assert by_name["code generation"]["models"] == [
        "anthropic/claude-sonnet-4-20250514"
    ]
    assert (
        by_name["code understanding"]["description"]
        == "understand and explain existing code snippets, functions, or libraries"
    )
def test_inline_same_name_across_providers_merges_models():
    """The same preference name on two providers merges into one entry.

    Both providers declare ``code generation`` with an identical description;
    the result should be a single top-level entry whose ``models`` list
    contains both providers in declaration order.
    """
    plano_config = """
version: v0.3.0
listeners:
  - type: model
    name: model_listener
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
"""
    config_yaml = yaml.safe_load(plano_config)
    migrate_inline_routing_preferences(config_yaml)
    top_level = config_yaml["routing_preferences"]
    # One merged entry, not one per provider.
    assert len(top_level) == 1
    entry = top_level[0]
    assert entry["name"] == "code generation"
    # Models appear in the order their providers were declared.
    assert entry["models"] == [
        "openai/gpt-4o",
        "anthropic/claude-sonnet-4-20250514",
    ]
    assert config_yaml["version"] == "v0.4.0"
def test_existing_top_level_routing_preferences_preserved():
    """A config already in v0.4.0 top-level form is left untouched.

    No provider carries inline preferences here, so the migration should be a
    no-op; this is checked by comparing a sorted YAML dump taken before and
    after the call.
    """
    plano_config = """
version: v0.4.0
listeners:
  - type: model
    name: model_listener
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
routing_preferences:
  - name: code generation
    description: generating new code snippets or boilerplate
    models:
      - openai/gpt-4o
      - anthropic/claude-sonnet-4-20250514
"""
    config_yaml = yaml.safe_load(plano_config)
    # Snapshot with sorted keys so the comparison ignores key ordering.
    before = yaml.safe_dump(config_yaml, sort_keys=True)
    migrate_inline_routing_preferences(config_yaml)
    after = yaml.safe_dump(config_yaml, sort_keys=True)
    assert before == after
def test_existing_top_level_wins_over_inline_migration():
    """A user-defined top-level preference beats an inline one of the same name.

    The config declares ``code generation`` both inline (under the provider)
    and at the top level with a different description; the top-level entry
    should survive unchanged and no duplicate should be appended.
    """
    plano_config = """
version: v0.3.0
listeners:
  - type: model
    name: model_listener
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: inline description should lose
routing_preferences:
  - name: code generation
    description: user-defined top-level description wins
    models:
      - openai/gpt-4o
"""
    config_yaml = yaml.safe_load(plano_config)
    migrate_inline_routing_preferences(config_yaml)
    top_level = config_yaml["routing_preferences"]
    # The inline entry must not be appended alongside the existing one.
    assert len(top_level) == 1
    entry = top_level[0]
    assert entry["description"] == "user-defined top-level description wins"
    assert entry["models"] == ["openai/gpt-4o"]
def test_wildcard_with_inline_routing_preferences_errors():
    """Inline preferences on a wildcard provider (``openrouter/*``) raise.

    The migration is expected to reject the config with an exception whose
    message mentions "wildcard".
    """
    plano_config = """
version: v0.3.0
listeners:
  - type: model
    name: model_listener
    port: 12000
model_providers:
  - model: openrouter/*
    base_url: https://openrouter.ai/api/v1
    passthrough_auth: true
    routing_preferences:
      - name: code generation
        description: generating code
"""
    config_yaml = yaml.safe_load(plano_config)
    with pytest.raises(Exception) as excinfo:
        migrate_inline_routing_preferences(config_yaml)
    # Case-insensitive match so the exact wording of the message may vary.
    assert "wildcard" in str(excinfo.value).lower()
def test_migration_noop_when_no_inline_preferences():
    """With no inline preferences anywhere, nothing is added or bumped.

    The config must not gain a top-level ``routing_preferences`` key and its
    ``version`` must stay at ``v0.3.0``.
    """
    plano_config = """
version: v0.3.0
listeners:
  - type: model
    name: model_listener
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
"""
    config_yaml = yaml.safe_load(plano_config)
    migrate_inline_routing_preferences(config_yaml)
    assert "routing_preferences" not in config_yaml
    # No migration occurred, so the version is left alone.
    assert config_yaml["version"] == "v0.3.0"

View file

@ -201,6 +201,7 @@ properties:
description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
routing_preferences: routing_preferences:
type: array type: array
description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
items: items:
type: object type: object
properties: properties:
@ -258,6 +259,7 @@ properties:
description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
routing_preferences: routing_preferences:
type: array type: array
description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
items: items:
type: object type: object
properties: properties:

View file

@ -19,7 +19,7 @@ model_providers:
- name: code understanding - name: code understanding
description: understand and explain existing code snippets, functions, or libraries description: understand and explain existing code snippets, functions, or libraries
# Anthropic Models # Anthropic Models
- model: anthropic/claude-sonnet-4-5 - model: anthropic/claude-sonnet-4-6
default: true default: true
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY

View file

@ -34,12 +34,14 @@ POST /v1/chat/completions
### `routing_preferences` fields ### `routing_preferences` fields
| Field | Type | Required | Description | | Field | Type | Required | Description |
|---|---|---|---| | ------------- | -------- | -------- | ------------------------------------------------------------------------------------------- |
| `name` | string | yes | Route identifier. Must match the LLM router's route classification. | | `name` | string | yes | Route identifier. Must match the LLM router's route classification. |
| `description` | string | yes | Natural language description used by the router to match user intent. | | `description` | string | yes | Natural language description used by the router to match user intent. |
| `models` | string[] | yes | Ordered candidate pool. At least one entry required. Must be declared in `model_providers`. | | `models` | string[] | yes | Ordered candidate pool. At least one entry required. Must be declared in `model_providers`. |
### Notes ### Notes
- `routing_preferences` is **optional**. If omitted, the config-defined preferences are used. - `routing_preferences` is **optional**. If omitted, the config-defined preferences are used.
@ -64,12 +66,14 @@ POST /v1/chat/completions
### Fields ### Fields
| Field | Type | Description | | Field | Type | Description |
|---|---|---| | ---------- | ------------- | ------------------------------------------------------------------------------------------------------- |
| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. | | `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. |
| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. | | `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. |
| `trace_id` | string | Trace ID for distributed tracing and observability. | | `trace_id` | string | Trace ID for distributed tracing and observability. |
--- ---
## Client Usage Pattern ## Client Usage Pattern
@ -142,6 +146,7 @@ X-Model-Affinity: a1b2c3d4-5678-...
``` ```
Response when pinned: Response when pinned:
```json ```json
{ {
"models": ["anthropic/claude-sonnet-4-20250514"], "models": ["anthropic/claude-sonnet-4-20250514"],
@ -155,6 +160,7 @@ Response when pinned:
Without the header, routing runs fresh every time (no breaking change). Without the header, routing runs fresh every time (no breaking change).
Configure TTL and cache size: Configure TTL and cache size:
```yaml ```yaml
routing: routing:
session_ttl_seconds: 600 # default: 10 min session_ttl_seconds: 600 # default: 10 min
@ -165,7 +171,8 @@ routing:
## Version Requirements ## Version Requirements
| Version | Top-level `routing_preferences` | | Version | Top-level `routing_preferences` |
|---|---| | ---------- | -------------------------------------- |
| `< v0.4.0` | Not allowed — startup error if present | | `< v0.4.0` | Not allowed — startup error if present |
| `v0.4.0+` | Supported (required for model routing) | | `v0.4.0+` | Supported (required for model routing) |

View file

@ -158,7 +158,9 @@ Anthropic
.. code-block:: yaml .. code-block:: yaml
llm_providers: version: v0.4.0
model_providers:
# Configure all Anthropic models with wildcard # Configure all Anthropic models with wildcard
- model: anthropic/* - model: anthropic/*
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
@ -179,8 +181,12 @@ Anthropic
- model: anthropic/claude-sonnet-4-20250514 - model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_PROD_API_KEY access_key: $ANTHROPIC_PROD_API_KEY
routing_preferences: routing_preferences:
- name: code_generation - name: code_generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
models:
- anthropic/claude-sonnet-4-20250514
DeepSeek DeepSeek
~~~~~~~~ ~~~~~~~~
@ -798,7 +804,9 @@ You can configure specific models with custom settings even when using wildcards
.. code-block:: yaml .. code-block:: yaml
llm_providers: version: v0.4.0
model_providers:
# Expand to all Anthropic models # Expand to all Anthropic models
- model: anthropic/* - model: anthropic/*
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
@ -807,14 +815,17 @@ You can configure specific models with custom settings even when using wildcards
# This model will NOT be included in the wildcard expansion above # This model will NOT be included in the wildcard expansion above
- model: anthropic/claude-sonnet-4-20250514 - model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_PROD_API_KEY access_key: $ANTHROPIC_PROD_API_KEY
routing_preferences:
- name: code_generation
priority: 1
# Another specific override # Another specific override
- model: anthropic/claude-3-haiku-20240307 - model: anthropic/claude-3-haiku-20240307
access_key: $ANTHROPIC_DEV_API_KEY access_key: $ANTHROPIC_DEV_API_KEY
routing_preferences:
- name: code_generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
models:
- anthropic/claude-sonnet-4-20250514
**Custom Provider Wildcards:** **Custom Provider Wildcards:**
For providers not in Plano's registry, wildcards enable dynamic model routing: For providers not in Plano's registry, wildcards enable dynamic model routing:
@ -856,24 +867,36 @@ Mark one model as the default for fallback scenarios:
Routing Preferences Routing Preferences
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
Configure routing preferences for dynamic model selection: Starting in ``v0.4.0``, configure routing preferences at the top level of the config. Each preference declares an ordered ``models`` candidate pool; the first entry is primary and the rest are fallbacks the client tries on ``429``/``5xx`` errors. Multiple providers can serve the same route — just list them all under ``models``. See :doc:`/guides/llm_router` for the full routing model.
.. code-block:: yaml .. code-block:: yaml
llm_providers: version: v0.4.0
model_providers:
- model: openai/gpt-5.2 - model: openai/gpt-5.2
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences:
- name: complex_reasoning
description: deep analysis, mathematical problem solving, and logical reasoning
- name: code_review
description: reviewing and analyzing existing code for bugs and improvements
- model: anthropic/claude-sonnet-4-5 - model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: complex_reasoning
description: deep analysis, mathematical problem solving, and logical reasoning
models:
- openai/gpt-5.2
- anthropic/claude-sonnet-4-5
- name: code_review
description: reviewing and analyzing existing code for bugs and improvements
models:
- openai/gpt-5.2
- name: creative_writing - name: creative_writing
description: creative content generation, storytelling, and writing assistance description: creative content generation, storytelling, and writing assistance
models:
- anthropic/claude-sonnet-4-5
.. note::
``v0.3.0`` configs that declare ``routing_preferences`` inline under each ``model_provider`` are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update to the form above to silence the warning and gain the multi-model fallback behavior.
.. _passthrough_auth: .. _passthrough_auth:

View file

@ -147,38 +147,53 @@ Plano-Orchestrator analyzes each prompt to infer domain and action, then applies
Configuration Configuration
^^^^^^^^^^^^^ ^^^^^^^^^^^^^
To configure preference-aligned dynamic routing, define routing preferences that map domains and actions to specific models: To configure preference-aligned dynamic routing, declare a top-level ``routing_preferences`` list and attach an ordered ``models`` candidate pool to each route. Starting in ``v0.4.0``, ``routing_preferences`` lives at the root of the config (not inline under ``model_providers``), which lets multiple models serve the same route — the first entry in ``models`` is primary, the rest are fallbacks that the client tries on ``429``/``5xx`` errors.
.. code-block:: yaml .. code-block:: yaml
:caption: Preference-Aligned Dynamic Routing Configuration :caption: Preference-Aligned Dynamic Routing Configuration
version: v0.4.0
listeners: listeners:
egress_traffic: - name: egress_traffic
type: model
address: 0.0.0.0 address: 0.0.0.0
port: 12000 port: 12000
message_format: openai
timeout: 30s timeout: 30s
llm_providers: model_providers:
- model: openai/gpt-5.2 - model: openai/gpt-5.2
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
- model: openai/gpt-5 - model: openai/gpt-5
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- name: complex reasoning
description: deep analysis, mathematical problem solving, and logical reasoning
- model: anthropic/claude-sonnet-4-5 - model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
models:
- openai/gpt-5
- anthropic/claude-sonnet-4-5
- name: complex reasoning
description: deep analysis, mathematical problem solving, and logical reasoning
models:
- openai/gpt-5
- name: creative writing - name: creative writing
description: creative content generation, storytelling, and writing assistance description: creative content generation, storytelling, and writing assistance
models:
- anthropic/claude-sonnet-4-5
- name: code generation - name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts description: generating new code snippets, functions, or boilerplate based on user prompts
models:
- anthropic/claude-sonnet-4-5
- openai/gpt-5
.. note::
Configs still using the ``v0.3.0`` inline style (``routing_preferences`` nested under each ``model_provider``) are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update your config to the form above to silence the warning.
Client usage Client usage
^^^^^^^^^^^^ ^^^^^^^^^^^^
@ -253,6 +268,8 @@ Using Ollama (recommended for local development)
.. code-block:: yaml .. code-block:: yaml
version: v0.4.0
overrides: overrides:
llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
@ -266,9 +283,12 @@ Using Ollama (recommended for local development)
- model: anthropic/claude-sonnet-4-5 - model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: creative writing - name: creative writing
description: creative content generation, storytelling, and writing assistance description: creative content generation, storytelling, and writing assistance
models:
- anthropic/claude-sonnet-4-5
4. **Verify the model is running** 4. **Verify the model is running**
@ -322,6 +342,8 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
.. code-block:: yaml .. code-block:: yaml
version: v0.4.0
overrides: overrides:
llm_routing_model: plano/Plano-Orchestrator llm_routing_model: plano/Plano-Orchestrator
@ -335,9 +357,12 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
- model: anthropic/claude-sonnet-4-5 - model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: creative writing - name: creative writing
description: creative content generation, storytelling, and writing assistance description: creative content generation, storytelling, and writing assistance
models:
- anthropic/claude-sonnet-4-5
5. **Verify the server is running** 5. **Verify the server is running**
@ -468,22 +493,30 @@ You can combine static model selection with dynamic routing preferences for maxi
.. code-block:: yaml .. code-block:: yaml
:caption: Hybrid Routing Configuration :caption: Hybrid Routing Configuration
llm_providers: version: v0.4.0
model_providers:
- model: openai/gpt-5.2 - model: openai/gpt-5.2
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
- model: openai/gpt-5 - model: openai/gpt-5
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences:
- name: complex_reasoning
description: deep analysis and complex problem solving
- model: anthropic/claude-sonnet-4-5 - model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: complex_reasoning
description: deep analysis and complex problem solving
models:
- openai/gpt-5
- anthropic/claude-sonnet-4-5
- name: creative_tasks - name: creative_tasks
description: creative writing and content generation description: creative writing and content generation
models:
- anthropic/claude-sonnet-4-5
- openai/gpt-5
model_aliases: model_aliases:
# Model aliases - friendly names that map to actual provider names # Model aliases - friendly names that map to actual provider names

View file

@ -1,5 +1,5 @@
# Plano Gateway configuration version # Plano Gateway configuration version
version: v0.3.0 version: v0.4.0
# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions) # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
agents: agents:
@ -32,17 +32,8 @@ model_providers:
- model: mistral/ministral-3b-latest - model: mistral/ministral-3b-latest
access_key: $MISTRAL_API_KEY access_key: $MISTRAL_API_KEY
# routing_preferences: tags a model with named capabilities so Plano's LLM router
# can select the best model for each request based on intent. Requires the
# Plano-Orchestrator model (or equivalent) to be configured in overrides.llm_routing_model.
# Each preference has a name (short label) and a description (used for intent matching).
- model: groq/llama-3.3-70b-versatile - model: groq/llama-3.3-70b-versatile
access_key: $GROQ_API_KEY access_key: $GROQ_API_KEY
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
- name: code review
description: reviewing, analyzing, and suggesting improvements to existing code
# passthrough_auth: forwards the client's Authorization header upstream instead of # passthrough_auth: forwards the client's Authorization header upstream instead of
# using the configured access_key. Useful for LiteLLM or similar proxy setups. # using the configured access_key. Useful for LiteLLM or similar proxy setups.
@ -64,6 +55,29 @@ model_aliases:
smart-llm: smart-llm:
target: gpt-4o target: gpt-4o
# routing_preferences: top-level list that tags named task categories with an
# ordered pool of candidate models. Plano's LLM router matches incoming requests
# against these descriptions and returns an ordered list of models; the client
# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx.
# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent).
# Each model in `models` must be declared in model_providers above.
# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router
# reorder candidates using live cost/latency data from model_metrics_sources.
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
models:
- anthropic/claude-sonnet-4-0
- openai/gpt-4o
- groq/llama-3.3-70b-versatile
- name: code review
description: reviewing, analyzing, and suggesting improvements to existing code
models:
- anthropic/claude-sonnet-4-0
- groq/llama-3.3-70b-versatile
selection_policy:
prefer: cheapest
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners: listeners:
# Agent listener for routing requests to multiple agents # Agent listener for routing requests to multiple agents

View file

@ -69,12 +69,6 @@ listeners:
model: llama-3.3-70b-versatile model: llama-3.3-70b-versatile
name: groq/llama-3.3-70b-versatile name: groq/llama-3.3-70b-versatile
provider_interface: groq provider_interface: groq
routing_preferences:
- description: generating new code snippets, functions, or boilerplate based on
user prompts or requirements
name: code generation
- description: reviewing, analyzing, and suggesting improvements to existing code
name: code review
- base_url: https://litellm.example.com - base_url: https://litellm.example.com
cluster_name: openai_litellm.example.com cluster_name: openai_litellm.example.com
endpoint: litellm.example.com endpoint: litellm.example.com
@ -131,12 +125,6 @@ model_providers:
model: llama-3.3-70b-versatile model: llama-3.3-70b-versatile
name: groq/llama-3.3-70b-versatile name: groq/llama-3.3-70b-versatile
provider_interface: groq provider_interface: groq
routing_preferences:
- description: generating new code snippets, functions, or boilerplate based on
user prompts or requirements
name: code generation
- description: reviewing, analyzing, and suggesting improvements to existing code
name: code review
- base_url: https://litellm.example.com - base_url: https://litellm.example.com
cluster_name: openai_litellm.example.com cluster_name: openai_litellm.example.com
endpoint: litellm.example.com endpoint: litellm.example.com
@ -221,6 +209,21 @@ routing:
type: memory type: memory
session_max_entries: 10000 session_max_entries: 10000
session_ttl_seconds: 600 session_ttl_seconds: 600
routing_preferences:
- description: generating new code snippets, functions, or boilerplate based on user
prompts or requirements
models:
- anthropic/claude-sonnet-4-0
- openai/gpt-4o
- groq/llama-3.3-70b-versatile
name: code generation
- description: reviewing, analyzing, and suggesting improvements to existing code
models:
- anthropic/claude-sonnet-4-0
- groq/llama-3.3-70b-versatile
name: code review
selection_policy:
prefer: cheapest
state_storage: state_storage:
type: memory type: memory
system_prompt: 'You are a helpful assistant. Always respond concisely and accurately. system_prompt: 'You are a helpful assistant. Always respond concisely and accurately.
@ -237,4 +240,4 @@ tracing:
environment: production environment: production
service.team: platform service.team: platform
trace_arch_internal: false trace_arch_internal: false
version: v0.3.0 version: v0.4.0

View file

@ -312,20 +312,24 @@ When a request does not match any routing preference, Plano forwards it to the `
**Incorrect (no default provider set):** **Incorrect (no default provider set):**
```yaml ```yaml
version: v0.3.0 version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini # No default: true anywhere - model: openai/gpt-4o-mini # No default: true anywhere
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences:
- name: summarization
description: Summarizing documents and extracting key points
- model: openai/gpt-4o - model: openai/gpt-4o
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences: routing_preferences:
- name: summarization
description: Summarizing documents and extracting key points
models:
- openai/gpt-4o-mini
- name: code_generation - name: code_generation
description: Writing new functions and implementing algorithms description: Writing new functions and implementing algorithms
models:
- openai/gpt-4o
``` ```
**Incorrect (multiple defaults — ambiguous):** **Incorrect (multiple defaults — ambiguous):**
@ -344,25 +348,35 @@ model_providers:
**Correct (exactly one default, covering unmatched requests):** **Correct (exactly one default, covering unmatched requests):**
```yaml ```yaml
version: v0.3.0 version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true # Handles general/unclassified requests default: true # Handles general/unclassified requests
routing_preferences:
- name: summarization
description: Summarizing documents, articles, and meeting notes
- name: classification
description: Categorizing inputs, labeling, and intent detection
- model: openai/gpt-4o - model: openai/gpt-4o
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences: routing_preferences:
- name: summarization
description: Summarizing documents, articles, and meeting notes
models:
- openai/gpt-4o-mini
- openai/gpt-4o
- name: classification
description: Categorizing inputs, labeling, and intent detection
models:
- openai/gpt-4o-mini
- name: code_generation - name: code_generation
description: Writing, debugging, and reviewing code description: Writing, debugging, and reviewing code
models:
- openai/gpt-4o
- openai/gpt-4o-mini
- name: complex_reasoning - name: complex_reasoning
description: Multi-step math, logical analysis, research synthesis description: Multi-step math, logical analysis, research synthesis
models:
- openai/gpt-4o
``` ```
Choose your most cost-effective capable model as the default — it handles all traffic that doesn't match specialized preferences. Choose your most cost-effective capable model as the default — it handles all traffic that doesn't match specialized preferences.
@ -498,21 +512,27 @@ model_providers:
**Combined: proxy for some models, Plano-managed for others:** **Combined: proxy for some models, Plano-managed for others:**
```yaml ```yaml
version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY # Plano manages this key access_key: $OPENAI_API_KEY # Plano manages this key
default: true default: true
routing_preferences:
- name: quick tasks
description: Short answers, simple lookups, fast completions
- model: custom/vllm-llama - model: custom/vllm-llama
base_url: http://gpu-server:8000 base_url: http://gpu-server:8000
provider_interface: openai provider_interface: openai
passthrough_auth: true # vLLM cluster handles its own auth passthrough_auth: true # vLLM cluster handles its own auth
routing_preferences: routing_preferences:
- name: quick tasks
description: Short answers, simple lookups, fast completions
models:
- openai/gpt-4o-mini
- name: long context - name: long context
description: Processing very long documents, multi-document analysis description: Processing very long documents, multi-document analysis
models:
- custom/vllm-llama
``` ```
Reference: https://github.com/katanemo/archgw Reference: https://github.com/katanemo/archgw
@ -526,67 +546,100 @@ Reference: https://github.com/katanemo/archgw
## Write Task-Specific Routing Preference Descriptions ## Write Task-Specific Routing Preference Descriptions
Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy. Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy.
Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Listing multiple models under a single route gives you automatic provider fallback without extra client logic. Configs still using the legacy v0.3.0 inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below.
**Incorrect (vague, overlapping descriptions):** **Incorrect (vague, overlapping descriptions):**
```yaml ```yaml
version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
routing_preferences:
- name: simple
description: easy tasks # Too vague — what is "easy"?
- model: openai/gpt-4o - model: openai/gpt-4o
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences: routing_preferences:
- name: simple
description: easy tasks # Too vague — what is "easy"?
models:
- openai/gpt-4o-mini
- name: hard - name: hard
description: hard tasks # Too vague — overlaps with "easy" description: hard tasks # Too vague — overlaps with "easy"
models:
- openai/gpt-4o
``` ```
**Correct (specific, distinct task descriptions):** **Correct (specific, distinct task descriptions, multi-model fallbacks):**
```yaml ```yaml
version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: summarization - name: summarization
description: > description: >
Summarizing documents, articles, emails, or meeting transcripts. Summarizing documents, articles, emails, or meeting transcripts.
Extracting key points, generating TL;DR sections, condensing long text. Extracting key points, generating TL;DR sections, condensing long text.
models:
- openai/gpt-4o-mini
- openai/gpt-4o
- name: classification - name: classification
description: > description: >
Categorizing inputs, sentiment analysis, spam detection, Categorizing inputs, sentiment analysis, spam detection,
intent classification, labeling structured data fields. intent classification, labeling structured data fields.
models:
- openai/gpt-4o-mini
- name: translation - name: translation
description: > description: >
Translating text between languages, localization tasks. Translating text between languages, localization tasks.
models:
- model: openai/gpt-4o - openai/gpt-4o-mini
access_key: $OPENAI_API_KEY - anthropic/claude-sonnet-4-5
routing_preferences:
- name: code_generation - name: code_generation
description: > description: >
Writing new functions, classes, or modules from scratch. Writing new functions, classes, or modules from scratch.
Implementing algorithms, boilerplate generation, API integrations. Implementing algorithms, boilerplate generation, API integrations.
models:
- openai/gpt-4o
- anthropic/claude-sonnet-4-5
- name: code_review - name: code_review
description: > description: >
Reviewing code for bugs, security vulnerabilities, performance issues. Reviewing code for bugs, security vulnerabilities, performance issues.
Suggesting refactors, explaining complex code, debugging errors. Suggesting refactors, explaining complex code, debugging errors.
models:
- anthropic/claude-sonnet-4-5
- openai/gpt-4o
- name: complex_reasoning - name: complex_reasoning
description: > description: >
Multi-step math problems, logical deduction, strategic planning, Multi-step math problems, logical deduction, strategic planning,
research synthesis requiring chain-of-thought reasoning. research synthesis requiring chain-of-thought reasoning.
models:
- openai/gpt-4o
- anthropic/claude-sonnet-4-5
``` ```
**Key principles for good preference descriptions:** **Key principles for good preference descriptions:**
- Use concrete action verbs: "writing", "reviewing", "translating", "summarizing" - Use concrete action verbs: "writing", "reviewing", "translating", "summarizing"
- List 3–5 specific sub-tasks or synonyms for each preference - List 3–5 specific sub-tasks or synonyms for each preference
- Ensure preferences across providers are mutually exclusive in scope - Ensure preferences across routes are mutually exclusive in scope
- Order `models` from most preferred to least — the client falls back in order on `429`/`5xx`
- List multiple models under one route for automatic provider fallback without extra client logic
- Every model listed in `models` must be declared in `model_providers`
- Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions - Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions
Reference: https://github.com/katanemo/archgw Reference: https://github.com/katanemo/archgw
@ -1451,7 +1504,7 @@ planoai cli_agent claude --path /path/to/project
**Recommended config for Claude Code routing:** **Recommended config for Claude Code routing:**
```yaml ```yaml
version: v0.3.0 version: v0.4.0
listeners: listeners:
- type: model - type: model
@ -1462,19 +1515,25 @@ model_providers:
- model: anthropic/claude-sonnet-4-20250514 - model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
default: true default: true
- model: anthropic/claude-opus-4-6
access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: general coding - name: general coding
description: > description: >
Writing code, debugging, code review, explaining concepts, Writing code, debugging, code review, explaining concepts,
answering programming questions, general development tasks. answering programming questions, general development tasks.
models:
- model: anthropic/claude-opus-4-6 - anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_API_KEY - anthropic/claude-opus-4-6
routing_preferences:
- name: complex architecture - name: complex architecture
description: > description: >
System design, complex refactoring across many files, System design, complex refactoring across many files,
architectural decisions, performance optimization, security audits. architectural decisions, performance optimization, security audits.
models:
- anthropic/claude-opus-4-6
- anthropic/claude-sonnet-4-20250514
model_aliases: model_aliases:
claude.fast.v1: claude.fast.v1:
@ -1861,28 +1920,36 @@ listeners:
**Multi-listener architecture (serves all client types):** **Multi-listener architecture (serves all client types):**
```yaml ```yaml
version: v0.3.0 version: v0.4.0
# --- Shared model providers --- # --- Shared model providers ---
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
routing_preferences:
- name: quick tasks
description: Short answers, formatting, classification, simple generation
- model: openai/gpt-4o - model: openai/gpt-4o
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences:
- name: complex reasoning
description: Multi-step analysis, code generation, research synthesis
- model: anthropic/claude-sonnet-4-20250514 - model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_API_KEY access_key: $ANTHROPIC_API_KEY
# --- Shared routing_preferences (top-level, v0.4.0+) ---
routing_preferences: routing_preferences:
- name: quick tasks
description: Short answers, formatting, classification, simple generation
models:
- openai/gpt-4o-mini
- name: complex reasoning
description: Multi-step analysis, code generation, research synthesis
models:
- openai/gpt-4o
- anthropic/claude-sonnet-4-20250514
- name: long documents - name: long documents
description: Summarizing or analyzing very long documents, PDFs, transcripts description: Summarizing or analyzing very long documents, PDFs, transcripts
models:
- anthropic/claude-sonnet-4-20250514
- openai/gpt-4o
# --- Listener 1: OpenAI-compatible API gateway --- # --- Listener 1: OpenAI-compatible API gateway ---
# For: SDK clients, Claude Code, LangChain, etc. # For: SDK clients, Claude Code, LangChain, etc.

View file

@ -7,67 +7,100 @@ tags: routing, model-selection, preferences, llm-routing
## Write Task-Specific Routing Preference Descriptions ## Write Task-Specific Routing Preference Descriptions
Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy. Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy.
Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Configs still using the legacy v0.3.0 inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below.
**Incorrect (vague, overlapping descriptions):** **Incorrect (vague, overlapping descriptions):**
```yaml ```yaml
version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
routing_preferences:
- name: simple
description: easy tasks # Too vague — what is "easy"?
- model: openai/gpt-4o - model: openai/gpt-4o
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
routing_preferences: routing_preferences:
- name: simple
description: easy tasks # Too vague — what is "easy"?
models:
- openai/gpt-4o-mini
- name: hard - name: hard
description: hard tasks # Too vague — overlaps with "easy" description: hard tasks # Too vague — overlaps with "easy"
models:
- openai/gpt-4o
``` ```
**Correct (specific, distinct task descriptions):** **Correct (specific, distinct task descriptions, multi-model fallbacks):**
```yaml ```yaml
version: v0.4.0
model_providers: model_providers:
- model: openai/gpt-4o-mini - model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY access_key: $OPENAI_API_KEY
default: true default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY
routing_preferences: routing_preferences:
- name: summarization - name: summarization
description: > description: >
Summarizing documents, articles, emails, or meeting transcripts. Summarizing documents, articles, emails, or meeting transcripts.
Extracting key points, generating TL;DR sections, condensing long text. Extracting key points, generating TL;DR sections, condensing long text.
models:
- openai/gpt-4o-mini
- openai/gpt-4o
- name: classification - name: classification
description: > description: >
Categorizing inputs, sentiment analysis, spam detection, Categorizing inputs, sentiment analysis, spam detection,
intent classification, labeling structured data fields. intent classification, labeling structured data fields.
models:
- openai/gpt-4o-mini
- name: translation - name: translation
description: > description: >
Translating text between languages, localization tasks. Translating text between languages, localization tasks.
models:
- model: openai/gpt-4o - openai/gpt-4o-mini
access_key: $OPENAI_API_KEY - anthropic/claude-sonnet-4-5
routing_preferences:
- name: code_generation - name: code_generation
description: > description: >
Writing new functions, classes, or modules from scratch. Writing new functions, classes, or modules from scratch.
Implementing algorithms, boilerplate generation, API integrations. Implementing algorithms, boilerplate generation, API integrations.
models:
- openai/gpt-4o
- anthropic/claude-sonnet-4-5
- name: code_review - name: code_review
description: > description: >
Reviewing code for bugs, security vulnerabilities, performance issues. Reviewing code for bugs, security vulnerabilities, performance issues.
Suggesting refactors, explaining complex code, debugging errors. Suggesting refactors, explaining complex code, debugging errors.
models:
- anthropic/claude-sonnet-4-5
- openai/gpt-4o
- name: complex_reasoning - name: complex_reasoning
description: > description: >
Multi-step math problems, logical deduction, strategic planning, Multi-step math problems, logical deduction, strategic planning,
research synthesis requiring chain-of-thought reasoning. research synthesis requiring chain-of-thought reasoning.
models:
- openai/gpt-4o
- anthropic/claude-sonnet-4-5
``` ```
**Key principles for good preference descriptions:** **Key principles for good preference descriptions:**
- Use concrete action verbs: "writing", "reviewing", "translating", "summarizing" - Use concrete action verbs: "writing", "reviewing", "translating", "summarizing"
- List 3–5 specific sub-tasks or synonyms for each preference - List 3–5 specific sub-tasks or synonyms for each preference
- Ensure preferences across providers are mutually exclusive in scope - Ensure preferences across routes are mutually exclusive in scope
- Order `models` from most preferred to least — the client will fall back in order on `429`/`5xx`
- List multiple models under one route to get automatic provider fallback without additional client logic
- Every model listed in `models` must be declared in `model_providers`
- Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions - Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions
Reference: https://github.com/katanemo/archgw Reference: [Routing API](../../docs/routing-api.md) · https://github.com/katanemo/archgw