mirror of
https://github.com/katanemo/plano.git
synced 2026-06-05 14:45:15 +02:00
fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level (#912)
* fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level Lift inline routing_preferences under each model_provider into the top-level routing_preferences list with merged models[] and bump version to v0.4.0, with a deprecation warning. Existing v0.3.0 demo configs (Claude Code, Codex, preference_based_routing, etc.) keep working unchanged. Schema flags the inline shape as deprecated but still accepts it. Docs and skills updated to canonical top-level multi-model form. * test(common): bump reference config assertion to v0.4.0 The rendered reference config was bumped to v0.4.0 when its inline routing_preferences were lifted to the top level; align the configuration deserialization test with that change. * fix(config_generator): bump version to v0.4.0 up front in migration Move the v0.3.0 -> v0.4.0 version bump to the top of migrate_inline_routing_preferences so it runs unconditionally, including for configs that already declare top-level routing_preferences at v0.3.0. Previously the bump only fired when inline migration produced entries, leaving top-level v0.3.0 configs rejected by brightstaff's v0.4.0 gate. Tests updated to cover the new behavior and to confirm we never downgrade newer versions. * fix(config_generator): gate routing_preferences migration on version < v0.4.0 Short-circuit the migration when the config already declares v0.4.0 or newer. Anything at v0.4.0+ is assumed to be on the canonical top-level shape and is passed through untouched, including stray inline preferences (which are the author's bug to fix). Only v0.3.0 and older configs are rewritten and bumped.
This commit is contained in:
parent
5a652eb666
commit
897fda2deb
12 changed files with 748 additions and 225 deletions
|
|
@ -58,6 +58,110 @@ def get_endpoint_and_port(endpoint, protocol):
|
|||
return endpoint, port
|
||||
|
||||
|
||||
def migrate_inline_routing_preferences(config_yaml):
    """Rewrite a pre-v0.4.0 config so routing preferences live at the top level.

    The config dict is modified in place; nothing is returned.

    * Configs whose ``version`` already parses as ``v0.4.0`` or later are
      left completely untouched (stray inline preferences included).
    * Anything older has its ``version`` set to ``v0.4.0`` first, even when
      there turns out to be nothing to lift.
    * Every ``routing_preferences`` entry found inline under a
      ``model_providers`` item is collected by ``name``. Entries sharing a
      name are folded into one whose ``models`` list holds each provider's
      ``model`` string once, in declaration order. The description seen
      first is kept; a differing later description only triggers a printed
      warning.
    * When at least one preference was collected, the inline
      ``routing_preferences`` key is removed from every provider and the
      collected entries are appended after any top-level entries the user
      already wrote. A collected entry whose name already exists at the top
      level is dropped in favour of the user's entry.

    Raises:
        Exception: a provider carrying inline preferences names a wildcard
            model (last ``/``-separated segment is ``*``).
    """
    declared_version = str(config_yaml.get("version", ""))
    if _version_tuple(declared_version) >= (0, 4, 0):
        return

    # Bump before anything else so the bump happens on every return path below.
    config_yaml["version"] = "v0.4.0"

    providers = config_yaml.get("model_providers") or []
    if not providers:
        return

    lifted = {}
    for provider in providers:
        provider_prefs = provider.get("routing_preferences")
        model_id = provider.get("model")
        if not provider_prefs or not model_id:
            continue

        # Only the segment after the last "/" decides whether this is a wildcard.
        _, slash, last_segment = model_id.rpartition("/")
        if slash and last_segment.strip() == "*":
            raise Exception(
                f"Model {model_id} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
            )

        for pref in provider_prefs:
            pref_name = pref.get("name")
            pref_description = pref.get("description", "")
            if not pref_name:
                continue

            known = lifted.get(pref_name)
            if known is None:
                lifted[pref_name] = {
                    "name": pref_name,
                    "description": pref_description,
                    "models": [model_id],
                }
                continue

            if pref_description and pref_description != known["description"]:
                print(
                    f"WARNING: routing preference '{pref_name}' has conflicting descriptions across providers; keeping the first one."
                )
            if model_id not in known["models"]:
                known["models"].append(model_id)

    if not lifted:
        return

    for provider in providers:
        provider.pop("routing_preferences", None)

    user_defined = config_yaml.get("routing_preferences") or []
    user_defined_names = {entry.get("name") for entry in user_defined}
    config_yaml["routing_preferences"] = list(user_defined) + [
        entry
        for pref_name, entry in lifted.items()
        if pref_name not in user_defined_names
    ]

    print(
        "WARNING: inline routing_preferences under model_providers is deprecated "
        "and has been auto-migrated to top-level routing_preferences. Update your "
        "config to v0.4.0 top-level form. See docs/routing-api.md"
    )
|
||||
|
||||
|
||||
def _version_tuple(version_string):
|
||||
stripped = version_string.strip().lstrip("vV")
|
||||
if not stripped:
|
||||
return (0, 0, 0)
|
||||
parts = stripped.split("-", 1)[0].split(".")
|
||||
out = []
|
||||
for part in parts[:3]:
|
||||
try:
|
||||
out.append(int(part))
|
||||
except ValueError:
|
||||
out.append(0)
|
||||
while len(out) < 3:
|
||||
out.append(0)
|
||||
return tuple(out)
|
||||
|
||||
|
||||
def validate_and_render_schema():
|
||||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
|
||||
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
|
||||
|
|
@ -101,6 +205,8 @@ def validate_and_render_schema():
|
|||
config_yaml["model_providers"] = config_yaml["llm_providers"]
|
||||
del config_yaml["llm_providers"]
|
||||
|
||||
migrate_inline_routing_preferences(config_yaml)
|
||||
|
||||
listeners, llm_gateway, prompt_gateway = convert_legacy_listeners(
|
||||
config_yaml.get("listeners"), config_yaml.get("model_providers")
|
||||
)
|
||||
|
|
@ -200,7 +306,16 @@ def validate_and_render_schema():
|
|||
model_provider_name_set = set()
|
||||
llms_with_usage = []
|
||||
model_name_keys = set()
|
||||
model_usage_name_keys = set()
|
||||
|
||||
top_level_preferences = config_yaml.get("routing_preferences") or []
|
||||
seen_pref_names = set()
|
||||
for pref in top_level_preferences:
|
||||
pref_name = pref.get("name")
|
||||
if pref_name in seen_pref_names:
|
||||
raise Exception(
|
||||
f'Duplicate routing preference name "{pref_name}", please provide unique name for each routing preference'
|
||||
)
|
||||
seen_pref_names.add(pref_name)
|
||||
|
||||
print("listeners: ", listeners)
|
||||
|
||||
|
|
@ -259,10 +374,6 @@ def validate_and_render_schema():
|
|||
raise Exception(
|
||||
f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards."
|
||||
)
|
||||
if model_provider.get("routing_preferences"):
|
||||
raise Exception(
|
||||
f"Model {model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
|
||||
)
|
||||
|
||||
# Validate azure_openai and ollama provider requires base_url
|
||||
if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
|
||||
|
|
@ -311,13 +422,6 @@ def validate_and_render_schema():
|
|||
)
|
||||
model_name_keys.add(model_id)
|
||||
|
||||
for routing_preference in model_provider.get("routing_preferences", []):
|
||||
if routing_preference.get("name") in model_usage_name_keys:
|
||||
raise Exception(
|
||||
f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference'
|
||||
)
|
||||
model_usage_name_keys.add(routing_preference.get("name"))
|
||||
|
||||
# Warn if both passthrough_auth and access_key are configured
|
||||
if model_provider.get("passthrough_auth") and model_provider.get(
|
||||
"access_key"
|
||||
|
|
@ -405,7 +509,7 @@ def validate_and_render_schema():
|
|||
router_model_id = (
|
||||
router_model.split("/", 1)[1] if "/" in router_model else router_model
|
||||
)
|
||||
if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set:
|
||||
if len(seen_pref_names) > 0 and router_model_id not in model_name_set:
|
||||
updated_model_providers.append(
|
||||
{
|
||||
"name": "plano-orchestrator",
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import json
|
||||
import pytest
|
||||
import yaml
|
||||
from unittest import mock
|
||||
from planoai.config_generator import validate_and_render_schema
|
||||
from planoai.config_generator import (
|
||||
validate_and_render_schema,
|
||||
migrate_inline_routing_preferences,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
|
|
@ -295,32 +299,30 @@ model_providers:
|
|||
"id": "duplicate_routeing_preference_name",
|
||||
"expected_error": "Duplicate routing preference name",
|
||||
"plano_config": """
|
||||
version: v0.1.0
|
||||
version: v0.4.0
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
- name: llm
|
||||
type: model
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- model: openai/gpt-4.1
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- name: code understanding
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
@ -501,3 +503,238 @@ def test_convert_legacy_llm_providers_no_prompt_gateway():
|
|||
"port": 12000,
|
||||
"timeout": "30s",
|
||||
}
|
||||
|
||||
|
||||
def test_inline_routing_preferences_migrated_to_top_level():
    """Distinct inline preferences on two providers become two top-level entries."""
    raw_config = """
version: v0.3.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code understanding
        description: understand and explain existing code snippets, functions, or libraries

  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
"""
    config = yaml.safe_load(raw_config)
    migrate_inline_routing_preferences(config)

    assert config["version"] == "v0.4.0"
    # The inline key must be gone from every provider after the lift.
    for provider in config["model_providers"]:
        assert "routing_preferences" not in provider

    lifted = {pref["name"]: pref for pref in config["routing_preferences"]}
    assert set(lifted) == {"code understanding", "code generation"}

    understanding = lifted["code understanding"]
    generation = lifted["code generation"]
    assert understanding["models"] == ["openai/gpt-4o"]
    assert generation["models"] == ["anthropic/claude-sonnet-4-20250514"]
    assert (
        understanding["description"]
        == "understand and explain existing code snippets, functions, or libraries"
    )
|
||||
|
||||
|
||||
def test_inline_same_name_across_providers_merges_models():
    """One preference name on two providers yields one entry listing both models."""
    raw_config = """
version: v0.3.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements

  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
"""
    config = yaml.safe_load(raw_config)
    migrate_inline_routing_preferences(config)

    lifted = config["routing_preferences"]
    assert len(lifted) == 1

    merged = lifted[0]
    assert merged["name"] == "code generation"
    # Models appear in provider declaration order.
    assert merged["models"] == [
        "openai/gpt-4o",
        "anthropic/claude-sonnet-4-20250514",
    ]
    assert config["version"] == "v0.4.0"
|
||||
|
||||
|
||||
def test_existing_top_level_routing_preferences_preserved():
    """A v0.4.0 config already in top-level form round-trips unchanged."""
    raw_config = """
version: v0.4.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY

routing_preferences:
  - name: code generation
    description: generating new code snippets or boilerplate
    models:
      - openai/gpt-4o
      - anthropic/claude-sonnet-4-20250514
"""
    config = yaml.safe_load(raw_config)

    # Compare serialized snapshots so any in-place mutation is detected.
    snapshot_before = yaml.safe_dump(config, sort_keys=True)
    migrate_inline_routing_preferences(config)
    snapshot_after = yaml.safe_dump(config, sort_keys=True)

    assert snapshot_before == snapshot_after
|
||||
|
||||
|
||||
def test_existing_top_level_wins_over_inline_migration():
    """A user-written top-level entry shadows an inline one with the same name."""
    raw_config = """
version: v0.3.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: inline description should lose

routing_preferences:
  - name: code generation
    description: user-defined top-level description wins
    models:
      - openai/gpt-4o
"""
    config = yaml.safe_load(raw_config)
    migrate_inline_routing_preferences(config)

    preferences = config["routing_preferences"]
    assert len(preferences) == 1

    surviving = preferences[0]
    assert surviving["description"] == "user-defined top-level description wins"
    assert surviving["models"] == ["openai/gpt-4o"]
|
||||
|
||||
|
||||
def test_wildcard_with_inline_routing_preferences_errors():
    """Inline preferences on a wildcard model are rejected during migration."""
    raw_config = """
version: v0.3.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openrouter/*
    base_url: https://openrouter.ai/api/v1
    passthrough_auth: true
    routing_preferences:
      - name: code generation
        description: generating code
"""
    config = yaml.safe_load(raw_config)

    with pytest.raises(Exception) as raised:
        migrate_inline_routing_preferences(config)

    message = str(raised.value).lower()
    assert "wildcard" in message
|
||||
|
||||
|
||||
def test_migration_bumps_version_even_without_inline_preferences():
    """A v0.3.0 config with nothing to lift still gets its version bumped."""
    raw_config = """
version: v0.3.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
"""
    config = yaml.safe_load(raw_config)
    migrate_inline_routing_preferences(config)

    # No top-level list is created when no inline preferences existed.
    assert "routing_preferences" not in config
    assert config["version"] == "v0.4.0"
|
||||
|
||||
|
||||
def test_migration_is_noop_on_v040_config_with_stray_inline_preferences():
    """Inline preferences in a config already at v0.4.0 are left where they are."""
    # A v0.4.0 config is taken to already use the canonical top-level shape.
    # Stray inline preferences at v0.4.0+ are deliberately not rescued, which
    # keeps the deprecation boundary a plain version gate.
    raw_config = """
version: v0.4.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code
"""
    config = yaml.safe_load(raw_config)
    migrate_inline_routing_preferences(config)

    assert config["version"] == "v0.4.0"
    assert "routing_preferences" not in config

    first_provider = config["model_providers"][0]
    assert first_provider["routing_preferences"] == [
        {"name": "code generation", "description": "generating new code"}
    ]
|
||||
|
||||
|
||||
def test_migration_does_not_downgrade_newer_versions():
    """A version newer than v0.4.0 is never rewritten to v0.4.0."""
    raw_config = """
version: v0.5.0

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
"""
    config = yaml.safe_load(raw_config)
    migrate_inline_routing_preferences(config)

    assert config["version"] == "v0.5.0"
|
||||
|
|
|
|||
|
|
@ -201,6 +201,7 @@ properties:
|
|||
description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
|
||||
routing_preferences:
|
||||
type: array
|
||||
description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -258,6 +259,7 @@ properties:
|
|||
description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
|
||||
routing_preferences:
|
||||
type: array
|
||||
description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -656,7 +656,7 @@ mod test {
|
|||
.expect("reference config file not found");
|
||||
|
||||
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();
|
||||
assert_eq!(config.version, "v0.3.0");
|
||||
assert_eq!(config.version, "v0.4.0");
|
||||
|
||||
if let Some(prompt_targets) = &config.prompt_targets {
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ model_providers:
|
|||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
- model: anthropic/claude-sonnet-4-6
|
||||
default: true
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
|
|
|
|||
|
|
@ -34,11 +34,13 @@ POST /v1/chat/completions
|
|||
|
||||
### `routing_preferences` fields
|
||||
|
||||
| Field | Type | Required | Description |
|
||||
|---|---|---|---|
|
||||
| `name` | string | yes | Route identifier. Must match the LLM router's route classification. |
|
||||
| `description` | string | yes | Natural language description used by the router to match user intent. |
|
||||
| `models` | string[] | yes | Ordered candidate pool. At least one entry required. Must be declared in `model_providers`. |
|
||||
|
||||
| Field | Type | Required | Description |
|
||||
| ------------- | -------- | -------- | ------------------------------------------------------------------------------------------- |
|
||||
| `name` | string | yes | Route identifier. Must match the LLM router's route classification. |
|
||||
| `description` | string | yes | Natural language description used by the router to match user intent. |
|
||||
| `models` | string[] | yes | Ordered candidate pool. At least one entry required. Must be declared in `model_providers`. |
|
||||
|
||||
|
||||
### Notes
|
||||
|
||||
|
|
@ -64,11 +66,13 @@ POST /v1/chat/completions
|
|||
|
||||
### Fields
|
||||
|
||||
| Field | Type | Description |
|
||||
|---|---|---|
|
||||
| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. |
|
||||
| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. |
|
||||
| `trace_id` | string | Trace ID for distributed tracing and observability. |
|
||||
|
||||
| Field | Type | Description |
|
||||
| ---------- | ------------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. |
|
||||
| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. |
|
||||
| `trace_id` | string | Trace ID for distributed tracing and observability. |
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -142,6 +146,7 @@ X-Model-Affinity: a1b2c3d4-5678-...
|
|||
```
|
||||
|
||||
Response when pinned:
|
||||
|
||||
```json
|
||||
{
|
||||
"models": ["anthropic/claude-sonnet-4-20250514"],
|
||||
|
|
@ -155,6 +160,7 @@ Response when pinned:
|
|||
Without the header, routing runs fresh every time (no breaking change).
|
||||
|
||||
Configure TTL and cache size:
|
||||
|
||||
```yaml
|
||||
routing:
|
||||
session_ttl_seconds: 600 # default: 10 min
|
||||
|
|
@ -165,7 +171,8 @@ routing:
|
|||
|
||||
## Version Requirements
|
||||
|
||||
| Version | Top-level `routing_preferences` |
|
||||
|---|---|
|
||||
|
||||
| Version | Top-level `routing_preferences` |
|
||||
| ---------- | -------------------------------------- |
|
||||
| `< v0.4.0` | Not allowed — startup error if present |
|
||||
| `v0.4.0+` | Supported (required for model routing) |
|
||||
| `v0.4.0+` | Supported (required for model routing) |
|
||||
|
|
|
|||
|
|
@ -158,7 +158,9 @@ Anthropic
|
|||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
# Configure all Anthropic models with wildcard
|
||||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
|
@ -179,8 +181,12 @@ Anthropic
|
|||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_PROD_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
|
||||
DeepSeek
|
||||
~~~~~~~~
|
||||
|
|
@ -798,7 +804,9 @@ You can configure specific models with custom settings even when using wildcards
|
|||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
# Expand to all Anthropic models
|
||||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
|
@ -807,14 +815,17 @@ You can configure specific models with custom settings even when using wildcards
|
|||
# This model will NOT be included in the wildcard expansion above
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_PROD_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
priority: 1
|
||||
|
||||
# Another specific override
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
access_key: $ANTHROPIC_DEV_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
|
||||
**Custom Provider Wildcards:**
|
||||
|
||||
For providers not in Plano's registry, wildcards enable dynamic model routing:
|
||||
|
|
@ -856,24 +867,36 @@ Mark one model as the default for fallback scenarios:
|
|||
Routing Preferences
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Configure routing preferences for dynamic model selection:
|
||||
Starting in ``v0.4.0``, configure routing preferences at the top level of the config. Each preference declares an ordered ``models`` candidate pool; the first entry is primary and the rest are fallbacks the client tries on ``429``/``5xx`` errors. Multiple providers can serve the same route — just list them all under ``models``. See :doc:`/guides/llm_router` for the full routing model.
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-5.2
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: complex_reasoning
|
||||
description: deep analysis, mathematical problem solving, and logical reasoning
|
||||
- name: code_review
|
||||
description: reviewing and analyzing existing code for bugs and improvements
|
||||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: creative_writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
|
||||
routing_preferences:
|
||||
- name: complex_reasoning
|
||||
description: deep analysis, mathematical problem solving, and logical reasoning
|
||||
models:
|
||||
- openai/gpt-5.2
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: code_review
|
||||
description: reviewing and analyzing existing code for bugs and improvements
|
||||
models:
|
||||
- openai/gpt-5.2
|
||||
- name: creative_writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
|
||||
.. note::
|
||||
``v0.3.0`` configs that declare ``routing_preferences`` inline under each ``model_provider`` are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update to the form above to silence the warning and gain the multi-model fallback behavior.
|
||||
|
||||
.. _passthrough_auth:
|
||||
|
||||
|
|
|
|||
|
|
@ -147,38 +147,53 @@ Plano-Orchestrator analyzes each prompt to infer domain and action, then applies
|
|||
Configuration
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
To configure preference-aligned dynamic routing, define routing preferences that map domains and actions to specific models:
|
||||
To configure preference-aligned dynamic routing, declare a top-level ``routing_preferences`` list and attach an ordered ``models`` candidate pool to each route. Starting in ``v0.4.0``, ``routing_preferences`` lives at the root of the config (not inline under ``model_providers``), which lets multiple models serve the same route — the first entry in ``models`` is primary, the rest are fallbacks that the client tries on ``429``/``5xx`` errors.
|
||||
|
||||
.. code-block:: yaml
|
||||
:caption: Preference-Aligned Dynamic Routing Configuration
|
||||
|
||||
version: v0.4.0
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
- name: egress_traffic
|
||||
type: model
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
model_providers:
|
||||
- model: openai/gpt-5.2
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-5
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
- name: complex reasoning
|
||||
description: deep analysis, mathematical problem solving, and logical reasoning
|
||||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: creative writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts
|
||||
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
models:
|
||||
- openai/gpt-5
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: complex reasoning
|
||||
description: deep analysis, mathematical problem solving, and logical reasoning
|
||||
models:
|
||||
- openai/gpt-5
|
||||
- name: creative writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- openai/gpt-5
|
||||
|
||||
.. note::
|
||||
Configs still using the ``v0.3.0`` inline style (``routing_preferences`` nested under each ``model_provider``) are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update your config to the form above to silence the warning.
|
||||
|
||||
Client usage
|
||||
^^^^^^^^^^^^
|
||||
|
|
@ -253,6 +268,8 @@ Using Ollama (recommended for local development)
|
|||
|
||||
.. code-block:: yaml
|
||||
|
||||
version: v0.4.0
|
||||
|
||||
overrides:
|
||||
llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
|
||||
|
|
@ -266,9 +283,12 @@ Using Ollama (recommended for local development)
|
|||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: creative writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
|
||||
routing_preferences:
|
||||
- name: creative writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
|
||||
4. **Verify the model is running**
|
||||
|
||||
|
|
@ -322,6 +342,8 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
|
|||
|
||||
.. code-block:: yaml
|
||||
|
||||
version: v0.4.0
|
||||
|
||||
overrides:
|
||||
llm_routing_model: plano/Plano-Orchestrator
|
||||
|
||||
|
|
@ -335,9 +357,12 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
|
|||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: creative writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
|
||||
routing_preferences:
|
||||
- name: creative writing
|
||||
description: creative content generation, storytelling, and writing assistance
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
|
||||
5. **Verify the server is running**
|
||||
|
||||
|
|
@ -468,22 +493,30 @@ You can combine static model selection with dynamic routing preferences for maxi
|
|||
.. code-block:: yaml
|
||||
:caption: Hybrid Routing Configuration
|
||||
|
||||
llm_providers:
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-5.2
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-5
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: complex_reasoning
|
||||
description: deep analysis and complex problem solving
|
||||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: creative_tasks
|
||||
description: creative writing and content generation
|
||||
|
||||
routing_preferences:
|
||||
- name: complex_reasoning
|
||||
description: deep analysis and complex problem solving
|
||||
models:
|
||||
- openai/gpt-5
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: creative_tasks
|
||||
description: creative writing and content generation
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- openai/gpt-5
|
||||
|
||||
model_aliases:
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# Plano Gateway configuration version
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
|
||||
agents:
|
||||
|
|
@ -32,17 +32,8 @@ model_providers:
|
|||
- model: mistral/ministral-3b-latest
|
||||
access_key: $MISTRAL_API_KEY
|
||||
|
||||
# routing_preferences: tags a model with named capabilities so Plano's LLM router
|
||||
# can select the best model for each request based on intent. Requires the
|
||||
# Plano-Orchestrator model (or equivalent) to be configured in overrides.llm_routing_model.
|
||||
# Each preference has a name (short label) and a description (used for intent matching).
|
||||
- model: groq/llama-3.3-70b-versatile
|
||||
access_key: $GROQ_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
- name: code review
|
||||
description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
|
||||
# passthrough_auth: forwards the client's Authorization header upstream instead of
|
||||
# using the configured access_key. Useful for LiteLLM or similar proxy setups.
|
||||
|
|
@ -64,6 +55,29 @@ model_aliases:
|
|||
smart-llm:
|
||||
target: gpt-4o
|
||||
|
||||
# routing_preferences: top-level list that tags named task categories with an
|
||||
# ordered pool of candidate models. Plano's LLM router matches incoming requests
|
||||
# against these descriptions and returns an ordered list of models; the client
|
||||
# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx.
|
||||
# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent).
|
||||
# Each model in `models` must be declared in model_providers above.
|
||||
# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router
|
||||
# reorder candidates using live cost/latency data from model_metrics_sources.
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-0
|
||||
- openai/gpt-4o
|
||||
- groq/llama-3.3-70b-versatile
|
||||
- name: code review
|
||||
description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-0
|
||||
- groq/llama-3.3-70b-versatile
|
||||
selection_policy:
|
||||
prefer: cheapest
|
||||
|
||||
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
|
||||
listeners:
|
||||
# Agent listener for routing requests to multiple agents
|
||||
|
|
|
|||
|
|
@ -69,12 +69,6 @@ listeners:
|
|||
model: llama-3.3-70b-versatile
|
||||
name: groq/llama-3.3-70b-versatile
|
||||
provider_interface: groq
|
||||
routing_preferences:
|
||||
- description: generating new code snippets, functions, or boilerplate based on
|
||||
user prompts or requirements
|
||||
name: code generation
|
||||
- description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
name: code review
|
||||
- base_url: https://litellm.example.com
|
||||
cluster_name: openai_litellm.example.com
|
||||
endpoint: litellm.example.com
|
||||
|
|
@ -131,12 +125,6 @@ model_providers:
|
|||
model: llama-3.3-70b-versatile
|
||||
name: groq/llama-3.3-70b-versatile
|
||||
provider_interface: groq
|
||||
routing_preferences:
|
||||
- description: generating new code snippets, functions, or boilerplate based on
|
||||
user prompts or requirements
|
||||
name: code generation
|
||||
- description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
name: code review
|
||||
- base_url: https://litellm.example.com
|
||||
cluster_name: openai_litellm.example.com
|
||||
endpoint: litellm.example.com
|
||||
|
|
@ -221,6 +209,21 @@ routing:
|
|||
type: memory
|
||||
session_max_entries: 10000
|
||||
session_ttl_seconds: 600
|
||||
routing_preferences:
|
||||
- description: generating new code snippets, functions, or boilerplate based on user
|
||||
prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-0
|
||||
- openai/gpt-4o
|
||||
- groq/llama-3.3-70b-versatile
|
||||
name: code generation
|
||||
- description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-0
|
||||
- groq/llama-3.3-70b-versatile
|
||||
name: code review
|
||||
selection_policy:
|
||||
prefer: cheapest
|
||||
state_storage:
|
||||
type: memory
|
||||
system_prompt: 'You are a helpful assistant. Always respond concisely and accurately.
|
||||
|
|
@ -237,4 +240,4 @@ tracing:
|
|||
environment: production
|
||||
service.team: platform
|
||||
trace_arch_internal: false
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
|
|
|||
225
skills/AGENTS.md
225
skills/AGENTS.md
|
|
@ -312,20 +312,24 @@ When a request does not match any routing preference, Plano forwards it to the `
|
|||
**Incorrect (no default provider set):**
|
||||
|
||||
```yaml
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini # No default: true anywhere
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: Summarizing documents and extracting key points
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: Writing new functions and implementing algorithms
|
||||
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: Summarizing documents and extracting key points
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: code_generation
|
||||
description: Writing new functions and implementing algorithms
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
```
|
||||
|
||||
**Incorrect (multiple defaults — ambiguous):**
|
||||
|
|
@ -344,25 +348,35 @@ model_providers:
|
|||
**Correct (exactly one default, covering unmatched requests):**
|
||||
|
||||
```yaml
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true # Handles general/unclassified requests
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: Summarizing documents, articles, and meeting notes
|
||||
- name: classification
|
||||
description: Categorizing inputs, labeling, and intent detection
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: Writing, debugging, and reviewing code
|
||||
- name: complex_reasoning
|
||||
description: Multi-step math, logical analysis, research synthesis
|
||||
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: Summarizing documents, articles, and meeting notes
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- openai/gpt-4o
|
||||
- name: classification
|
||||
description: Categorizing inputs, labeling, and intent detection
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: code_generation
|
||||
description: Writing, debugging, and reviewing code
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- openai/gpt-4o-mini
|
||||
- name: complex_reasoning
|
||||
description: Multi-step math, logical analysis, research synthesis
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
```
|
||||
|
||||
Choose your most cost-effective capable model as the default — it handles all traffic that doesn't match specialized preferences.
|
||||
|
|
@ -498,21 +512,27 @@ model_providers:
|
|||
**Combined: proxy for some models, Plano-managed for others:**
|
||||
|
||||
```yaml
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY # Plano manages this key
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: quick tasks
|
||||
description: Short answers, simple lookups, fast completions
|
||||
|
||||
- model: custom/vllm-llama
|
||||
base_url: http://gpu-server:8000
|
||||
provider_interface: openai
|
||||
passthrough_auth: true # vLLM cluster handles its own auth
|
||||
routing_preferences:
|
||||
- name: long context
|
||||
description: Processing very long documents, multi-document analysis
|
||||
|
||||
routing_preferences:
|
||||
- name: quick tasks
|
||||
description: Short answers, simple lookups, fast completions
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: long context
|
||||
description: Processing very long documents, multi-document analysis
|
||||
models:
|
||||
- custom/vllm-llama
|
||||
```
|
||||
|
||||
Reference: https://github.com/katanemo/archgw
|
||||
|
|
@ -526,67 +546,100 @@ Reference: https://github.com/katanemo/archgw
|
|||
|
||||
## Write Task-Specific Routing Preference Descriptions
|
||||
|
||||
Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy.
|
||||
Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy.
|
||||
|
||||
Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Listing multiple models under a single route gives you automatic provider fallback without extra client logic. Configs still using the legacy `v0.3.0` inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below.
|
||||
|
||||
**Incorrect (vague, overlapping descriptions):**
|
||||
|
||||
```yaml
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: simple
|
||||
description: easy tasks # Too vague — what is "easy"?
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: hard
|
||||
description: hard tasks # Too vague — overlaps with "easy"
|
||||
|
||||
routing_preferences:
|
||||
- name: simple
|
||||
description: easy tasks # Too vague — what is "easy"?
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: hard
|
||||
description: hard tasks # Too vague — overlaps with "easy"
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
```
|
||||
|
||||
**Correct (specific, distinct task descriptions):**
|
||||
**Correct (specific, distinct task descriptions, multi-model fallbacks):**
|
||||
|
||||
```yaml
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: >
|
||||
Summarizing documents, articles, emails, or meeting transcripts.
|
||||
Extracting key points, generating TL;DR sections, condensing long text.
|
||||
- name: classification
|
||||
description: >
|
||||
Categorizing inputs, sentiment analysis, spam detection,
|
||||
intent classification, labeling structured data fields.
|
||||
- name: translation
|
||||
description: >
|
||||
Translating text between languages, localization tasks.
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: >
|
||||
Writing new functions, classes, or modules from scratch.
|
||||
Implementing algorithms, boilerplate generation, API integrations.
|
||||
- name: code_review
|
||||
description: >
|
||||
Reviewing code for bugs, security vulnerabilities, performance issues.
|
||||
Suggesting refactors, explaining complex code, debugging errors.
|
||||
- name: complex_reasoning
|
||||
description: >
|
||||
Multi-step math problems, logical deduction, strategic planning,
|
||||
research synthesis requiring chain-of-thought reasoning.
|
||||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: >
|
||||
Summarizing documents, articles, emails, or meeting transcripts.
|
||||
Extracting key points, generating TL;DR sections, condensing long text.
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- openai/gpt-4o
|
||||
- name: classification
|
||||
description: >
|
||||
Categorizing inputs, sentiment analysis, spam detection,
|
||||
intent classification, labeling structured data fields.
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: translation
|
||||
description: >
|
||||
Translating text between languages, localization tasks.
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: code_generation
|
||||
description: >
|
||||
Writing new functions, classes, or modules from scratch.
|
||||
Implementing algorithms, boilerplate generation, API integrations.
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: code_review
|
||||
description: >
|
||||
Reviewing code for bugs, security vulnerabilities, performance issues.
|
||||
Suggesting refactors, explaining complex code, debugging errors.
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- openai/gpt-4o
|
||||
- name: complex_reasoning
|
||||
description: >
|
||||
Multi-step math problems, logical deduction, strategic planning,
|
||||
research synthesis requiring chain-of-thought reasoning.
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-5
|
||||
```
|
||||
|
||||
**Key principles for good preference descriptions:**
|
||||
- Use concrete action verbs: "writing", "reviewing", "translating", "summarizing"
|
||||
- List 3–5 specific sub-tasks or synonyms for each preference
|
||||
- Ensure preferences across providers are mutually exclusive in scope
|
||||
- Ensure preferences across routes are mutually exclusive in scope
|
||||
- Order `models` from most preferred to least — the client falls back in order on `429`/`5xx`
|
||||
- List multiple models under one route for automatic provider fallback without extra client logic
|
||||
- Every model listed in `models` must be declared in `model_providers`
|
||||
- Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions
|
||||
|
||||
Reference: https://github.com/katanemo/archgw
|
||||
|
|
@ -1451,7 +1504,7 @@ planoai cli_agent claude --path /path/to/project
|
|||
**Recommended config for Claude Code routing:**
|
||||
|
||||
```yaml
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
listeners:
|
||||
- type: model
|
||||
|
|
@ -1462,19 +1515,25 @@ model_providers:
|
|||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: general coding
|
||||
description: >
|
||||
Writing code, debugging, code review, explaining concepts,
|
||||
answering programming questions, general development tasks.
|
||||
|
||||
- model: anthropic/claude-opus-4-6
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: complex architecture
|
||||
description: >
|
||||
System design, complex refactoring across many files,
|
||||
architectural decisions, performance optimization, security audits.
|
||||
|
||||
routing_preferences:
|
||||
- name: general coding
|
||||
description: >
|
||||
Writing code, debugging, code review, explaining concepts,
|
||||
answering programming questions, general development tasks.
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- anthropic/claude-opus-4-6
|
||||
- name: complex architecture
|
||||
description: >
|
||||
System design, complex refactoring across many files,
|
||||
architectural decisions, performance optimization, security audits.
|
||||
models:
|
||||
- anthropic/claude-opus-4-6
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
|
||||
model_aliases:
|
||||
claude.fast.v1:
|
||||
|
|
@ -1861,28 +1920,36 @@ listeners:
|
|||
**Multi-listener architecture (serves all client types):**
|
||||
|
||||
```yaml
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
# --- Shared model providers ---
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: quick tasks
|
||||
description: Short answers, formatting, classification, simple generation
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: complex reasoning
|
||||
description: Multi-step analysis, code generation, research synthesis
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: long documents
|
||||
description: Summarizing or analyzing very long documents, PDFs, transcripts
|
||||
|
||||
# --- Shared routing_preferences (top-level, v0.4.0+) ---
|
||||
routing_preferences:
|
||||
- name: quick tasks
|
||||
description: Short answers, formatting, classification, simple generation
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: complex reasoning
|
||||
description: Multi-step analysis, code generation, research synthesis
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- name: long documents
|
||||
description: Summarizing or analyzing very long documents, PDFs, transcripts
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- openai/gpt-4o
|
||||
|
||||
# --- Listener 1: OpenAI-compatible API gateway ---
|
||||
# For: SDK clients, Claude Code, LangChain, etc.
|
||||
|
|
|
|||
|
|
@ -7,67 +7,100 @@ tags: routing, model-selection, preferences, llm-routing
|
|||
|
||||
## Write Task-Specific Routing Preference Descriptions
|
||||
|
||||
Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy.
|
||||
Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy.
|
||||
|
||||
Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Configs still using the legacy `v0.3.0` inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below.
|
||||
|
||||
**Incorrect (vague, overlapping descriptions):**
|
||||
|
||||
```yaml
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: simple
|
||||
description: easy tasks # Too vague — what is "easy"?
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: hard
|
||||
description: hard tasks # Too vague — overlaps with "easy"
|
||||
|
||||
routing_preferences:
|
||||
- name: simple
|
||||
description: easy tasks # Too vague — what is "easy"?
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: hard
|
||||
description: hard tasks # Too vague — overlaps with "easy"
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
```
|
||||
|
||||
**Correct (specific, distinct task descriptions):**
|
||||
**Correct (specific, distinct task descriptions, multi-model fallbacks):**
|
||||
|
||||
```yaml
|
||||
version: v0.4.0
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: >
|
||||
Summarizing documents, articles, emails, or meeting transcripts.
|
||||
Extracting key points, generating TL;DR sections, condensing long text.
|
||||
- name: classification
|
||||
description: >
|
||||
Categorizing inputs, sentiment analysis, spam detection,
|
||||
intent classification, labeling structured data fields.
|
||||
- name: translation
|
||||
description: >
|
||||
Translating text between languages, localization tasks.
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: >
|
||||
Writing new functions, classes, or modules from scratch.
|
||||
Implementing algorithms, boilerplate generation, API integrations.
|
||||
- name: code_review
|
||||
description: >
|
||||
Reviewing code for bugs, security vulnerabilities, performance issues.
|
||||
Suggesting refactors, explaining complex code, debugging errors.
|
||||
- name: complex_reasoning
|
||||
description: >
|
||||
Multi-step math problems, logical deduction, strategic planning,
|
||||
research synthesis requiring chain-of-thought reasoning.
|
||||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
- name: summarization
|
||||
description: >
|
||||
Summarizing documents, articles, emails, or meeting transcripts.
|
||||
Extracting key points, generating TL;DR sections, condensing long text.
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- openai/gpt-4o
|
||||
- name: classification
|
||||
description: >
|
||||
Categorizing inputs, sentiment analysis, spam detection,
|
||||
intent classification, labeling structured data fields.
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- name: translation
|
||||
description: >
|
||||
Translating text between languages, localization tasks.
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: code_generation
|
||||
description: >
|
||||
Writing new functions, classes, or modules from scratch.
|
||||
Implementing algorithms, boilerplate generation, API integrations.
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- name: code_review
|
||||
description: >
|
||||
Reviewing code for bugs, security vulnerabilities, performance issues.
|
||||
Suggesting refactors, explaining complex code, debugging errors.
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-5
|
||||
- openai/gpt-4o
|
||||
- name: complex_reasoning
|
||||
description: >
|
||||
Multi-step math problems, logical deduction, strategic planning,
|
||||
research synthesis requiring chain-of-thought reasoning.
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- anthropic/claude-sonnet-4-5
|
||||
```
|
||||
|
||||
**Key principles for good preference descriptions:**
|
||||
- Use concrete action verbs: "writing", "reviewing", "translating", "summarizing"
|
||||
- List 3–5 specific sub-tasks or synonyms for each preference
|
||||
- Ensure preferences across providers are mutually exclusive in scope
|
||||
- Ensure preferences across routes are mutually exclusive in scope
|
||||
- Order `models` from most preferred to least — the client will fall back in order on `429`/`5xx`
|
||||
- List multiple models under one route to get automatic provider fallback without additional client logic
|
||||
- Every model listed in `models` must be declared in `model_providers`
|
||||
- Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions
|
||||
|
||||
Reference: https://github.com/katanemo/archgw
|
||||
Reference: [Routing API](../../docs/routing-api.md) · https://github.com/katanemo/archgw
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue