diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index b372810d..cb07767e 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -58,6 +58,110 @@ def get_endpoint_and_port(endpoint, protocol): return endpoint, port +def migrate_inline_routing_preferences(config_yaml): + """Lift v0.3.0-style inline ``routing_preferences`` under each + ``model_providers`` entry to the v0.4.0 top-level ``routing_preferences`` + list with ``models: [...]``. + + This function is a no-op for configs whose ``version`` is already + ``v0.4.0`` or newer — those are assumed to be on the canonical + top-level shape and are passed through untouched. + + For older configs, the version is bumped to ``v0.4.0`` up front so + brightstaff's v0.4.0 gate for top-level ``routing_preferences`` + accepts the rendered config, then inline preferences under each + provider are lifted into the top-level list. Preferences with the + same ``name`` across multiple providers are merged into a single + top-level entry whose ``models`` list contains every provider's + full ``provider/model`` string in declaration order. The first + ``description`` encountered wins; conflicts are warned, not errored, + so existing v0.3.0 configs keep compiling. Any top-level preference + already defined by the user is preserved as-is. 
+ """ + current_version = str(config_yaml.get("version", "")) + if _version_tuple(current_version) >= (0, 4, 0): + return + + config_yaml["version"] = "v0.4.0" + + model_providers = config_yaml.get("model_providers") or [] + if not model_providers: + return + + migrated = {} + for model_provider in model_providers: + inline_prefs = model_provider.get("routing_preferences") + if not inline_prefs: + continue + + full_model_name = model_provider.get("model") + if not full_model_name: + continue + + if "/" in full_model_name and full_model_name.split("/")[-1].strip() == "*": + raise Exception( + f"Model {full_model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards." + ) + + for pref in inline_prefs: + name = pref.get("name") + description = pref.get("description", "") + if not name: + continue + if name in migrated: + entry = migrated[name] + if description and description != entry["description"]: + print( + f"WARNING: routing preference '{name}' has conflicting descriptions across providers; keeping the first one." + ) + if full_model_name not in entry["models"]: + entry["models"].append(full_model_name) + else: + migrated[name] = { + "name": name, + "description": description, + "models": [full_model_name], + } + + if not migrated: + return + + for model_provider in model_providers: + if "routing_preferences" in model_provider: + del model_provider["routing_preferences"] + + existing_top_level = config_yaml.get("routing_preferences") or [] + existing_names = {entry.get("name") for entry in existing_top_level} + merged = list(existing_top_level) + for name, entry in migrated.items(): + if name in existing_names: + continue + merged.append(entry) + config_yaml["routing_preferences"] = merged + + print( + "WARNING: inline routing_preferences under model_providers is deprecated " + "and has been auto-migrated to top-level routing_preferences. Update your " + "config to v0.4.0 top-level form. 
See docs/routing-api.md" + ) + + +def _version_tuple(version_string): + stripped = version_string.strip().lstrip("vV") + if not stripped: + return (0, 0, 0) + parts = stripped.split("-", 1)[0].split(".") + out = [] + for part in parts[:3]: + try: + out.append(int(part)) + except ValueError: + out.append(0) + while len(out) < 3: + out.append(0) + return tuple(out) + + def validate_and_render_schema(): ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" @@ -101,6 +205,8 @@ def validate_and_render_schema(): config_yaml["model_providers"] = config_yaml["llm_providers"] del config_yaml["llm_providers"] + migrate_inline_routing_preferences(config_yaml) + listeners, llm_gateway, prompt_gateway = convert_legacy_listeners( config_yaml.get("listeners"), config_yaml.get("model_providers") ) @@ -200,7 +306,16 @@ def validate_and_render_schema(): model_provider_name_set = set() llms_with_usage = [] model_name_keys = set() - model_usage_name_keys = set() + + top_level_preferences = config_yaml.get("routing_preferences") or [] + seen_pref_names = set() + for pref in top_level_preferences: + pref_name = pref.get("name") + if pref_name in seen_pref_names: + raise Exception( + f'Duplicate routing preference name "{pref_name}", please provide unique name for each routing preference' + ) + seen_pref_names.add(pref_name) print("listeners: ", listeners) @@ -259,10 +374,6 @@ def validate_and_render_schema(): raise Exception( f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards." ) - if model_provider.get("routing_preferences"): - raise Exception( - f"Model {model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards." 
- ) # Validate azure_openai and ollama provider requires base_url if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get( @@ -311,13 +422,6 @@ def validate_and_render_schema(): ) model_name_keys.add(model_id) - for routing_preference in model_provider.get("routing_preferences", []): - if routing_preference.get("name") in model_usage_name_keys: - raise Exception( - f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference' - ) - model_usage_name_keys.add(routing_preference.get("name")) - # Warn if both passthrough_auth and access_key are configured if model_provider.get("passthrough_auth") and model_provider.get( "access_key" @@ -405,7 +509,7 @@ def validate_and_render_schema(): router_model_id = ( router_model.split("/", 1)[1] if "/" in router_model else router_model ) - if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set: + if len(seen_pref_names) > 0 and router_model_id not in model_name_set: updated_model_providers.append( { "name": "plano-orchestrator", diff --git a/cli/test/test_config_generator.py b/cli/test/test_config_generator.py index 3aec2390..77b5b480 100644 --- a/cli/test/test_config_generator.py +++ b/cli/test/test_config_generator.py @@ -1,7 +1,11 @@ import json import pytest +import yaml from unittest import mock -from planoai.config_generator import validate_and_render_schema +from planoai.config_generator import ( + validate_and_render_schema, + migrate_inline_routing_preferences, +) @pytest.fixture(autouse=True) @@ -295,32 +299,30 @@ model_providers: "id": "duplicate_routeing_preference_name", "expected_error": "Duplicate routing preference name", "plano_config": """ -version: v0.1.0 +version: v0.4.0 listeners: - egress_traffic: - address: 0.0.0.0 + - name: llm + type: model port: 12000 - message_format: openai - timeout: 30s - -llm_providers: +model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - 
model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code understanding - description: understand and explain existing code snippets, functions, or libraries - - model: openai/gpt-4.1 - access_key: $OPENAI_API_KEY - routing_preferences: - - name: code understanding - description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + models: + - openai/gpt-4o + - name: code understanding + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - openai/gpt-4o-mini tracing: random_sampling: 100 @@ -501,3 +503,238 @@ def test_convert_legacy_llm_providers_no_prompt_gateway(): "port": 12000, "timeout": "30s", } + + +def test_inline_routing_preferences_migrated_to_top_level(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert config_yaml["version"] == "v0.4.0" + for provider in config_yaml["model_providers"]: + assert "routing_preferences" not in provider + + top_level = config_yaml["routing_preferences"] + by_name = {entry["name"]: entry for entry in top_level} + assert set(by_name) == {"code understanding", "code 
generation"} + assert by_name["code understanding"]["models"] == ["openai/gpt-4o"] + assert by_name["code generation"]["models"] == [ + "anthropic/claude-sonnet-4-20250514" + ] + assert ( + by_name["code understanding"]["description"] + == "understand and explain existing code snippets, functions, or libraries" + ) + + +def test_inline_same_name_across_providers_merges_models(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + top_level = config_yaml["routing_preferences"] + assert len(top_level) == 1 + entry = top_level[0] + assert entry["name"] == "code generation" + assert entry["models"] == [ + "openai/gpt-4o", + "anthropic/claude-sonnet-4-20250514", + ] + assert config_yaml["version"] == "v0.4.0" + + +def test_existing_top_level_routing_preferences_preserved(): + plano_config = """ +version: v0.4.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: code generation + description: generating new code snippets or boilerplate + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-20250514 +""" + config_yaml = yaml.safe_load(plano_config) + before = yaml.safe_dump(config_yaml, sort_keys=True) + migrate_inline_routing_preferences(config_yaml) 
+ after = yaml.safe_dump(config_yaml, sort_keys=True) + + assert before == after + + +def test_existing_top_level_wins_over_inline_migration(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: inline description should lose + +routing_preferences: + - name: code generation + description: user-defined top-level description wins + models: + - openai/gpt-4o +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + top_level = config_yaml["routing_preferences"] + assert len(top_level) == 1 + entry = top_level[0] + assert entry["description"] == "user-defined top-level description wins" + assert entry["models"] == ["openai/gpt-4o"] + + +def test_wildcard_with_inline_routing_preferences_errors(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openrouter/* + base_url: https://openrouter.ai/api/v1 + passthrough_auth: true + routing_preferences: + - name: code generation + description: generating code +""" + config_yaml = yaml.safe_load(plano_config) + with pytest.raises(Exception) as excinfo: + migrate_inline_routing_preferences(config_yaml) + assert "wildcard" in str(excinfo.value).lower() + + +def test_migration_bumps_version_even_without_inline_preferences(): + plano_config = """ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert "routing_preferences" not in config_yaml + assert config_yaml["version"] == "v0.4.0" + + +def test_migration_is_noop_on_v040_config_with_stray_inline_preferences(): + # v0.4.0 configs are assumed to be on 
the canonical top-level shape. + # The migration intentionally does not rescue stray inline preferences + # at v0.4.0+ so that the deprecation boundary is a clean version gate. + plano_config = """ +version: v0.4.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert config_yaml["version"] == "v0.4.0" + assert "routing_preferences" not in config_yaml + assert config_yaml["model_providers"][0]["routing_preferences"] == [ + {"name": "code generation", "description": "generating new code"} + ] + + +def test_migration_does_not_downgrade_newer_versions(): + plano_config = """ +version: v0.5.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY +""" + config_yaml = yaml.safe_load(plano_config) + migrate_inline_routing_preferences(config_yaml) + + assert config_yaml["version"] == "v0.5.0" diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 2f9eea63..9560b437 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -201,6 +201,7 @@ properties: description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." routing_preferences: type: array + description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md." items: type: object properties: @@ -258,6 +259,7 @@ properties: description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." 
routing_preferences: type: array + description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md." items: type: object properties: diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 86aa331d..1275d77d 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -656,7 +656,7 @@ mod test { .expect("reference config file not found"); let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap(); - assert_eq!(config.version, "v0.3.0"); + assert_eq!(config.version, "v0.4.0"); if let Some(prompt_targets) = &config.prompt_targets { assert!( diff --git a/demos/llm_routing/claude_code_router/config.yaml b/demos/llm_routing/claude_code_router/config.yaml index e72aa73a..6235b6c6 100644 --- a/demos/llm_routing/claude_code_router/config.yaml +++ b/demos/llm_routing/claude_code_router/config.yaml @@ -19,7 +19,7 @@ model_providers: - name: code understanding description: understand and explain existing code snippets, functions, or libraries # Anthropic Models - - model: anthropic/claude-sonnet-4-5 + - model: anthropic/claude-sonnet-4-6 default: true access_key: $ANTHROPIC_API_KEY diff --git a/docs/routing-api.md b/docs/routing-api.md index c2b9c63f..4d1d6a8e 100644 --- a/docs/routing-api.md +++ b/docs/routing-api.md @@ -34,11 +34,13 @@ POST /v1/chat/completions ### `routing_preferences` fields -| Field | Type | Required | Description | -|---|---|---|---| -| `name` | string | yes | Route identifier. Must match the LLM router's route classification. | -| `description` | string | yes | Natural language description used by the router to match user intent. | -| `models` | string[] | yes | Ordered candidate pool. At least one entry required. 
Must be declared in `model_providers`. | + ### Notes @@ -64,11 +66,13 @@ POST /v1/chat/completions ### Fields -| Field | Type | Description | -|---|---|---| -| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. | -| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. | -| `trace_id` | string | Trace ID for distributed tracing and observability. | + +| Field | Type | Description | +| ---------- | ------------- | ------------------------------------------------------------------------------------------------------- | +| `models` | string[] | Ranked model list. Use `models[0]` as primary; retry with `models[1]` on 429/5xx, and so on. | +| `route` | string \| null | Name of the matched route. `null` if no route matched — client should use the original request `model`. | +| `trace_id` | string | Trace ID for distributed tracing and observability. | + --- @@ -142,6 +146,7 @@ X-Model-Affinity: a1b2c3d4-5678-... ``` Response when pinned: + ```json { "models": ["anthropic/claude-sonnet-4-20250514"], @@ -155,6 +160,7 @@ Response when pinned: Without the header, routing runs fresh every time (no breaking change). 
Configure TTL and cache size: + ```yaml routing: session_ttl_seconds: 600 # default: 10 min @@ -165,7 +171,8 @@ routing: ## Version Requirements -| Version | Top-level `routing_preferences` | -|---|---| + +| Version | Top-level `routing_preferences` | +| ---------- | -------------------------------------- | | `< v0.4.0` | Not allowed — startup error if present | -| `v0.4.0+` | Supported (required for model routing) | +| `v0.4.0+` | Supported (required for model routing) | diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index 87163d3b..60f468e0 100644 --- a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -158,7 +158,9 @@ Anthropic .. code-block:: yaml - llm_providers: + version: v0.4.0 + + model_providers: # Configure all Anthropic models with wildcard - model: anthropic/* access_key: $ANTHROPIC_API_KEY @@ -179,8 +181,12 @@ Anthropic - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_PROD_API_KEY - routing_preferences: - - name: code_generation + + routing_preferences: + - name: code_generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - anthropic/claude-sonnet-4-20250514 DeepSeek ~~~~~~~~ @@ -798,7 +804,9 @@ You can configure specific models with custom settings even when using wildcards .. 
code-block:: yaml - llm_providers: + version: v0.4.0 + + model_providers: # Expand to all Anthropic models - model: anthropic/* access_key: $ANTHROPIC_API_KEY @@ -807,14 +815,17 @@ You can configure specific models with custom settings even when using wildcards # This model will NOT be included in the wildcard expansion above - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_PROD_API_KEY - routing_preferences: - - name: code_generation - priority: 1 # Another specific override - model: anthropic/claude-3-haiku-20240307 access_key: $ANTHROPIC_DEV_API_KEY + routing_preferences: + - name: code_generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - anthropic/claude-sonnet-4-20250514 + **Custom Provider Wildcards:** For providers not in Plano's registry, wildcards enable dynamic model routing: @@ -856,24 +867,36 @@ Mark one model as the default for fallback scenarios: Routing Preferences ~~~~~~~~~~~~~~~~~~~ -Configure routing preferences for dynamic model selection: +Starting in ``v0.4.0``, configure routing preferences at the top level of the config. Each preference declares an ordered ``models`` candidate pool; the first entry is primary and the rest are fallbacks the client tries on ``429``/``5xx`` errors. Multiple providers can serve the same route — just list them all under ``models``. See :doc:`/guides/llm_router` for the full routing model. .. 
code-block:: yaml - llm_providers: + version: v0.4.0 + + model_providers: - model: openai/gpt-5.2 access_key: $OPENAI_API_KEY - routing_preferences: - - name: complex_reasoning - description: deep analysis, mathematical problem solving, and logical reasoning - - name: code_review - description: reviewing and analyzing existing code for bugs and improvements - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative_writing - description: creative content generation, storytelling, and writing assistance + + routing_preferences: + - name: complex_reasoning + description: deep analysis, mathematical problem solving, and logical reasoning + models: + - openai/gpt-5.2 + - anthropic/claude-sonnet-4-5 + - name: code_review + description: reviewing and analyzing existing code for bugs and improvements + models: + - openai/gpt-5.2 + - name: creative_writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 + +.. note:: + ``v0.3.0`` configs that declare ``routing_preferences`` inline under each ``model_provider`` are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update to the form above to silence the warning and gain the multi-model fallback behavior. .. _passthrough_auth: diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 5539dddc..b66c01f2 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -147,38 +147,53 @@ Plano-Orchestrator analyzes each prompt to infer domain and action, then applies Configuration ^^^^^^^^^^^^^ -To configure preference-aligned dynamic routing, define routing preferences that map domains and actions to specific models: +To configure preference-aligned dynamic routing, declare a top-level ``routing_preferences`` list and attach an ordered ``models`` candidate pool to each route. 
Starting in ``v0.4.0``, ``routing_preferences`` lives at the root of the config (not inline under ``model_providers``), which lets multiple models serve the same route — the first entry in ``models`` is primary, the rest are fallbacks that the client tries on ``429``/``5xx`` errors. .. code-block:: yaml :caption: Preference-Aligned Dynamic Routing Configuration + version: v0.4.0 + listeners: - egress_traffic: + - name: egress_traffic + type: model address: 0.0.0.0 port: 12000 - message_format: openai timeout: 30s - llm_providers: + model_providers: - model: openai/gpt-5.2 access_key: $OPENAI_API_KEY default: true - model: openai/gpt-5 access_key: $OPENAI_API_KEY - routing_preferences: - - name: code understanding - description: understand and explain existing code snippets, functions, or libraries - - name: complex reasoning - description: deep analysis, mathematical problem solving, and logical reasoning - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative writing - description: creative content generation, storytelling, and writing assistance - - name: code generation - description: generating new code snippets, functions, or boilerplate based on user prompts + + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + models: + - openai/gpt-5 + - anthropic/claude-sonnet-4-5 + - name: complex reasoning + description: deep analysis, mathematical problem solving, and logical reasoning + models: + - openai/gpt-5 + - name: creative writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-5 + +.. 
note:: + Configs still using the ``v0.3.0`` inline style (``routing_preferences`` nested under each ``model_provider``) are auto-migrated to this top-level shape by the Plano CLI at compile time, with a deprecation warning. Update your config to the form above to silence the warning. Client usage ^^^^^^^^^^^^ @@ -253,6 +268,8 @@ Using Ollama (recommended for local development) .. code-block:: yaml + version: v0.4.0 + overrides: llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M @@ -266,9 +283,12 @@ Using Ollama (recommended for local development) - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative writing - description: creative content generation, storytelling, and writing assistance + + routing_preferences: + - name: creative writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 4. **Verify the model is running** @@ -322,6 +342,8 @@ vLLM provides higher throughput and GPU optimizations suitable for production de .. code-block:: yaml + version: v0.4.0 + overrides: llm_routing_model: plano/Plano-Orchestrator @@ -335,9 +357,12 @@ vLLM provides higher throughput and GPU optimizations suitable for production de - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative writing - description: creative content generation, storytelling, and writing assistance + + routing_preferences: + - name: creative writing + description: creative content generation, storytelling, and writing assistance + models: + - anthropic/claude-sonnet-4-5 5. **Verify the server is running** @@ -468,22 +493,30 @@ You can combine static model selection with dynamic routing preferences for maxi .. 
code-block:: yaml :caption: Hybrid Routing Configuration - llm_providers: + version: v0.4.0 + + model_providers: - model: openai/gpt-5.2 access_key: $OPENAI_API_KEY default: true - model: openai/gpt-5 access_key: $OPENAI_API_KEY - routing_preferences: - - name: complex_reasoning - description: deep analysis and complex problem solving - model: anthropic/claude-sonnet-4-5 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: creative_tasks - description: creative writing and content generation + + routing_preferences: + - name: complex_reasoning + description: deep analysis and complex problem solving + models: + - openai/gpt-5 + - anthropic/claude-sonnet-4-5 + - name: creative_tasks + description: creative writing and content generation + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-5 model_aliases: # Model aliases - friendly names that map to actual provider names diff --git a/docs/source/resources/includes/plano_config_full_reference.yaml b/docs/source/resources/includes/plano_config_full_reference.yaml index 808d0a98..99eb4510 100644 --- a/docs/source/resources/includes/plano_config_full_reference.yaml +++ b/docs/source/resources/includes/plano_config_full_reference.yaml @@ -1,5 +1,5 @@ # Plano Gateway configuration version -version: v0.3.0 +version: v0.4.0 # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions) agents: @@ -32,17 +32,8 @@ model_providers: - model: mistral/ministral-3b-latest access_key: $MISTRAL_API_KEY - # routing_preferences: tags a model with named capabilities so Plano's LLM router - # can select the best model for each request based on intent. Requires the - # Plano-Orchestrator model (or equivalent) to be configured in overrides.llm_routing_model. - # Each preference has a name (short label) and a description (used for intent matching). 
- model: groq/llama-3.3-70b-versatile access_key: $GROQ_API_KEY - routing_preferences: - - name: code generation - description: generating new code snippets, functions, or boilerplate based on user prompts or requirements - - name: code review - description: reviewing, analyzing, and suggesting improvements to existing code # passthrough_auth: forwards the client's Authorization header upstream instead of # using the configured access_key. Useful for LiteLLM or similar proxy setups. @@ -64,6 +55,29 @@ model_aliases: smart-llm: target: gpt-4o +# routing_preferences: top-level list that tags named task categories with an +# ordered pool of candidate models. Plano's LLM router matches incoming requests +# against these descriptions and returns an ordered list of models; the client +# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx. +# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent). +# Each model in `models` must be declared in model_providers above. +# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router +# reorder candidates using live cost/latency data from model_metrics_sources. 
+routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + models: + - anthropic/claude-sonnet-4-0 + - openai/gpt-4o + - groq/llama-3.3-70b-versatile + - name: code review + description: reviewing, analyzing, and suggesting improvements to existing code + models: + - anthropic/claude-sonnet-4-0 + - groq/llama-3.3-70b-versatile + selection_policy: + prefer: cheapest + # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access listeners: # Agent listener for routing requests to multiple agents diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml index a0603221..e2ab9110 100644 --- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml @@ -69,12 +69,6 @@ listeners: model: llama-3.3-70b-versatile name: groq/llama-3.3-70b-versatile provider_interface: groq - routing_preferences: - - description: generating new code snippets, functions, or boilerplate based on - user prompts or requirements - name: code generation - - description: reviewing, analyzing, and suggesting improvements to existing code - name: code review - base_url: https://litellm.example.com cluster_name: openai_litellm.example.com endpoint: litellm.example.com @@ -131,12 +125,6 @@ model_providers: model: llama-3.3-70b-versatile name: groq/llama-3.3-70b-versatile provider_interface: groq - routing_preferences: - - description: generating new code snippets, functions, or boilerplate based on - user prompts or requirements - name: code generation - - description: reviewing, analyzing, and suggesting improvements to existing code - name: code review - base_url: https://litellm.example.com cluster_name: openai_litellm.example.com endpoint: litellm.example.com @@ -221,6 +209,21 @@ 
routing: type: memory session_max_entries: 10000 session_ttl_seconds: 600 +routing_preferences: +- description: generating new code snippets, functions, or boilerplate based on user + prompts or requirements + models: + - anthropic/claude-sonnet-4-0 + - openai/gpt-4o + - groq/llama-3.3-70b-versatile + name: code generation +- description: reviewing, analyzing, and suggesting improvements to existing code + models: + - anthropic/claude-sonnet-4-0 + - groq/llama-3.3-70b-versatile + name: code review + selection_policy: + prefer: cheapest state_storage: type: memory system_prompt: 'You are a helpful assistant. Always respond concisely and accurately. @@ -237,4 +240,4 @@ tracing: environment: production service.team: platform trace_arch_internal: false -version: v0.3.0 +version: v0.4.0 diff --git a/skills/AGENTS.md b/skills/AGENTS.md index 61fd7228..dab3144b 100644 --- a/skills/AGENTS.md +++ b/skills/AGENTS.md @@ -312,20 +312,24 @@ When a request does not match any routing preference, Plano forwards it to the ` **Incorrect (no default provider set):** ```yaml -version: v0.3.0 +version: v0.4.0 model_providers: - model: openai/gpt-4o-mini # No default: true anywhere access_key: $OPENAI_API_KEY - routing_preferences: - - name: summarization - description: Summarizing documents and extracting key points - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: Writing new functions and implementing algorithms + +routing_preferences: + - name: summarization + description: Summarizing documents and extracting key points + models: + - openai/gpt-4o-mini + - name: code_generation + description: Writing new functions and implementing algorithms + models: + - openai/gpt-4o ``` **Incorrect (multiple defaults — ambiguous):** @@ -344,25 +348,35 @@ model_providers: **Correct (exactly one default, covering unmatched requests):** ```yaml -version: v0.3.0 +version: v0.4.0 model_providers: - model: openai/gpt-4o-mini access_key: 
$OPENAI_API_KEY default: true # Handles general/unclassified requests - routing_preferences: - - name: summarization - description: Summarizing documents, articles, and meeting notes - - name: classification - description: Categorizing inputs, labeling, and intent detection - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: Writing, debugging, and reviewing code - - name: complex_reasoning - description: Multi-step math, logical analysis, research synthesis + +routing_preferences: + - name: summarization + description: Summarizing documents, articles, and meeting notes + models: + - openai/gpt-4o-mini + - openai/gpt-4o + - name: classification + description: Categorizing inputs, labeling, and intent detection + models: + - openai/gpt-4o-mini + - name: code_generation + description: Writing, debugging, and reviewing code + models: + - openai/gpt-4o + - openai/gpt-4o-mini + - name: complex_reasoning + description: Multi-step math, logical analysis, research synthesis + models: + - openai/gpt-4o ``` Choose your most cost-effective capable model as the default — it handles all traffic that doesn't match specialized preferences. 
@@ -498,21 +512,27 @@ model_providers: **Combined: proxy for some models, Plano-managed for others:** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY # Plano manages this key default: true - routing_preferences: - - name: quick tasks - description: Short answers, simple lookups, fast completions - model: custom/vllm-llama base_url: http://gpu-server:8000 provider_interface: openai passthrough_auth: true # vLLM cluster handles its own auth - routing_preferences: - - name: long context - description: Processing very long documents, multi-document analysis + +routing_preferences: + - name: quick tasks + description: Short answers, simple lookups, fast completions + models: + - openai/gpt-4o-mini + - name: long context + description: Processing very long documents, multi-document analysis + models: + - custom/vllm-llama ``` Reference: https://github.com/katanemo/archgw @@ -526,67 +546,100 @@ Reference: https://github.com/katanemo/archgw ## Write Task-Specific Routing Preference Descriptions -Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy. +Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy. + +Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Listing multiple models under a single route gives you automatic provider fallback without extra client logic. 
Configs still using the legacy v0.3.0 inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below. **Incorrect (vague, overlapping descriptions):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: simple - description: easy tasks # Too vague — what is "easy"? - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: hard - description: hard tasks # Too vague — overlaps with "easy" + +routing_preferences: + - name: simple + description: easy tasks # Too vague — what is "easy"? + models: + - openai/gpt-4o-mini + - name: hard + description: hard tasks # Too vague — overlaps with "easy" + models: + - openai/gpt-4o ``` -**Correct (specific, distinct task descriptions):** +**Correct (specific, distinct task descriptions, multi-model fallbacks):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: summarization - description: > - Summarizing documents, articles, emails, or meeting transcripts. - Extracting key points, generating TL;DR sections, condensing long text. - - name: classification - description: > - Categorizing inputs, sentiment analysis, spam detection, - intent classification, labeling structured data fields. - - name: translation - description: > - Translating text between languages, localization tasks. - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: > - Writing new functions, classes, or modules from scratch. - Implementing algorithms, boilerplate generation, API integrations. - - name: code_review - description: > - Reviewing code for bugs, security vulnerabilities, performance issues. - Suggesting refactors, explaining complex code, debugging errors. 
- - name: complex_reasoning - description: > - Multi-step math problems, logical deduction, strategic planning, - research synthesis requiring chain-of-thought reasoning. + + - model: anthropic/claude-sonnet-4-5 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: summarization + description: > + Summarizing documents, articles, emails, or meeting transcripts. + Extracting key points, generating TL;DR sections, condensing long text. + models: + - openai/gpt-4o-mini + - openai/gpt-4o + - name: classification + description: > + Categorizing inputs, sentiment analysis, spam detection, + intent classification, labeling structured data fields. + models: + - openai/gpt-4o-mini + - name: translation + description: > + Translating text between languages, localization tasks. + models: + - openai/gpt-4o-mini + - anthropic/claude-sonnet-4-5 + - name: code_generation + description: > + Writing new functions, classes, or modules from scratch. + Implementing algorithms, boilerplate generation, API integrations. + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 + - name: code_review + description: > + Reviewing code for bugs, security vulnerabilities, performance issues. + Suggesting refactors, explaining complex code, debugging errors. + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-4o + - name: complex_reasoning + description: > + Multi-step math problems, logical deduction, strategic planning, + research synthesis requiring chain-of-thought reasoning. 
+ models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 ``` **Key principles for good preference descriptions:** - Use concrete action verbs: "writing", "reviewing", "translating", "summarizing" - List 3–5 specific sub-tasks or synonyms for each preference -- Ensure preferences across providers are mutually exclusive in scope +- Ensure preferences across routes are mutually exclusive in scope +- Order `models` from most preferred to least — the client falls back in order on `429`/`5xx` +- List multiple models under one route for automatic provider fallback without extra client logic +- Every model listed in `models` must be declared in `model_providers` - Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions Reference: https://github.com/katanemo/archgw @@ -1451,7 +1504,7 @@ planoai cli_agent claude --path /path/to/project **Recommended config for Claude Code routing:** ```yaml -version: v0.3.0 +version: v0.4.0 listeners: - type: model @@ -1462,19 +1515,25 @@ model_providers: - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_API_KEY default: true - routing_preferences: - - name: general coding - description: > - Writing code, debugging, code review, explaining concepts, - answering programming questions, general development tasks. - model: anthropic/claude-opus-4-6 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: complex architecture - description: > - System design, complex refactoring across many files, - architectural decisions, performance optimization, security audits. + +routing_preferences: + - name: general coding + description: > + Writing code, debugging, code review, explaining concepts, + answering programming questions, general development tasks. 
+ models: + - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-opus-4-6 + - name: complex architecture + description: > + System design, complex refactoring across many files, + architectural decisions, performance optimization, security audits. + models: + - anthropic/claude-opus-4-6 + - anthropic/claude-sonnet-4-20250514 model_aliases: claude.fast.v1: @@ -1861,28 +1920,36 @@ listeners: **Multi-listener architecture (serves all client types):** ```yaml -version: v0.3.0 +version: v0.4.0 # --- Shared model providers --- model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: quick tasks - description: Short answers, formatting, classification, simple generation - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: complex reasoning - description: Multi-step analysis, code generation, research synthesis - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_API_KEY - routing_preferences: - - name: long documents - description: Summarizing or analyzing very long documents, PDFs, transcripts + +# --- Shared routing_preferences (top-level, v0.4.0+) --- +routing_preferences: + - name: quick tasks + description: Short answers, formatting, classification, simple generation + models: + - openai/gpt-4o-mini + - name: complex reasoning + description: Multi-step analysis, code generation, research synthesis + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-20250514 + - name: long documents + description: Summarizing or analyzing very long documents, PDFs, transcripts + models: + - anthropic/claude-sonnet-4-20250514 + - openai/gpt-4o # --- Listener 1: OpenAI-compatible API gateway --- # For: SDK clients, Claude Code, LangChain, etc. 
diff --git a/skills/rules/routing-preferences.md b/skills/rules/routing-preferences.md index 571a3acd..51127c5e 100644 --- a/skills/rules/routing-preferences.md +++ b/skills/rules/routing-preferences.md @@ -7,67 +7,100 @@ tags: routing, model-selection, preferences, llm-routing ## Write Task-Specific Routing Preference Descriptions -Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It routes the request to the first provider whose preferences match. Description quality directly determines routing accuracy. +Plano's `plano_orchestrator_v1` router uses a 1.5B preference-aligned LLM to classify incoming requests against your `routing_preferences` descriptions. It returns an ordered `models` list for the matched route; the client uses `models[0]` as primary and falls back to `models[1]`, `models[2]`... on `429`/`5xx` errors. Description quality directly determines routing accuracy. + +Starting in `v0.4.0`, `routing_preferences` lives at the **top level** of the config and each entry carries its own `models: [...]` candidate pool. Configs still using the legacy v0.3.0 inline shape (under each `model_provider`) are auto-migrated with a deprecation warning — prefer the top-level form below. **Incorrect (vague, overlapping descriptions):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: simple - description: easy tasks # Too vague — what is "easy"? - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: hard - description: hard tasks # Too vague — overlaps with "easy" + +routing_preferences: + - name: simple + description: easy tasks # Too vague — what is "easy"? 
+ models: + - openai/gpt-4o-mini + - name: hard + description: hard tasks # Too vague — overlaps with "easy" + models: + - openai/gpt-4o ``` -**Correct (specific, distinct task descriptions):** +**Correct (specific, distinct task descriptions, multi-model fallbacks):** ```yaml +version: v0.4.0 + model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true - routing_preferences: - - name: summarization - description: > - Summarizing documents, articles, emails, or meeting transcripts. - Extracting key points, generating TL;DR sections, condensing long text. - - name: classification - description: > - Categorizing inputs, sentiment analysis, spam detection, - intent classification, labeling structured data fields. - - name: translation - description: > - Translating text between languages, localization tasks. - model: openai/gpt-4o access_key: $OPENAI_API_KEY - routing_preferences: - - name: code_generation - description: > - Writing new functions, classes, or modules from scratch. - Implementing algorithms, boilerplate generation, API integrations. - - name: code_review - description: > - Reviewing code for bugs, security vulnerabilities, performance issues. - Suggesting refactors, explaining complex code, debugging errors. - - name: complex_reasoning - description: > - Multi-step math problems, logical deduction, strategic planning, - research synthesis requiring chain-of-thought reasoning. + + - model: anthropic/claude-sonnet-4-5 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: summarization + description: > + Summarizing documents, articles, emails, or meeting transcripts. + Extracting key points, generating TL;DR sections, condensing long text. + models: + - openai/gpt-4o-mini + - openai/gpt-4o + - name: classification + description: > + Categorizing inputs, sentiment analysis, spam detection, + intent classification, labeling structured data fields. 
+ models: + - openai/gpt-4o-mini + - name: translation + description: > + Translating text between languages, localization tasks. + models: + - openai/gpt-4o-mini + - anthropic/claude-sonnet-4-5 + - name: code_generation + description: > + Writing new functions, classes, or modules from scratch. + Implementing algorithms, boilerplate generation, API integrations. + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 + - name: code_review + description: > + Reviewing code for bugs, security vulnerabilities, performance issues. + Suggesting refactors, explaining complex code, debugging errors. + models: + - anthropic/claude-sonnet-4-5 + - openai/gpt-4o + - name: complex_reasoning + description: > + Multi-step math problems, logical deduction, strategic planning, + research synthesis requiring chain-of-thought reasoning. + models: + - openai/gpt-4o + - anthropic/claude-sonnet-4-5 ``` **Key principles for good preference descriptions:** - Use concrete action verbs: "writing", "reviewing", "translating", "summarizing" - List 3–5 specific sub-tasks or synonyms for each preference -- Ensure preferences across providers are mutually exclusive in scope +- Ensure preferences across routes are mutually exclusive in scope +- Order `models` from most preferred to least — the client will fall back in order on `429`/`5xx` +- List multiple models under one route to get automatic provider fallback without additional client logic +- Every model listed in `models` must be declared in `model_providers` - Test with representative queries using `planoai trace` and `--where` filters to verify routing decisions -Reference: https://github.com/katanemo/archgw +Reference: [Routing API](../../docs/routing-api.md) · https://github.com/katanemo/archgw