mirror of
https://github.com/katanemo/plano.git
synced 2026-04-26 01:06:25 +02:00
fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level (#912)
* fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level

  Lift inline routing_preferences under each model_provider into the top-level routing_preferences list with merged models[] and bump version to v0.4.0, with a deprecation warning. Existing v0.3.0 demo configs (Claude Code, Codex, preference_based_routing, etc.) keep working unchanged. Schema flags the inline shape as deprecated but still accepts it. Docs and skills updated to canonical top-level multi-model form.

* test(common): bump reference config assertion to v0.4.0

  The rendered reference config was bumped to v0.4.0 when its inline routing_preferences were lifted to the top level; align the configuration deserialization test with that change.

* fix(config_generator): bump version to v0.4.0 up front in migration

  Move the v0.3.0 -> v0.4.0 version bump to the top of migrate_inline_routing_preferences so it runs unconditionally, including for configs that already declare top-level routing_preferences at v0.3.0. Previously the bump only fired when inline migration produced entries, leaving top-level v0.3.0 configs rejected by brightstaff's v0.4.0 gate. Tests updated to cover the new behavior and to confirm we never downgrade newer versions.

* fix(config_generator): gate routing_preferences migration on version < v0.4.0

  Short-circuit the migration when the config already declares v0.4.0 or newer. Anything at v0.4.0+ is assumed to be on the canonical top-level shape and is passed through untouched, including stray inline preferences (which are the author's bug to fix). Only v0.3.0 and older configs are rewritten and bumped.
This commit is contained in:
parent
5a652eb666
commit
897fda2deb
12 changed files with 748 additions and 225 deletions
|
|
@@ -1,5 +1,5 @@
|
|||
# Plano Gateway configuration version
|
||||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
|
||||
agents:
|
||||
|
|
@@ -32,17 +32,8 @@ model_providers:
|
|||
- model: mistral/ministral-3b-latest
|
||||
access_key: $MISTRAL_API_KEY
|
||||
|
||||
# routing_preferences: tags a model with named capabilities so Plano's LLM router
|
||||
# can select the best model for each request based on intent. Requires the
|
||||
# Plano-Orchestrator model (or equivalent) to be configured in overrides.llm_routing_model.
|
||||
# Each preference has a name (short label) and a description (used for intent matching).
|
||||
- model: groq/llama-3.3-70b-versatile
|
||||
access_key: $GROQ_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
- name: code review
|
||||
description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
|
||||
# passthrough_auth: forwards the client's Authorization header upstream instead of
|
||||
# using the configured access_key. Useful for LiteLLM or similar proxy setups.
|
||||
|
|
@@ -64,6 +55,29 @@ model_aliases:
|
|||
smart-llm:
|
||||
target: gpt-4o
|
||||
|
||||
# routing_preferences: top-level list that tags named task categories with an
|
||||
# ordered pool of candidate models. Plano's LLM router matches incoming requests
|
||||
# against these descriptions and returns an ordered list of models; the client
|
||||
# uses models[0] as primary and retries with models[1], models[2]... on 429/5xx.
|
||||
# Requires overrides.llm_routing_model to point at Plano-Orchestrator (or equivalent).
|
||||
# Each model in `models` must be declared in model_providers above.
|
||||
# selection_policy is optional: {prefer: cheapest|fastest|none} lets the router
|
||||
# reorder candidates using live cost/latency data from model_metrics_sources.
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-0
|
||||
- openai/gpt-4o
|
||||
- groq/llama-3.3-70b-versatile
|
||||
- name: code review
|
||||
description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-0
|
||||
- groq/llama-3.3-70b-versatile
|
||||
selection_policy:
|
||||
prefer: cheapest
|
||||
|
||||
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
|
||||
listeners:
|
||||
# Agent listener for routing requests to multiple agents
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue