model routing: cost/latency ranking with ranked fallback list (#849)

This commit is contained in:
Adil Hafeez 2026-03-30 13:46:52 -07:00 committed by GitHub
parent 3a531ce22a
commit e5751d6b13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1524 additions and 317 deletions

View file

@ -9,6 +9,7 @@ properties:
- 0.1-beta
- 0.2.0
- v0.3.0
- v0.4.0
agents:
type: array
@ -470,6 +471,106 @@ properties:
additionalProperties: false
required:
- jailbreak
# Named routing preferences. Each entry lists one or more candidate models
# and the policy used to choose among them.
routing_preferences:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      description:
        type: string
      # Candidate models for this preference; at least one is required.
      models:
        type: array
        items:
          type: string
        minItems: 1
      # How to pick among the candidate models.
      selection_policy:
        type: object
        properties:
          prefer:
            type: string
            enum:
              - cheapest
              - fastest
              - none
        additionalProperties: false
        required:
          - prefer
    additionalProperties: false
    required:
      - name
      - description
      - models
      - selection_policy
# Sources of per-model metrics. Each entry must match exactly one of the
# variants below, discriminated by the constant value of its `type` field.
model_metrics_sources:
  type: array
  items:
    oneOf:
      # Variant: cost_metrics — fetched from `url`, with optional bearer auth.
      - type: object
        properties:
          type:
            type: string
            const: cost_metrics
          url:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          # Optional; when present both `type` and `token` are required.
          auth:
            type: object
            properties:
              type:
                type: string
                enum:
                  - bearer
              token:
                type: string
            required:
              - type
              - token
            additionalProperties: false
        required:
          - type
          - url
        additionalProperties: false
      # Variant: prometheus_metrics — requires both `url` and `query`.
      - type: object
        properties:
          type:
            type: string
            const: prometheus_metrics
          url:
            type: string
          query:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
        required:
          - type
          - url
          - query
        additionalProperties: false
      # Variant: digitalocean_pricing — only `type` is required.
      - type: object
        properties:
          type:
            type: string
            const: digitalocean_pricing
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          model_aliases:
            type: object
            description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
            additionalProperties:
              type: string
        required:
          - type
        additionalProperties: false
additionalProperties: false
required:
- version

View file

@ -1,6 +1,16 @@
#!/bin/bash

# Resolve this script's directory, the repository root one level above it,
# and the cli/ directory inside the repository.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
CLI_DIR="$REPO_ROOT/cli"

# Default to bare python; switch to `uv run` only when uv is on PATH and
# cli/ has a pyproject.toml.
PYTHON_CMD="python"
if command -v uv > /dev/null 2>&1 && [ -f "$CLI_DIR/pyproject.toml" ]; then
  PYTHON_CMD="uv run --directory $CLI_DIR python"
fi

# NOTE(review): presumably filled with configs that fail validation further
# down in the script; confirm against the loop that follows.
failed_files=()
for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do
@ -14,7 +24,7 @@ for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml
ENVOY_CONFIG_TEMPLATE_FILE="envoy.template.yaml" \
PLANO_CONFIG_FILE_RENDERED="$rendered_file" \
ENVOY_CONFIG_FILE_RENDERED="/dev/null" \
python -m planoai.config_generator 2>&1 > /dev/null
$PYTHON_CMD -m planoai.config_generator 2>&1 > /dev/null
if [ $? -ne 0 ]; then
echo "Validation failed for $file"