mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
model routing: cost/latency ranking with ranked fallback list (#849)
This commit is contained in:
parent
3a531ce22a
commit
e5751d6b13
23 changed files with 1524 additions and 317 deletions
|
|
@ -9,6 +9,7 @@ properties:
|
|||
- 0.1-beta
|
||||
- 0.2.0
|
||||
- v0.3.0
|
||||
- v0.4.0
|
||||
|
||||
agents:
|
||||
type: array
|
||||
|
|
@ -470,6 +471,106 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- jailbreak
|
||||
# routing_preferences: schema for an array of routing-preference objects.
# NOTE(review): indentation was lost in this rendering, so the nesting described
# in these comments is inferred from key order -- confirm against the real file.
routing_preferences:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
# models: list of model-name strings; the minItems: 1 below presumably applies
# to this list (at least one model per preference) -- confirm.
models:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
minItems: 1
|
||||
# selection_policy: object whose `prefer` key must be one of
# cheapest / fastest / none.
selection_policy:
|
||||
type: object
|
||||
properties:
|
||||
prefer:
|
||||
type: string
|
||||
enum:
|
||||
- cheapest
|
||||
- fastest
|
||||
- none
|
||||
# Presumably closes selection_policy: no extra keys, `prefer` required.
additionalProperties: false
|
||||
required:
|
||||
- prefer
|
||||
# Presumably closes each routing-preference item: no extra keys, and all four
# properties listed below are required.
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- description
|
||||
- models
|
||||
- selection_policy
|
||||
|
||||
# model_metrics_sources: schema for an array whose items must match exactly one
# (oneOf) of three object variants, each identified by a constant `type` value:
# cost_metrics, prometheus_metrics, or digitalocean_pricing.
# NOTE(review): indentation was lost in this rendering, so the nesting described
# in these comments is inferred from key order -- confirm against the real file.
model_metrics_sources:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
# Variant 1: type == cost_metrics. Has url, refresh_interval and auth keys.
- type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: cost_metrics
|
||||
url:
|
||||
type: string
|
||||
# NOTE(review): the other two variants describe refresh_interval as
# "Refresh interval in seconds"; this one has no description -- presumably
# the same unit, confirm.
refresh_interval:
|
||||
type: integer
|
||||
minimum: 1
|
||||
# auth: object with a `type` (only `bearer` is listed) and a `token` string.
auth:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
enum:
|
||||
- bearer
|
||||
token:
|
||||
type: string
|
||||
# Presumably closes auth: both keys required, no extra keys.
required:
|
||||
- type
|
||||
- token
|
||||
additionalProperties: false
|
||||
# Presumably closes the cost_metrics variant: type and url required.
required:
|
||||
- type
|
||||
- url
|
||||
additionalProperties: false
|
||||
# Variant 2: type == prometheus_metrics. Has url, query and refresh_interval keys.
- type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: prometheus_metrics
|
||||
url:
|
||||
type: string
|
||||
query:
|
||||
type: string
|
||||
refresh_interval:
|
||||
type: integer
|
||||
minimum: 1
|
||||
description: "Refresh interval in seconds"
|
||||
# Presumably closes the prometheus_metrics variant: type, url and query required.
required:
|
||||
- type
|
||||
- url
|
||||
- query
|
||||
additionalProperties: false
|
||||
# Variant 3: type == digitalocean_pricing. Has refresh_interval and
# model_aliases keys; no url key is listed.
- type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: digitalocean_pricing
|
||||
refresh_interval:
|
||||
type: integer
|
||||
minimum: 1
|
||||
description: "Refresh interval in seconds"
|
||||
# model_aliases: free-form object whose values must be strings.
model_aliases:
|
||||
type: object
|
||||
description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
|
||||
additionalProperties:
|
||||
type: string
|
||||
# Presumably closes the digitalocean_pricing variant: only type is required.
required:
|
||||
- type
|
||||
additionalProperties: false
|
||||
|
||||
additionalProperties: false
|
||||
required:
|
||||
- version
|
||||
|
|
|
|||
|
|
@ -1,6 +1,16 @@
|
|||
#!/bin/bash
#
# Preamble for the config validation loop that follows: resolves the repository
# layout relative to this script, picks the Python launcher, and initialises the
# list of failed files.

# Absolute directory of this script, the repository root one level above it,
# and the cli/ project inside the repository. Abort early if either directory
# cannot be resolved; otherwise CLI_DIR would silently become "/cli".
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "Unable to resolve the script directory" >&2
  exit 1
}
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" || {
  echo "Unable to resolve the repository root from $SCRIPT_DIR" >&2
  exit 1
}
CLI_DIR="$REPO_ROOT/cli"

# Use uv run if available and cli/ has a pyproject.toml, otherwise fall back to
# a bare interpreter: `python` when it is on PATH, else `python3` when only that
# exists. If neither is found, `python` is kept so the failure mode is unchanged.
#
# PYTHON_CMD stays a plain string (not an array) because the validation loop
# expands it unquoted as `$PYTHON_CMD -m planoai.config_generator`; a CLI_DIR
# containing whitespace would therefore be split into several words.
if command -v uv &> /dev/null && [ -f "$CLI_DIR/pyproject.toml" ]; then
  PYTHON_CMD="uv run --directory $CLI_DIR python"
elif command -v python &> /dev/null || ! command -v python3 &> /dev/null; then
  PYTHON_CMD="python"
else
  PYTHON_CMD="python3"
fi

# Starts empty; presumably filled by the loop below when a config fails
# validation -- the appending code is outside this block.
failed_files=()
|
||||
|
||||
for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do
|
||||
|
|
@ -14,7 +24,7 @@ for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml
|
|||
ENVOY_CONFIG_TEMPLATE_FILE="envoy.template.yaml" \
|
||||
PLANO_CONFIG_FILE_RENDERED="$rendered_file" \
|
||||
ENVOY_CONFIG_FILE_RENDERED="/dev/null" \
|
||||
python -m planoai.config_generator 2>&1 > /dev/null
|
||||
$PYTHON_CMD -m planoai.config_generator 2>&1 > /dev/null
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Validation failed for $file"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue