model routing: cost/latency ranking with ranked fallback list (#849)

This commit is contained in:
Adil Hafeez 2026-03-30 13:46:52 -07:00 committed by GitHub
parent 3a531ce22a
commit e5751d6b13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1524 additions and 317 deletions

View file

@@ -9,6 +9,7 @@ properties:
- 0.1-beta
- 0.2.0
- v0.3.0
- v0.4.0
agents:
type: array
@@ -470,6 +471,106 @@ properties:
additionalProperties: false
required:
- jailbreak
# Schema for `routing_preferences`: a list of named routing entries.
# Every entry must supply all four fields; unknown fields are rejected.
routing_preferences:
  type: array
  items:
    type: object
    additionalProperties: false
    required: [name, description, models, selection_policy]
    properties:
      name:
        type: string
      description:
        type: string
      # Non-empty list of model identifiers (plain strings).
      models:
        type: array
        minItems: 1
        items:
          type: string
      # Policy object with a single mandatory key, `prefer`, restricted to
      # the three values in the enum below.
      selection_policy:
        type: object
        additionalProperties: false
        required: [prefer]
        properties:
          prefer:
            type: string
            enum: [cheapest, fastest, none]
# Schema for `model_metrics_sources`: a list of metric source definitions.
# Each item must match exactly one of the three shapes below; the `type`
# constant tells them apart. Unknown keys are rejected in every shape.
model_metrics_sources:
  type: array
  items:
    oneOf:
      # Source 1: `cost_metrics` - requires `url`; optional bearer auth.
      - type: object
        properties:
          type:
            type: string
            const: cost_metrics
          url:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            # Annotation added for parity with the two sibling sources.
            # NOTE(review): confirm the loader reads this value as seconds.
            description: "Refresh interval in seconds"
          # When present, both `type` and `token` must be given.
          auth:
            type: object
            properties:
              type:
                type: string
                enum:
                  - bearer
              token:
                type: string
            required:
              - type
              - token
            additionalProperties: false
        required:
          - type
          - url
        additionalProperties: false
      # Source 2: `prometheus_metrics` - requires `url` and `query`.
      - type: object
        properties:
          type:
            type: string
            const: prometheus_metrics
          url:
            type: string
          query:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
        required:
          - type
          - url
          - query
        additionalProperties: false
      # Source 3: `digitalocean_pricing` - only `type` is required.
      - type: object
        properties:
          type:
            type: string
            const: digitalocean_pricing
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          # Free-form string-to-string map; keys are not constrained here.
          model_aliases:
            type: object
            description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
            additionalProperties:
              type: string
        required:
          - type
        additionalProperties: false
additionalProperties: false
required:
- version