mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
model routing: cost/latency ranking with ranked fallback list (#849)
This commit is contained in:
parent
3a531ce22a
commit
e5751d6b13
23 changed files with 1524 additions and 317 deletions
|
|
@ -9,6 +9,7 @@ properties:
|
|||
- 0.1-beta
|
||||
- 0.2.0
|
||||
- v0.3.0
|
||||
- v0.4.0
|
||||
|
||||
agents:
|
||||
type: array
|
||||
|
|
@ -470,6 +471,106 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- jailbreak
|
||||
# List of routing preferences. Every entry must supply all four fields
# below; unknown keys are rejected at both the entry and policy level.
routing_preferences:
  type: array
  items:
    type: object
    properties:
      name:
        type: string
      description:
        type: string
      # Candidate models for this preference; at least one is required.
      models:
        type: array
        items:
          type: string
        minItems: 1
      # How to choose among `models`.
      # NOTE(review): presumably `cheapest` ranks by cost, `fastest` by
      # latency, and `none` keeps the declared order - confirm against
      # the router implementation.
      selection_policy:
        type: object
        properties:
          prefer:
            type: string
            enum:
              - cheapest
              - fastest
              - none
        additionalProperties: false
        required:
          - prefer
    additionalProperties: false
    required:
      - name
      - description
      - models
      - selection_policy
|
||||
|
||||
# List of metric sources. Each item must match exactly one of the three
# shapes below, distinguished by the constant value of its `type` field.
model_metrics_sources:
  type: array
  items:
    oneOf:
      # Source identified by `type: cost_metrics`; requires a `url`.
      - type: object
        properties:
          type:
            type: string
            const: cost_metrics
          url:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          # Optional credentials; when present, both fields are required
          # and `bearer` is the only accepted auth type.
          auth:
            type: object
            properties:
              type:
                type: string
                enum:
                  - bearer
              token:
                type: string
            required:
              - type
              - token
            additionalProperties: false
        required:
          - type
          - url
        additionalProperties: false
      # Source identified by `type: prometheus_metrics`; requires both a
      # `url` and a `query`.
      - type: object
        properties:
          type:
            type: string
            const: prometheus_metrics
          url:
            type: string
          query:
            type: string
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
        required:
          - type
          - url
          - query
        additionalProperties: false
      # Source identified by `type: digitalocean_pricing`; only `type` is
      # required.
      - type: object
        properties:
          type:
            type: string
            const: digitalocean_pricing
          refresh_interval:
            type: integer
            minimum: 1
            description: "Refresh interval in seconds"
          # Free-form string-to-string map.
          model_aliases:
            type: object
            description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
            additionalProperties:
              type: string
        required:
          - type
        additionalProperties: false
|
||||
|
||||
additionalProperties: false
|
||||
required:
|
||||
- version
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue