mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
add model_aliases to digitalocean_pricing, use model_id as key, warn on missing data at request time
This commit is contained in:
parent
bd335cd8bd
commit
a7903d9271
6 changed files with 59 additions and 20 deletions
|
|
@ -563,6 +563,11 @@ properties:
|
|||
type: integer
|
||||
minimum: 1
|
||||
description: "Refresh interval in seconds"
|
||||
model_aliases:
|
||||
type: object
|
||||
description: "Map DO catalog model_id values to Plano model names used in routing_preferences. Example: 'openai-gpt-4o: openai/gpt-4o'"
|
||||
additionalProperties:
|
||||
type: string
|
||||
required:
|
||||
- type
|
||||
additionalProperties: false
|
||||
|
|
|
|||
|
|
@ -193,9 +193,7 @@ async fn init_app_state(
|
|||
let provider_model_names: std::collections::HashSet<&str> = config
|
||||
.model_providers
|
||||
.iter()
|
||||
.flat_map(|p| {
|
||||
std::iter::once(p.name.as_str()).chain(p.model.as_deref())
|
||||
})
|
||||
.flat_map(|p| std::iter::once(p.name.as_str()).chain(p.model.as_deref()))
|
||||
.collect();
|
||||
for pref in route_prefs {
|
||||
for model in &pref.models {
|
||||
|
|
|
|||
|
|
@ -72,8 +72,12 @@ impl ModelMetricsService {
|
|||
});
|
||||
}
|
||||
}
|
||||
MetricsSource::DigitalOceanPricing { refresh_interval } => {
|
||||
let data = fetch_do_pricing(&client).await;
|
||||
MetricsSource::DigitalOceanPricing {
|
||||
refresh_interval,
|
||||
model_aliases,
|
||||
} => {
|
||||
let aliases = model_aliases.clone().unwrap_or_default();
|
||||
let data = fetch_do_pricing(&client, &aliases).await;
|
||||
info!(models = data.len(), "fetched digitalocean pricing");
|
||||
*cost_data.write().await = data;
|
||||
|
||||
|
|
@ -84,7 +88,7 @@ impl ModelMetricsService {
|
|||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::time::sleep(interval).await;
|
||||
let data = fetch_do_pricing(&client_clone).await;
|
||||
let data = fetch_do_pricing(&client_clone, &aliases).await;
|
||||
info!(models = data.len(), "refreshed digitalocean pricing");
|
||||
*cost_clone.write().await = data;
|
||||
}
|
||||
|
|
@ -106,10 +110,20 @@ impl ModelMetricsService {
|
|||
match policy.prefer {
|
||||
SelectionPreference::Cheapest => {
|
||||
let data = self.cost.read().await;
|
||||
for m in models {
|
||||
if !data.contains_key(m.as_str()) {
|
||||
warn!(model = %m, "no cost data for model — ranking last (prefer: cheapest)");
|
||||
}
|
||||
}
|
||||
rank_by_ascending_metric(models, &data)
|
||||
}
|
||||
SelectionPreference::Fastest => {
|
||||
let data = self.latency.read().await;
|
||||
for m in models {
|
||||
if !data.contains_key(m.as_str()) {
|
||||
warn!(model = %m, "no latency data for model — ranking last (prefer: fastest)");
|
||||
}
|
||||
}
|
||||
rank_by_ascending_metric(models, &data)
|
||||
}
|
||||
SelectionPreference::Random => shuffle(models),
|
||||
|
|
@ -210,27 +224,31 @@ struct DoModelList {
|
|||
#[derive(serde::Deserialize)]
|
||||
struct DoModel {
|
||||
model_id: String,
|
||||
creator: String,
|
||||
pricing: DoPricing,
|
||||
pricing: Option<DoPricing>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct DoPricing {
|
||||
input_price_per_million: f64,
|
||||
output_price_per_million: f64,
|
||||
input_price_per_million: Option<f64>,
|
||||
output_price_per_million: Option<f64>,
|
||||
}
|
||||
|
||||
async fn fetch_do_pricing(client: &reqwest::Client) -> HashMap<String, f64> {
|
||||
async fn fetch_do_pricing(
|
||||
client: &reqwest::Client,
|
||||
aliases: &HashMap<String, String>,
|
||||
) -> HashMap<String, f64> {
|
||||
match client.get(DO_PRICING_URL).send().await {
|
||||
Ok(resp) => match resp.json::<DoModelList>().await {
|
||||
Ok(list) => list
|
||||
.data
|
||||
.into_iter()
|
||||
.map(|m| {
|
||||
let key = format!("{}/{}", m.creator.to_lowercase(), m.model_id);
|
||||
let cost =
|
||||
m.pricing.input_price_per_million + m.pricing.output_price_per_million;
|
||||
(key, cost)
|
||||
.filter_map(|m| {
|
||||
let pricing = m.pricing?;
|
||||
let raw_key = m.model_id.clone();
|
||||
let key = aliases.get(&raw_key).cloned().unwrap_or(raw_key);
|
||||
let cost = pricing.input_price_per_million.unwrap_or(0.0)
|
||||
+ pricing.output_price_per_million.unwrap_or(0.0);
|
||||
Some((key, cost))
|
||||
})
|
||||
.collect(),
|
||||
Err(err) => {
|
||||
|
|
|
|||
|
|
@ -150,6 +150,9 @@ pub enum MetricsSource {
|
|||
#[serde(rename = "digitalocean_pricing")]
|
||||
DigitalOceanPricing {
|
||||
refresh_interval: Option<u64>,
|
||||
/// Map DO catalog `model_id` values to Plano model names used in routing preferences.
|
||||
/// Example: `openai-gpt-4o: openai/gpt-4o`
|
||||
model_aliases: Option<HashMap<String, String>>,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,9 +34,18 @@ routing_preferences:
|
|||
prefer: fastest
|
||||
|
||||
model_metrics_sources:
|
||||
- type: cost_metrics
|
||||
url: http://localhost:8080/costs
|
||||
refresh_interval: 300
|
||||
- type: digitalocean_pricing
|
||||
refresh_interval: 3600
|
||||
model_aliases:
|
||||
openai-gpt-4o: openai/gpt-4o
|
||||
openai-gpt-4o-mini: openai/gpt-4o-mini
|
||||
anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
|
||||
|
||||
# Use cost_metrics instead of digitalocean_pricing to supply your own pricing data.
|
||||
# The demo metrics_server.py exposes /costs with OpenAI and Anthropic pricing.
|
||||
# - type: cost_metrics
|
||||
# url: http://localhost:8080/costs
|
||||
# refresh_interval: 300
|
||||
|
||||
- type: prometheus_metrics
|
||||
url: http://localhost:9090
|
||||
|
|
|
|||
|
|
@ -201,9 +201,15 @@ Fetches public model pricing from the DigitalOcean Gen-AI catalog. No authentica
|
|||
model_metrics_sources:
|
||||
- type: digitalocean_pricing
|
||||
refresh_interval: 3600 # re-fetch every hour; omit to fetch once on startup
|
||||
model_aliases:
|
||||
openai-gpt-4o: openai/gpt-4o
|
||||
openai-gpt-4o-mini: openai/gpt-4o-mini
|
||||
anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
|
||||
```
|
||||
|
||||
Model IDs are normalized as `lowercase(creator)/model_id` — for example, `creator: "OpenAI"`, `model_id: "openai-gpt-4o"` → `"openai/openai-gpt-4o"`. The cost scalar is `input_price_per_million + output_price_per_million`.
|
||||
DO catalog entries are stored by their `model_id` field (e.g. `openai-gpt-4o`). The cost scalar is `input_price_per_million + output_price_per_million`.
|
||||
|
||||
**`model_aliases`** — optional. Maps DO `model_id` values to the model names used in `routing_preferences`. Without aliases, cost data is stored under the DO `model_id` (e.g. `openai-gpt-4o`), which won't match models configured as `openai/gpt-4o`. Aliases let you bridge the naming gap without changing your routing config.
|
||||
|
||||
**Constraints:**
|
||||
- `cost_metrics` and `digitalocean_pricing` cannot both be configured — use one or the other.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue