mirror of
https://github.com/katanemo/plano.git
synced 2026-05-24 14:05:14 +02:00
add model_aliases to digitalocean_pricing, use model_id as key, warn on missing data at request time
This commit is contained in:
parent
bd335cd8bd
commit
a7903d9271
6 changed files with 59 additions and 20 deletions
|
|
@ -563,6 +563,11 @@ properties:
|
||||||
type: integer
|
type: integer
|
||||||
minimum: 1
|
minimum: 1
|
||||||
description: "Refresh interval in seconds"
|
description: "Refresh interval in seconds"
|
||||||
|
model_aliases:
|
||||||
|
type: object
|
||||||
|
description: "Map DO catalog model_id values to Plano model names used in routing_preferences. Example: 'openai-gpt-4o: openai/gpt-4o'"
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
|
|
||||||
|
|
@ -193,9 +193,7 @@ async fn init_app_state(
|
||||||
let provider_model_names: std::collections::HashSet<&str> = config
|
let provider_model_names: std::collections::HashSet<&str> = config
|
||||||
.model_providers
|
.model_providers
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|p| {
|
.flat_map(|p| std::iter::once(p.name.as_str()).chain(p.model.as_deref()))
|
||||||
std::iter::once(p.name.as_str()).chain(p.model.as_deref())
|
|
||||||
})
|
|
||||||
.collect();
|
.collect();
|
||||||
for pref in route_prefs {
|
for pref in route_prefs {
|
||||||
for model in &pref.models {
|
for model in &pref.models {
|
||||||
|
|
|
||||||
|
|
@ -72,8 +72,12 @@ impl ModelMetricsService {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MetricsSource::DigitalOceanPricing { refresh_interval } => {
|
MetricsSource::DigitalOceanPricing {
|
||||||
let data = fetch_do_pricing(&client).await;
|
refresh_interval,
|
||||||
|
model_aliases,
|
||||||
|
} => {
|
||||||
|
let aliases = model_aliases.clone().unwrap_or_default();
|
||||||
|
let data = fetch_do_pricing(&client, &aliases).await;
|
||||||
info!(models = data.len(), "fetched digitalocean pricing");
|
info!(models = data.len(), "fetched digitalocean pricing");
|
||||||
*cost_data.write().await = data;
|
*cost_data.write().await = data;
|
||||||
|
|
||||||
|
|
@ -84,7 +88,7 @@ impl ModelMetricsService {
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
loop {
|
loop {
|
||||||
tokio::time::sleep(interval).await;
|
tokio::time::sleep(interval).await;
|
||||||
let data = fetch_do_pricing(&client_clone).await;
|
let data = fetch_do_pricing(&client_clone, &aliases).await;
|
||||||
info!(models = data.len(), "refreshed digitalocean pricing");
|
info!(models = data.len(), "refreshed digitalocean pricing");
|
||||||
*cost_clone.write().await = data;
|
*cost_clone.write().await = data;
|
||||||
}
|
}
|
||||||
|
|
@ -106,10 +110,20 @@ impl ModelMetricsService {
|
||||||
match policy.prefer {
|
match policy.prefer {
|
||||||
SelectionPreference::Cheapest => {
|
SelectionPreference::Cheapest => {
|
||||||
let data = self.cost.read().await;
|
let data = self.cost.read().await;
|
||||||
|
for m in models {
|
||||||
|
if !data.contains_key(m.as_str()) {
|
||||||
|
warn!(model = %m, "no cost data for model — ranking last (prefer: cheapest)");
|
||||||
|
}
|
||||||
|
}
|
||||||
rank_by_ascending_metric(models, &data)
|
rank_by_ascending_metric(models, &data)
|
||||||
}
|
}
|
||||||
SelectionPreference::Fastest => {
|
SelectionPreference::Fastest => {
|
||||||
let data = self.latency.read().await;
|
let data = self.latency.read().await;
|
||||||
|
for m in models {
|
||||||
|
if !data.contains_key(m.as_str()) {
|
||||||
|
warn!(model = %m, "no latency data for model — ranking last (prefer: fastest)");
|
||||||
|
}
|
||||||
|
}
|
||||||
rank_by_ascending_metric(models, &data)
|
rank_by_ascending_metric(models, &data)
|
||||||
}
|
}
|
||||||
SelectionPreference::Random => shuffle(models),
|
SelectionPreference::Random => shuffle(models),
|
||||||
|
|
@ -210,27 +224,31 @@ struct DoModelList {
|
||||||
#[derive(serde::Deserialize)]
|
#[derive(serde::Deserialize)]
|
||||||
struct DoModel {
|
struct DoModel {
|
||||||
model_id: String,
|
model_id: String,
|
||||||
creator: String,
|
pricing: Option<DoPricing>,
|
||||||
pricing: DoPricing,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Deserialize)]
|
#[derive(serde::Deserialize)]
|
||||||
struct DoPricing {
|
struct DoPricing {
|
||||||
input_price_per_million: f64,
|
input_price_per_million: Option<f64>,
|
||||||
output_price_per_million: f64,
|
output_price_per_million: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_do_pricing(client: &reqwest::Client) -> HashMap<String, f64> {
|
async fn fetch_do_pricing(
|
||||||
|
client: &reqwest::Client,
|
||||||
|
aliases: &HashMap<String, String>,
|
||||||
|
) -> HashMap<String, f64> {
|
||||||
match client.get(DO_PRICING_URL).send().await {
|
match client.get(DO_PRICING_URL).send().await {
|
||||||
Ok(resp) => match resp.json::<DoModelList>().await {
|
Ok(resp) => match resp.json::<DoModelList>().await {
|
||||||
Ok(list) => list
|
Ok(list) => list
|
||||||
.data
|
.data
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|m| {
|
.filter_map(|m| {
|
||||||
let key = format!("{}/{}", m.creator.to_lowercase(), m.model_id);
|
let pricing = m.pricing?;
|
||||||
let cost =
|
let raw_key = m.model_id.clone();
|
||||||
m.pricing.input_price_per_million + m.pricing.output_price_per_million;
|
let key = aliases.get(&raw_key).cloned().unwrap_or(raw_key);
|
||||||
(key, cost)
|
let cost = pricing.input_price_per_million.unwrap_or(0.0)
|
||||||
|
+ pricing.output_price_per_million.unwrap_or(0.0);
|
||||||
|
Some((key, cost))
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect(),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
|
|
||||||
|
|
@ -150,6 +150,9 @@ pub enum MetricsSource {
|
||||||
#[serde(rename = "digitalocean_pricing")]
|
#[serde(rename = "digitalocean_pricing")]
|
||||||
DigitalOceanPricing {
|
DigitalOceanPricing {
|
||||||
refresh_interval: Option<u64>,
|
refresh_interval: Option<u64>,
|
||||||
|
/// Map DO catalog `model_id` values to Plano model names.
|
||||||
|
/// Example: `openai-gpt-4o: openai/gpt-4o`
|
||||||
|
model_aliases: Option<HashMap<String, String>>,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,9 +34,18 @@ routing_preferences:
|
||||||
prefer: fastest
|
prefer: fastest
|
||||||
|
|
||||||
model_metrics_sources:
|
model_metrics_sources:
|
||||||
- type: cost_metrics
|
- type: digitalocean_pricing
|
||||||
url: http://localhost:8080/costs
|
refresh_interval: 3600
|
||||||
refresh_interval: 300
|
model_aliases:
|
||||||
|
openai-gpt-4o: openai/gpt-4o
|
||||||
|
openai-gpt-4o-mini: openai/gpt-4o-mini
|
||||||
|
anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
|
||||||
|
|
||||||
|
# Use cost_metrics instead of digitalocean_pricing to supply your own pricing data.
|
||||||
|
# The demo metrics_server.py exposes /costs with OpenAI and Anthropic pricing.
|
||||||
|
# - type: cost_metrics
|
||||||
|
# url: http://localhost:8080/costs
|
||||||
|
# refresh_interval: 300
|
||||||
|
|
||||||
- type: prometheus_metrics
|
- type: prometheus_metrics
|
||||||
url: http://localhost:9090
|
url: http://localhost:9090
|
||||||
|
|
|
||||||
|
|
@ -201,9 +201,15 @@ Fetches public model pricing from the DigitalOcean Gen-AI catalog. No authentica
|
||||||
model_metrics_sources:
|
model_metrics_sources:
|
||||||
- type: digitalocean_pricing
|
- type: digitalocean_pricing
|
||||||
refresh_interval: 3600 # re-fetch every hour; omit to fetch once on startup
|
refresh_interval: 3600 # re-fetch every hour; omit to fetch once on startup
|
||||||
|
model_aliases:
|
||||||
|
openai-gpt-4o: openai/gpt-4o
|
||||||
|
openai-gpt-4o-mini: openai/gpt-4o-mini
|
||||||
|
anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
|
||||||
```
|
```
|
||||||
|
|
||||||
Model IDs are normalized as `lowercase(creator)/model_id` — for example, `creator: "OpenAI"`, `model_id: "openai-gpt-4o"` → `"openai/openai-gpt-4o"`. The cost scalar is `input_price_per_million + output_price_per_million`.
|
DO catalog entries are stored by their `model_id` field (e.g. `openai-gpt-4o`). The cost scalar is `input_price_per_million + output_price_per_million`.
|
||||||
|
|
||||||
|
**`model_aliases`** — optional. Maps DO `model_id` values to the model names used in `routing_preferences`. Without aliases, cost data is stored under the DO model_id (e.g. `openai-gpt-4o`), which won't match models configured as `openai/gpt-4o`. Aliases let you bridge the naming gap without changing your routing config.
|
||||||
|
|
||||||
**Constraints:**
|
**Constraints:**
|
||||||
- `cost_metrics` and `digitalocean_pricing` cannot both be configured — use one or the other.
|
- `cost_metrics` and `digitalocean_pricing` cannot both be configured — use one or the other.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue