diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 4d1805ab..a1bd15ab 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -563,6 +563,11 @@ properties: type: integer minimum: 1 description: "Refresh interval in seconds" + model_aliases: + type: object + description: "Map DO catalog model_id values to Plano model names used in routing_preferences. Example: 'openai-gpt-oss-120b: openai/gpt-4o'" + additionalProperties: + type: string required: - type additionalProperties: false diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index e0970c42..19ef9efb 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -193,9 +193,7 @@ async fn init_app_state( let provider_model_names: std::collections::HashSet<&str> = config .model_providers .iter() - .flat_map(|p| { - std::iter::once(p.name.as_str()).chain(p.model.as_deref()) - }) + .flat_map(|p| std::iter::once(p.name.as_str()).chain(p.model.as_deref())) .collect(); for pref in route_prefs { for model in &pref.models { diff --git a/crates/brightstaff/src/router/model_metrics.rs b/crates/brightstaff/src/router/model_metrics.rs index 604ed2a4..75b10b7c 100644 --- a/crates/brightstaff/src/router/model_metrics.rs +++ b/crates/brightstaff/src/router/model_metrics.rs @@ -72,8 +72,12 @@ impl ModelMetricsService { }); } } - MetricsSource::DigitalOceanPricing { refresh_interval } => { - let data = fetch_do_pricing(&client).await; + MetricsSource::DigitalOceanPricing { + refresh_interval, + model_aliases, + } => { + let aliases = model_aliases.clone().unwrap_or_default(); + let data = fetch_do_pricing(&client, &aliases).await; info!(models = data.len(), "fetched digitalocean pricing"); *cost_data.write().await = data; @@ -84,7 +88,7 @@ impl ModelMetricsService { tokio::spawn(async move { loop { tokio::time::sleep(interval).await; - let data = fetch_do_pricing(&client_clone).await; + let data 
= fetch_do_pricing(&client_clone, &aliases).await; info!(models = data.len(), "refreshed digitalocean pricing"); *cost_clone.write().await = data; } @@ -106,10 +110,20 @@ impl ModelMetricsService { match policy.prefer { SelectionPreference::Cheapest => { let data = self.cost.read().await; + for m in models { + if !data.contains_key(m.as_str()) { + warn!(model = %m, "no cost data for model — ranking last (prefer: cheapest)"); + } + } rank_by_ascending_metric(models, &data) } SelectionPreference::Fastest => { let data = self.latency.read().await; + for m in models { + if !data.contains_key(m.as_str()) { + warn!(model = %m, "no latency data for model — ranking last (prefer: fastest)"); + } + } rank_by_ascending_metric(models, &data) } SelectionPreference::Random => shuffle(models), @@ -210,27 +224,31 @@ struct DoModelList { #[derive(serde::Deserialize)] struct DoModel { model_id: String, - creator: String, - pricing: DoPricing, + pricing: Option<DoPricing>, } #[derive(serde::Deserialize)] struct DoPricing { - input_price_per_million: f64, - output_price_per_million: f64, + input_price_per_million: Option<f64>, + output_price_per_million: Option<f64>, } -async fn fetch_do_pricing(client: &reqwest::Client) -> HashMap<String, f64> { +async fn fetch_do_pricing( + client: &reqwest::Client, + aliases: &HashMap<String, String>, +) -> HashMap<String, f64> { match client.get(DO_PRICING_URL).send().await { Ok(resp) => match resp.json::<DoModelList>().await { Ok(list) => list .data .into_iter() - .map(|m| { - let key = format!("{}/{}", m.creator.to_lowercase(), m.model_id); - let cost = - m.pricing.input_price_per_million + m.pricing.output_price_per_million; - (key, cost) + .filter_map(|m| { + let pricing = m.pricing?; + let raw_key = m.model_id.clone(); + let key = aliases.get(&raw_key).cloned().unwrap_or(raw_key); + let cost = pricing.input_price_per_million.unwrap_or(0.0) + + pricing.output_price_per_million.unwrap_or(0.0); + Some((key, cost)) }) .collect(), Err(err) => { diff --git a/crates/common/src/configuration.rs 
b/crates/common/src/configuration.rs index befbc9f2..b3a42d64 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -150,6 +150,9 @@ pub enum MetricsSource { #[serde(rename = "digitalocean_pricing")] DigitalOceanPricing { refresh_interval: Option<u64>, + /// Map DO catalog `model_id` values to Plano model names used in routing_preferences. + /// Example: `openai-gpt-oss-120b: openai/gpt-4o` + model_aliases: Option<HashMap<String, String>>, }, } diff --git a/demos/llm_routing/model_routing_service/config.yaml b/demos/llm_routing/model_routing_service/config.yaml index 543f3902..f79a0e30 100644 --- a/demos/llm_routing/model_routing_service/config.yaml +++ b/demos/llm_routing/model_routing_service/config.yaml @@ -34,9 +34,18 @@ routing_preferences: prefer: fastest model_metrics_sources: - - type: cost_metrics - url: http://localhost:8080/costs - refresh_interval: 300 + - type: digitalocean_pricing + refresh_interval: 3600 + model_aliases: + openai-gpt-4o: openai/gpt-4o + openai-gpt-4o-mini: openai/gpt-4o-mini + anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514 + + # Use cost_metrics instead of digitalocean_pricing to supply your own pricing data. + # The demo metrics_server.py exposes /costs with OpenAI and Anthropic pricing. + # - type: cost_metrics + # url: http://localhost:8080/costs + # refresh_interval: 300 - type: prometheus_metrics url: http://localhost:9090 diff --git a/docs/routing-api.md b/docs/routing-api.md index f4d75803..4ca91f23 100644 --- a/docs/routing-api.md +++ b/docs/routing-api.md @@ -201,9 +201,15 @@ Fetches public model pricing from the DigitalOcean Gen-AI catalog. 
No authentication is required. ```yaml model_metrics_sources: - type: digitalocean_pricing refresh_interval: 3600 # re-fetch every hour; omit to fetch once on startup + model_aliases: + openai-gpt-4o: openai/gpt-4o + openai-gpt-4o-mini: openai/gpt-4o-mini + anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514 ``` -Model IDs are normalized as `lowercase(creator)/model_id` — for example, `creator: "OpenAI"`, `model_id: "openai-gpt-4o"` → `"openai/openai-gpt-4o"`. The cost scalar is `input_price_per_million + output_price_per_million`. +DO catalog entries are stored by their `model_id` field (e.g. `openai-gpt-4o`). The cost scalar is `input_price_per_million + output_price_per_million`. + +**`model_aliases`** — optional. Maps DO `model_id` values to the model names used in `routing_preferences`. Without aliases, cost data is stored under the DO model_id (e.g. `openai-gpt-4o`), which won't match models configured as `openai/gpt-4o`. Aliases let you bridge the naming gap without changing your routing config. **Constraints:** - `cost_metrics` and `digitalocean_pricing` cannot both be configured — use one or the other.