add model_aliases to digitalocean_pricing, use model_id as key, warn on missing data at request time

2026-07-17 16:31:04 +02:00 · 2026-03-27 17:32:15 -07:00 · 2026-03-27 17:32:15 -07:00 · a7903d9271
commit a7903d9271
parent bd335cd8bd
6 changed files with 59 additions and 20 deletions
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@@ -563,6 +563,11 @@ properties:
              type: integer
              minimum: 1
              description: "Refresh interval in seconds"
            model_aliases:
              type: object
              description: "Map DO catalog model_id values to Plano model names used in routing_preferences. Example: 'openai-gpt-4o: openai/gpt-4o'"
              additionalProperties:
                type: string
          required:
            - type
          additionalProperties: false
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -193,9 +193,7 @@ async fn init_app_state(
        let provider_model_names: std::collections::HashSet<&str> = config
            .model_providers
            .iter()
-            .flat_map(|p| {
-                std::iter::once(p.name.as_str()).chain(p.model.as_deref())
-            })
+            .flat_map(|p| std::iter::once(p.name.as_str()).chain(p.model.as_deref()))
            .collect();
        for pref in route_prefs {
            for model in &pref.models {
--- a/crates/brightstaff/src/router/model_metrics.rs
+++ b/crates/brightstaff/src/router/model_metrics.rs
@@ -72,8 +72,12 @@ impl ModelMetricsService {
                        });
                    }
                }
-                MetricsSource::DigitalOceanPricing { refresh_interval } => {
-                    let data = fetch_do_pricing(&client).await;
+                MetricsSource::DigitalOceanPricing {
+                    refresh_interval,
+                    model_aliases,
+                } => {
+                    let aliases = model_aliases.clone().unwrap_or_default();
+                    let data = fetch_do_pricing(&client, &aliases).await;
                    info!(models = data.len(), "fetched digitalocean pricing");
                    *cost_data.write().await = data;
@@ -84,7 +88,7 @@ impl ModelMetricsService {
                        tokio::spawn(async move {
                            loop {
                                tokio::time::sleep(interval).await;
-                                let data = fetch_do_pricing(&client_clone).await;
+                                let data = fetch_do_pricing(&client_clone, &aliases).await;
                                info!(models = data.len(), "refreshed digitalocean pricing");
                                *cost_clone.write().await = data;
                            }
@@ -106,10 +110,20 @@ impl ModelMetricsService {
        match policy.prefer {
            SelectionPreference::Cheapest => {
                let data = self.cost.read().await;
                for m in models {
                    if !data.contains_key(m.as_str()) {
                        warn!(model = %m, "no cost data for model — ranking last (prefer: cheapest)");
                    }
                }
                rank_by_ascending_metric(models, &data)
            }
            SelectionPreference::Fastest => {
                let data = self.latency.read().await;
                for m in models {
                    if !data.contains_key(m.as_str()) {
                        warn!(model = %m, "no latency data for model — ranking last (prefer: fastest)");
                    }
                }
                rank_by_ascending_metric(models, &data)
            }
            SelectionPreference::Random => shuffle(models),
@@ -210,27 +224,31 @@ struct DoModelList {
 #[derive(serde::Deserialize)]
 struct DoModel {
    model_id: String,
-    creator: String,
-    pricing: DoPricing,
+    pricing: Option<DoPricing>,
 }
 #[derive(serde::Deserialize)]
 struct DoPricing {
-    input_price_per_million: f64,
-    output_price_per_million: f64,
+    input_price_per_million: Option<f64>,
+    output_price_per_million: Option<f64>,
 }
-async fn fetch_do_pricing(client: &reqwest::Client) -> HashMap<String, f64> {
+async fn fetch_do_pricing(
+    client: &reqwest::Client,
+    aliases: &HashMap<String, String>,
+) -> HashMap<String, f64> {
    match client.get(DO_PRICING_URL).send().await {
        Ok(resp) => match resp.json::<DoModelList>().await {
            Ok(list) => list
                .data
                .into_iter()
-                .map(|m| {
-                    let key = format!("{}/{}", m.creator.to_lowercase(), m.model_id);
-                    let cost =
-                        m.pricing.input_price_per_million + m.pricing.output_price_per_million;
-                    (key, cost)
+                .filter_map(|m| {
+                    let pricing = m.pricing?;
+                    let raw_key = m.model_id.clone();
+                    let key = aliases.get(&raw_key).cloned().unwrap_or(raw_key);
+                    let cost = pricing.input_price_per_million.unwrap_or(0.0)
+                        + pricing.output_price_per_million.unwrap_or(0.0);
+                    Some((key, cost))
                })
                .collect(),
            Err(err) => {
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -150,6 +150,9 @@ pub enum MetricsSource {
    #[serde(rename = "digitalocean_pricing")]
    DigitalOceanPricing {
        refresh_interval: Option<u64>,
        /// Map DO catalog `model_id` values to Plano model names.
        /// Example: `openai-gpt-4o: openai/gpt-4o`
        model_aliases: Option<HashMap<String, String>>,
    },
 }
--- a/demos/llm_routing/model_routing_service/config.yaml
+++ b/demos/llm_routing/model_routing_service/config.yaml
@@ -34,9 +34,18 @@ routing_preferences:
      prefer: fastest
 model_metrics_sources:
-  - type: cost_metrics
-    url: http://localhost:8080/costs
-    refresh_interval: 300
+  - type: digitalocean_pricing
+    refresh_interval: 3600
+    model_aliases:
+      openai-gpt-4o: openai/gpt-4o
+      openai-gpt-4o-mini: openai/gpt-4o-mini
+      anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
+  # Use cost_metrics instead of digitalocean_pricing to supply your own pricing data.
+  # The demo metrics_server.py exposes /costs with OpenAI and Anthropic pricing.
+  # - type: cost_metrics
+  #   url: http://localhost:8080/costs
+  #   refresh_interval: 300
  - type: prometheus_metrics
    url: http://localhost:9090
--- a/docs/routing-api.md
+++ b/docs/routing-api.md
@@ -201,9 +201,15 @@ Fetches public model pricing from the DigitalOcean Gen-AI catalog. No authentica
 model_metrics_sources:
  - type: digitalocean_pricing
    refresh_interval: 3600   # re-fetch every hour; omit to fetch once on startup
    model_aliases:
      openai-gpt-4o: openai/gpt-4o
      openai-gpt-4o-mini: openai/gpt-4o-mini
      anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
 ```
-Model IDs are normalized as `lowercase(creator)/model_id` — for example, `creator: "OpenAI"`, `model_id: "openai-gpt-4o"` → `"openai/openai-gpt-4o"`. The cost scalar is `input_price_per_million + output_price_per_million`.
+DO catalog entries are stored by their `model_id` field (e.g. `openai-gpt-4o`). The cost scalar is `input_price_per_million + output_price_per_million`.
 **`model_aliases`** — optional. Maps DO `model_id` values to the model names used in `routing_preferences`. Without aliases, cost data is stored under the DO model_id (e.g. `openai-gpt-4o`), which won't match models configured as `openai/gpt-4o`. Aliases let you bridge the naming gap without changing your routing config.
 **Constraints:**
 - `cost_metrics` and `digitalocean_pricing` cannot both be configured — use one or the other.