diff --git a/crates/brightstaff/src/router/model_metrics.rs b/crates/brightstaff/src/router/model_metrics.rs index ceafe4b8..1adb408d 100644 --- a/crates/brightstaff/src/router/model_metrics.rs +++ b/crates/brightstaff/src/router/model_metrics.rs @@ -6,7 +6,7 @@ use common::configuration::{ CostProvider, LatencyProvider, MetricsSource, SelectionPolicy, SelectionPreference, }; use tokio::sync::RwLock; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; const DO_PRICING_URL: &str = "https://api.digitalocean.com/v2/gen-ai/models/catalog"; @@ -80,24 +80,32 @@ impl ModelMetricsService { /// Rank `models` by `policy`, returning them in preference order. /// Models with no metric data are appended at the end in their original order. pub async fn rank_models(&self, models: &[String], policy: &SelectionPolicy) -> Vec { + let cost_data = self.cost.read().await; + let latency_data = self.latency.read().await; + debug!( + input_models = ?models, + cost_data = ?cost_data.iter().collect::>(), + latency_data = ?latency_data.iter().collect::>(), + prefer = ?policy.prefer, + "rank_models called" + ); + match policy.prefer { SelectionPreference::Cheapest => { - let data = self.cost.read().await; for m in models { - if !data.contains_key(m.as_str()) { + if !cost_data.contains_key(m.as_str()) { warn!(model = %m, "no cost data for model — ranking last (prefer: cheapest)"); } } - rank_by_ascending_metric(models, &data) + rank_by_ascending_metric(models, &cost_data) } SelectionPreference::Fastest => { - let data = self.latency.read().await; for m in models { - if !data.contains_key(m.as_str()) { + if !latency_data.contains_key(m.as_str()) { warn!(model = %m, "no latency data for model — ranking last (prefer: fastest)"); } } - rank_by_ascending_metric(models, &data) + rank_by_ascending_metric(models, &latency_data) } SelectionPreference::None => models.to_vec(), } @@ -117,13 +125,20 @@ impl ModelMetricsService { fn rank_by_ascending_metric(models: &[String], data: &HashMap) -> Vec { let mut with_data: Vec<(&String, f64)> = models .iter() - .filter_map(|m| data.get(m.as_str()).map(|v| (m, *v))) + .filter_map(|m| { + let v = *data.get(m.as_str())?; + if v.is_nan() { + None + } else { + Some((m, v)) + } + }) .collect(); with_data.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)); let without_data: Vec<&String> = models .iter() - .filter(|m| !data.contains_key(m.as_str())) + .filter(|m| data.get(m.as_str()).is_none_or(|v| v.is_nan())) .collect(); with_data @@ -352,4 +367,22 @@ mod tests { // none → original order, despite gpt-4o-mini being cheaper assert_eq!(result, vec!["gpt-4o", "gpt-4o-mini"]); } + + #[test] + fn test_rank_by_ascending_metric_nan_treated_as_missing() { + let models = vec![ + "a".to_string(), + "b".to_string(), + "c".to_string(), + "d".to_string(), + ]; + let mut data = HashMap::new(); + data.insert("a".to_string(), f64::NAN); + data.insert("b".to_string(), 0.5); + data.insert("c".to_string(), 0.1); + // "d" has no entry at all + let result = rank_by_ascending_metric(&models, &data); + // c (0.1) < b (0.5), then NaN "a" and missing "d" appended in original order + assert_eq!(result, vec!["c", "b", "a", "d"]); + } }