mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
fix ordering and add debug statement
This commit is contained in:
parent
bac03b7583
commit
1c01d0f83c
1 changed file with 42 additions and 9 deletions
|
|
@@ -6,7 +6,7 @@ use common::configuration::{
|
|||
CostProvider, LatencyProvider, MetricsSource, SelectionPolicy, SelectionPreference,
|
||||
};
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{info, warn};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
const DO_PRICING_URL: &str = "https://api.digitalocean.com/v2/gen-ai/models/catalog";
|
||||
|
||||
|
|
@@ -80,24 +80,32 @@ impl ModelMetricsService {
|
|||
/// Rank `models` by `policy`, returning them in preference order.
|
||||
/// Models with no metric data are appended at the end in their original order.
|
||||
pub async fn rank_models(&self, models: &[String], policy: &SelectionPolicy) -> Vec<String> {
|
||||
let cost_data = self.cost.read().await;
|
||||
let latency_data = self.latency.read().await;
|
||||
debug!(
|
||||
input_models = ?models,
|
||||
cost_data = ?cost_data.iter().collect::<Vec<_>>(),
|
||||
latency_data = ?latency_data.iter().collect::<Vec<_>>(),
|
||||
prefer = ?policy.prefer,
|
||||
"rank_models called"
|
||||
);
|
||||
|
||||
match policy.prefer {
|
||||
SelectionPreference::Cheapest => {
|
||||
let data = self.cost.read().await;
|
||||
for m in models {
|
||||
if !data.contains_key(m.as_str()) {
|
||||
if !cost_data.contains_key(m.as_str()) {
|
||||
warn!(model = %m, "no cost data for model — ranking last (prefer: cheapest)");
|
||||
}
|
||||
}
|
||||
rank_by_ascending_metric(models, &data)
|
||||
rank_by_ascending_metric(models, &cost_data)
|
||||
}
|
||||
SelectionPreference::Fastest => {
|
||||
let data = self.latency.read().await;
|
||||
for m in models {
|
||||
if !data.contains_key(m.as_str()) {
|
||||
if !latency_data.contains_key(m.as_str()) {
|
||||
warn!(model = %m, "no latency data for model — ranking last (prefer: fastest)");
|
||||
}
|
||||
}
|
||||
rank_by_ascending_metric(models, &data)
|
||||
rank_by_ascending_metric(models, &latency_data)
|
||||
}
|
||||
SelectionPreference::None => models.to_vec(),
|
||||
}
|
||||
|
|
@@ -117,13 +125,20 @@ impl ModelMetricsService {
|
|||
fn rank_by_ascending_metric(models: &[String], data: &HashMap<String, f64>) -> Vec<String> {
|
||||
let mut with_data: Vec<(&String, f64)> = models
|
||||
.iter()
|
||||
.filter_map(|m| data.get(m.as_str()).map(|v| (m, *v)))
|
||||
.filter_map(|m| {
|
||||
let v = *data.get(m.as_str())?;
|
||||
if v.is_nan() {
|
||||
None
|
||||
} else {
|
||||
Some((m, v))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
with_data.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
let without_data: Vec<&String> = models
|
||||
.iter()
|
||||
.filter(|m| !data.contains_key(m.as_str()))
|
||||
.filter(|m| data.get(m.as_str()).is_none_or(|v| v.is_nan()))
|
||||
.collect();
|
||||
|
||||
with_data
|
||||
|
|
@@ -352,4 +367,22 @@ mod tests {
|
|||
// none → original order, despite gpt-4o-mini being cheaper
|
||||
assert_eq!(result, vec!["gpt-4o", "gpt-4o-mini"]);
|
||||
}
|
||||
|
||||
// Regression test for `rank_by_ascending_metric`: a model whose metric value
// is NaN must be ranked the same way as a model with no metric entry at all,
// i.e. after every model that has a comparable value.
#[test]
|
||||
fn test_rank_by_ascending_metric_nan_treated_as_missing() {
|
||||
// Input order is a, b, c, d; this order is what the trailing
// (NaN / missing) models are expected to keep in the result.
let models = vec![
|
||||
"a".to_string(),
|
||||
"b".to_string(),
|
||||
"c".to_string(),
|
||||
"d".to_string(),
|
||||
];
|
||||
let mut data = HashMap::new();
|
||||
// "a" has an entry, but its value is NaN and therefore cannot be ordered.
data.insert("a".to_string(), f64::NAN);
|
||||
data.insert("b".to_string(), 0.5);
|
||||
data.insert("c".to_string(), 0.1);
|
||||
// "d" has no entry at all
|
||||
let result = rank_by_ascending_metric(&models, &data);
|
||||
// c (0.1) < b (0.5), then NaN "a" and missing "d" appended in original order
|
||||
assert_eq!(result, vec!["c", "b", "a", "d"]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue