mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
remove random selection policy — consumers can shuffle client-side
Plano should only handle ranking that requires server-side data (cost metrics, latency). Random shuffling is trivial for callers.
This commit is contained in:
parent
41e6b489f5
commit
5b869648c4
4 changed files with 1 additions and 23 deletions
|
|
@ -493,7 +493,6 @@ properties:
|
|||
enum:
|
||||
- cheapest
|
||||
- fastest
|
||||
- random
|
||||
- none
|
||||
additionalProperties: false
|
||||
required:
|
||||
|
|
|
|||
|
|
@ -126,7 +126,6 @@ impl ModelMetricsService {
|
|||
}
|
||||
rank_by_ascending_metric(models, &data)
|
||||
}
|
||||
SelectionPreference::Random => shuffle(models),
|
||||
SelectionPreference::None => models.to_vec(),
|
||||
}
|
||||
}
|
||||
|
|
@ -161,24 +160,6 @@ fn rank_by_ascending_metric(models: &[String], data: &HashMap<String, f64>) -> V
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Returns a copy of `models` in pseudo-random order.
///
/// Performs a Fisher–Yates shuffle driven by a 64-bit linear congruential
/// generator that is seeded from the sub-second nanoseconds of the system
/// clock. The input slice is never modified. Empty and single-element
/// inputs are returned as-is because the swap loop has nothing to do.
///
/// This is not a cryptographically secure shuffle; the seed is a clock
/// reading and the generator is a plain LCG.
fn shuffle(models: &[String]) -> Vec<String> {
    use std::time::{SystemTime, UNIX_EPOCH};

    // LCG parameters (the multiplier/increment pair published for Knuth's
    // MMIX). They are kept as `u64` on purpose: typed as `usize` these
    // literals do not fit on 32-bit targets.
    const MULTIPLIER: u64 = 6364136223846793005;
    const INCREMENT: u64 = 1442695040888963407;

    // Fall back to a fixed seed if the clock reports a time before the epoch.
    let mut state = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| u64::from(elapsed.subsec_nanos()))
        .unwrap_or(0);

    let mut result = models.to_vec();
    for i in (1..result.len()).rev() {
        state = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);

        // With a power-of-two modulus the low-order bits of an LCG cycle with
        // very short periods (bit 0 simply alternates), so the index is drawn
        // from the high 32 bits instead of from `state` directly.
        let high_bits = state >> 32;

        // `usize` -> `u64` is lossless on supported targets, and the result of
        // the modulo is at most `i`, so converting back to `usize` cannot
        // truncate.
        let j = (high_bits % (i as u64 + 1)) as usize;
        result.swap(i, j);
    }
    result
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CostEntry {
|
||||
input_per_million: f64,
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ pub enum StateStorageType {
|
|||
/// How a route's candidate models are ordered before being returned.
pub enum SelectionPreference {
|
||||
/// Documented as sorting by ascending cost from the metrics endpoint.
Cheapest,
|
||||
/// Documented as sorting by ascending latency from the metrics endpoint.
Fastest,
|
||||
/// Shuffle the model list on each request.
Random,
|
||||
/// Return models in the same order they were defined — no reordering.
|
||||
None,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ POST /v1/chat/completions
|
|||
| `name` | string | yes | Route identifier. Must match the LLM router's route classification. |
|
||||
| `description` | string | yes | Natural language description used by the router to match user intent. |
|
||||
| `models` | string[] | yes | Ordered candidate pool. At least one entry required. Must be declared in `model_providers`. |
|
||||
| `selection_policy.prefer` | enum | yes | How to rank models: `cheapest`, `fastest`, `random`, or `none`. |
|
||||
| `selection_policy.prefer` | enum | yes | How to rank models: `cheapest`, `fastest`, or `none`. |
|
||||
|
||||
### `selection_policy.prefer` values
|
||||
|
||||
|
|
@ -49,7 +49,6 @@ POST /v1/chat/completions
|
|||
|---|---|
|
||||
| `cheapest` | Sort by ascending cost from the metrics endpoint. Models with no data are appended last. |
|
||||
| `fastest` | Sort by ascending latency from the metrics endpoint. Models with no data are appended last. |
|
||||
| `random` | Shuffle the model list randomly on each request. |
|
||||
| `none` | Return models in the order they were defined — no reordering. |
|
||||
|
||||
### Notes
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue