mirror of
https://github.com/katanemo/plano.git
synced 2026-05-10 16:22:42 +02:00
model routing: cost/latency ranking with ranked fallback list (#849)
This commit is contained in:
parent
3a531ce22a
commit
e5751d6b13
23 changed files with 1524 additions and 317 deletions
|
|
@@ -150,6 +150,10 @@ fn get_quota(limit: Limit) -> Quota {
|
|||
TimeUnit::Second => Quota::per_second(tokens),
|
||||
TimeUnit::Minute => Quota::per_minute(tokens),
|
||||
TimeUnit::Hour => Quota::per_hour(tokens),
|
||||
TimeUnit::Day => {
|
||||
let per_hour = limit.tokens.saturating_div(24).max(1);
|
||||
Quota::per_hour(NonZero::new(per_hour).expect("per_hour must be positive"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue