mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
merge main into plano-session_pinning
This commit is contained in:
commit
f699cfb059
86 changed files with 11996 additions and 8063 deletions
|
|
@ -2,7 +2,6 @@ use crate::{
|
|||
configuration::LlmProvider,
|
||||
consts::{ARCH_FC_MODEL_NAME, ASSISTANT_ROLE},
|
||||
};
|
||||
use core::{panic, str};
|
||||
use serde::{ser::SerializeMap, Deserialize, Serialize};
|
||||
use std::{
|
||||
collections::{HashMap, VecDeque},
|
||||
|
|
@ -193,7 +192,7 @@ impl Display for ContentType {
|
|||
// skip image URLs or their data in text representation
|
||||
None
|
||||
} else {
|
||||
panic!("Unsupported content type: {:?}", part.content_type);
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use hermesllm::apis::openai::{ModelDetail, ModelObject, Models};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
|
||||
|
|
@ -112,6 +112,77 @@ pub enum StateStorageType {
|
|||
Postgres,
|
||||
}
|
||||
|
||||
/// Preference used to order the models of a routing preference.
///
/// Variant names are (de)serialized in lowercase (`cheapest`, `fastest`, `none`).
/// The default variant is [`SelectionPreference::None`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum SelectionPreference {
|
||||
/// NOTE(review): presumably ranks models by cost using a cost metrics source — confirm against the router.
Cheapest,
|
||||
/// NOTE(review): presumably ranks models by latency using a latency metrics source — confirm against the router.
Fastest,
|
||||
/// Return models in the same order they were defined — no reordering.
|
||||
#[default]
|
||||
// An empty string is accepted as an additional spelling of this variant when deserializing.
#[serde(alias = "")]
|
||||
None,
|
||||
}
|
||||
|
||||
/// Selection policy attached to a routing preference.
///
/// Derives `Default`, so an omitted policy carries the default
/// [`SelectionPreference`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct SelectionPolicy {
|
||||
// `default` covers a missing key; `deserialize_selection_preference` additionally
// maps an explicit null to the default variant.
#[serde(default, deserialize_with = "deserialize_selection_preference")]
|
||||
pub prefer: SelectionPreference,
|
||||
}
|
||||
|
||||
fn deserialize_selection_preference<'de, D>(
|
||||
deserializer: D,
|
||||
) -> Result<SelectionPreference, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Ok(Option::<SelectionPreference>::deserialize(deserializer)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// A routing preference entry; `Configuration::routing_preferences` holds a list of these.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TopLevelRoutingPreference {
|
||||
/// Name of the preference.
pub name: String,
|
||||
/// Free-text description of the preference.
pub description: String,
|
||||
/// Models listed for this preference.
/// NOTE(review): presumably model names as used elsewhere in the config — confirm.
pub models: Vec<String>,
|
||||
// Falls back to `SelectionPolicy::default()` when the key is omitted.
#[serde(default)]
|
||||
pub selection_policy: SelectionPolicy,
|
||||
}
|
||||
|
||||
/// A source of model metrics; `Configuration::model_metrics_sources` holds a list of these.
///
/// Internally tagged for serde: the variant is chosen by a `type` field whose value is
/// the snake_case variant name (`cost` or `latency`); the remaining fields belong to the
/// variant's config struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum MetricsSource {
|
||||
/// Cost metrics, configured by [`CostMetricsConfig`].
Cost(CostMetricsConfig),
|
||||
/// Latency metrics, configured by [`LatencyMetricsConfig`].
Latency(LatencyMetricsConfig),
|
||||
}
|
||||
|
||||
/// Configuration for the `Cost` variant of `MetricsSource`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CostMetricsConfig {
|
||||
/// Which cost provider to use.
pub provider: CostProvider,
|
||||
/// Optional refresh interval.
/// NOTE(review): unit is not visible here (presumably seconds) — confirm against the consumer.
pub refresh_interval: Option<u64>,
|
||||
/// Map DO catalog keys (`lowercase(creator)/model_id`) to Plano model names.
|
||||
/// Example: `openai/openai-gpt-oss-120b: openai/gpt-4o`
|
||||
pub model_aliases: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Supported providers of cost data.
///
/// Variant names are (de)serialized in snake_case, so `Digitalocean` is written
/// `digitalocean`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum CostProvider {
|
||||
/// Currently the only cost provider variant.
Digitalocean,
|
||||
}
|
||||
|
||||
/// Configuration for the `Latency` variant of `MetricsSource`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LatencyMetricsConfig {
|
||||
/// Which latency provider to use.
pub provider: LatencyProvider,
|
||||
/// Provider URL.
/// NOTE(review): presumably the base URL of the Prometheus server — confirm.
pub url: String,
|
||||
/// Query string sent to the provider.
/// NOTE(review): presumably a PromQL expression — confirm against the consumer.
pub query: String,
|
||||
/// Optional refresh interval.
/// NOTE(review): unit is not visible here (presumably seconds) — confirm against the consumer.
pub refresh_interval: Option<u64>,
|
||||
}
|
||||
|
||||
/// Supported providers of latency data.
///
/// Variant names are (de)serialized in snake_case, so `Prometheus` is written
/// `prometheus`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum LatencyProvider {
|
||||
/// Currently the only latency provider variant.
Prometheus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Configuration {
|
||||
pub version: String,
|
||||
|
|
@ -131,6 +202,8 @@ pub struct Configuration {
|
|||
pub filters: Option<Vec<Agent>>,
|
||||
pub listeners: Vec<Listener>,
|
||||
pub state_storage: Option<StateStorageConfig>,
|
||||
pub routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
|
||||
pub model_metrics_sources: Option<Vec<MetricsSource>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -246,6 +319,8 @@ pub enum TimeUnit {
|
|||
Minute,
|
||||
#[serde(rename = "hour")]
|
||||
Hour,
|
||||
#[serde(rename = "day")]
|
||||
Day,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
|
|
@ -326,18 +401,6 @@ impl LlmProviderType {
|
|||
}
|
||||
}
|
||||
|
||||
/// Pairs a model with a list of [`RoutingPreference`] entries.
///
/// NOTE(review): the enclosing hunk header (`-326,18 +401,6`) indicates this definition
/// is on the removed side of the diff — confirm before relying on it.
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct ModelUsagePreference {
|
||||
/// Model this entry applies to.
pub model: String,
|
||||
/// Routing preferences listed for the model.
pub routing_preferences: Vec<RoutingPreference>,
|
||||
}
|
||||
|
||||
/// A named routing preference with a free-text description.
///
/// NOTE(review): the enclosing hunk header (`-326,18 +401,6`) indicates this definition
/// is on the removed side of the diff — confirm before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RoutingPreference {
|
||||
/// Name of the preference.
pub name: String,
|
||||
/// Free-text description of the preference.
pub description: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct AgentUsagePreference {
|
||||
pub model: String,
|
||||
|
|
@ -387,7 +450,6 @@ pub struct LlmProvider {
|
|||
pub port: Option<u16>,
|
||||
pub rate_limits: Option<LlmRatelimit>,
|
||||
pub usage: Option<String>,
|
||||
pub routing_preferences: Option<Vec<RoutingPreference>>,
|
||||
pub cluster_name: Option<String>,
|
||||
pub base_url_path_prefix: Option<String>,
|
||||
pub internal: Option<bool>,
|
||||
|
|
@ -431,7 +493,6 @@ impl Default for LlmProvider {
|
|||
port: None,
|
||||
rate_limits: None,
|
||||
usage: None,
|
||||
routing_preferences: None,
|
||||
cluster_name: None,
|
||||
base_url_path_prefix: None,
|
||||
internal: None,
|
||||
|
|
|
|||
|
|
@ -75,7 +75,10 @@ pub trait Client: Context {
|
|||
/// Stores `call_context` under `id` in the callout map returned by `self.callouts()`,
/// then increments the active-HTTP-calls metric by one.
///
/// The increment runs whether or not `id` was already present in the map.
///
/// NOTE(review): this span shows both sides of a diff hunk (`-75,7 +75,10`): the
/// `panic!` line appears to be the removed line and the `log::warn!` call its
/// replacement. Read literally, the warning is unreachable after the panic — confirm
/// which side is live.
fn add_call_context(&self, id: u32, call_context: Self::CallContext) {
|
||||
let callouts = self.callouts();
|
||||
// `insert` yielding `Some` means an entry for `id` already existed and has been replaced.
if callouts.borrow_mut().insert(id, call_context).is_some() {
|
||||
panic!("Duplicate http call with id={}", id);
|
||||
log::warn!(
|
||||
"Duplicate http call with id={}, previous context overwritten",
|
||||
id
|
||||
);
|
||||
}
|
||||
self.active_http_calls().increment(1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -274,7 +274,6 @@ mod tests {
|
|||
port: None,
|
||||
rate_limits: None,
|
||||
usage: None,
|
||||
routing_preferences: None,
|
||||
internal: None,
|
||||
stream: None,
|
||||
passthrough_auth: None,
|
||||
|
|
|
|||
|
|
@ -73,7 +73,10 @@ impl RatelimitMap {
|
|||
match new_ratelimit_map.datastore.get_mut(&ratelimit_config.model) {
|
||||
Some(limits) => match limits.get_mut(&ratelimit_config.selector) {
|
||||
Some(_) => {
|
||||
panic!("repeated selector. Selectors per provider must be unique")
|
||||
log::error!(
|
||||
"repeated selector for model '{}'. Selectors per provider must be unique, skipping duplicate",
|
||||
ratelimit_config.model
|
||||
);
|
||||
}
|
||||
None => {
|
||||
limits.insert(ratelimit_config.selector, limit);
|
||||
|
|
@ -150,6 +153,10 @@ fn get_quota(limit: Limit) -> Quota {
|
|||
TimeUnit::Second => Quota::per_second(tokens),
|
||||
TimeUnit::Minute => Quota::per_minute(tokens),
|
||||
TimeUnit::Hour => Quota::per_hour(tokens),
|
||||
TimeUnit::Day => {
|
||||
let per_hour = limit.tokens.saturating_div(24).max(1);
|
||||
Quota::per_hour(NonZero::new(per_hour).expect("per_hour must be positive"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue