mirror of
https://github.com/katanemo/plano.git
synced 2026-05-18 13:45:15 +02:00
rename x-session-id to x-routing-session-id and fix routing config field name
This commit is contained in:
parent
f699cfb059
commit
5789694d2f
11 changed files with 41 additions and 34 deletions
|
|
@ -1,6 +1,8 @@
|
|||
use bytes::Bytes;
|
||||
use common::configuration::{FilterPipeline, ModelAlias};
|
||||
use common::consts::{ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, SESSION_ID_HEADER};
|
||||
use common::consts::{
|
||||
ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, ROUTING_SESSION_ID_HEADER,
|
||||
};
|
||||
use common::llm_providers::LlmProviders;
|
||||
use hermesllm::apis::openai::Message;
|
||||
use hermesllm::apis::openai_responses::InputParam;
|
||||
|
|
@ -96,7 +98,7 @@ async fn llm_chat_inner(
|
|||
|
||||
// Session pinning: extract session ID and check cache before routing
|
||||
let session_id: Option<String> = request_headers
|
||||
.get(SESSION_ID_HEADER)
|
||||
.get(ROUTING_SESSION_ID_HEADER)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
let pinned_model: Option<String> = if let Some(ref sid) = session_id {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use bytes::Bytes;
|
||||
use common::configuration::{SpanAttributes, TopLevelRoutingPreference};
|
||||
use common::consts::{REQUEST_ID_HEADER, SESSION_ID_HEADER};
|
||||
use common::consts::{REQUEST_ID_HEADER, ROUTING_SESSION_ID_HEADER};
|
||||
use common::errors::BrightStaffError;
|
||||
use hermesllm::clients::SupportedAPIsFromClient;
|
||||
use hermesllm::ProviderRequestType;
|
||||
|
|
@ -72,7 +72,7 @@ pub async fn routing_decision(
|
|||
.unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
|
||||
|
||||
let session_id: Option<String> = request_headers
|
||||
.get(SESSION_ID_HEADER)
|
||||
.get(ROUTING_SESSION_ID_HEADER)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ use crate::router::router_model_v1;
|
|||
|
||||
const DEFAULT_SESSION_TTL_SECONDS: u64 = 600;
|
||||
const DEFAULT_SESSION_MAX_ENTRIES: usize = 10_000;
|
||||
const MAX_SESSION_MAX_ENTRIES: usize = 10_000;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CachedRoute {
|
||||
|
|
@ -92,7 +93,9 @@ impl RouterService {
|
|||
|
||||
let session_ttl =
|
||||
Duration::from_secs(session_ttl_seconds.unwrap_or(DEFAULT_SESSION_TTL_SECONDS));
|
||||
let session_max_entries = session_max_entries.unwrap_or(DEFAULT_SESSION_MAX_ENTRIES);
|
||||
let session_max_entries = session_max_entries
|
||||
.unwrap_or(DEFAULT_SESSION_MAX_ENTRIES)
|
||||
.min(MAX_SESSION_MAX_ENTRIES);
|
||||
|
||||
RouterService {
|
||||
router_url,
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use crate::api::open_ai::{
|
|||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Routing {
|
||||
pub model_provider: Option<String>,
|
||||
pub llm_provider: Option<String>,
|
||||
pub model: Option<String>,
|
||||
pub session_ttl_seconds: Option<u64>,
|
||||
pub session_max_entries: Option<usize>,
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ pub const X_ARCH_TOOL_CALL: &str = "x-arch-tool-call-message";
|
|||
pub const X_ARCH_FC_MODEL_RESPONSE: &str = "x-arch-fc-model-response";
|
||||
pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function";
|
||||
pub const REQUEST_ID_HEADER: &str = "x-request-id";
|
||||
pub const SESSION_ID_HEADER: &str = "x-session-id";
|
||||
pub const ROUTING_SESSION_ID_HEADER: &str = "x-routing-session-id";
|
||||
pub const ENVOY_ORIGINAL_PATH_HEADER: &str = "x-envoy-original-path";
|
||||
pub const TRACE_PARENT_HEADER: &str = "traceparent";
|
||||
pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue