rename session pinning to model affinity with x-model-affinity header

This commit is contained in:
Adil Hafeez 2026-04-08 15:23:53 -07:00
parent 5789694d2f
commit da9792c2dd
14 changed files with 468 additions and 371 deletions

View file

@ -1,8 +1,6 @@
use bytes::Bytes;
use common::configuration::{FilterPipeline, ModelAlias};
use common::consts::{
ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, ROUTING_SESSION_ID_HEADER,
};
use common::consts::{ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, MODEL_AFFINITY_HEADER};
use common::llm_providers::LlmProviders;
use hermesllm::apis::openai::Message;
use hermesllm::apis::openai_responses::InputParam;
@ -98,7 +96,7 @@ async fn llm_chat_inner(
// Model affinity (formerly "session pinning"): extract the session ID from the x-model-affinity header and check cache before routing
let session_id: Option<String> = request_headers
.get(ROUTING_SESSION_ID_HEADER)
.get(MODEL_AFFINITY_HEADER)
.and_then(|h| h.to_str().ok())
.map(|s| s.to_string());
let pinned_model: Option<String> = if let Some(ref sid) = session_id {

View file

@ -1,6 +1,6 @@
use bytes::Bytes;
use common::configuration::{SpanAttributes, TopLevelRoutingPreference};
use common::consts::{REQUEST_ID_HEADER, ROUTING_SESSION_ID_HEADER};
use common::consts::{MODEL_AFFINITY_HEADER, REQUEST_ID_HEADER};
use common::errors::BrightStaffError;
use hermesllm::clients::SupportedAPIsFromClient;
use hermesllm::ProviderRequestType;
@ -72,7 +72,7 @@ pub async fn routing_decision(
.unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
let session_id: Option<String> = request_headers
.get(ROUTING_SESSION_ID_HEADER)
.get(MODEL_AFFINITY_HEADER)
.and_then(|h| h.to_str().ok())
.map(|s| s.to_string());

View file

@ -22,7 +22,7 @@ pub const X_ARCH_TOOL_CALL: &str = "x-arch-tool-call-message";
pub const X_ARCH_FC_MODEL_RESPONSE: &str = "x-arch-fc-model-response";
pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function";
pub const REQUEST_ID_HEADER: &str = "x-request-id";
/// Legacy session-pinning header name, superseded by [`MODEL_AFFINITY_HEADER`].
pub const ROUTING_SESSION_ID_HEADER: &str = "x-routing-session-id";
/// Request header whose value the chat and routing-decision handlers read as
/// the optional session ID (absent or non-string header values yield `None`).
pub const MODEL_AFFINITY_HEADER: &str = "x-model-affinity";
pub const ENVOY_ORIGINAL_PATH_HEADER: &str = "x-envoy-original-path";
pub const TRACE_PARENT_HEADER: &str = "traceparent";
pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal";