diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 13800613..4a6a2ffa 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -799,10 +799,7 @@ impl HttpContext for StreamContext { let cluster_name_opt = self.llm_provider().cluster_name.clone(); if let Some(cluster_name) = cluster_name_opt { - self.add_http_request_header( - ARCH_ROUTING_HEADER, - &cluster_name, - ); + self.add_http_request_header(ARCH_ROUTING_HEADER, &cluster_name); } else { self.add_http_request_header( ARCH_ROUTING_HEADER, diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml index 4ba89a92..a33878b6 100644 --- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -64,6 +64,15 @@ listeners: model: ministral-3b-latest name: mistral/ministral-3b-latest provider_interface: mistral + - base_url: https://litellm.example.com + cluster_name: openai_litellm.example.com + endpoint: litellm.example.com + model: gpt-4o-litellm + name: openai/gpt-4o-litellm + passthrough_auth: true + port: 443 + protocol: https + provider_interface: openai name: egress_traffic port: 12000 timeout: 30s @@ -91,6 +100,15 @@ model_providers: model: ministral-3b-latest name: mistral/ministral-3b-latest provider_interface: mistral +- base_url: https://litellm.example.com + cluster_name: openai_litellm.example.com + endpoint: litellm.example.com + model: gpt-4o-litellm + name: openai/gpt-4o-litellm + passthrough_auth: true + port: 443 + protocol: https + provider_interface: openai - internal: true model: Arch-Function name: arch-function