Add `mistral` as an LLM provider type and route requests by provider type.

Summary of the hunks below:
  * arch/arch_config_schema.yaml: accept `mistral` in the provider `enum`.
  * arch/envoy.template.yaml: match the "x-arch-llm-provider" header against
    `provider.provider` and route to the cluster of the same value (previously
    `provider.name`), in both route blocks.
  * crates/common/src/configuration.rs: add `LlmProviderType::Mistral` and a
    `Display` impl that writes "openai" / "mistral".
  * crates/llm_gateway/src/stream_context.rs: when the selected provider has no
    endpoint, send `provider.to_string()` as the routing header; otherwise keep
    sending the provider `name`.

NOTE(review): this patch had been flattened onto two lines and its leading
whitespace was lost. Line breaks are restored from the hunk line counts; Rust
indentation follows rustfmt; the YAML indentation is reconstructed from nesting
and must be confirmed against the target files (or apply with
`git apply --ignore-whitespace`).

diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index ca1d3441..4e4ec80c 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -47,6 +47,7 @@ properties:
           type: string
           enum:
             - openai
+            - mistral
         access_key:
           type: string
         model:
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index a145add8..30a4497d 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -130,10 +130,10 @@ static_resources:
                     headers:
                       - name: "x-arch-llm-provider"
                         string_match:
-                          exact: {{ provider.name }}
+                          exact: {{ provider.provider }}
                   route:
                     auto_host_rewrite: true
-                    cluster: {{ provider.name }}
+                    cluster: {{ provider.provider }}
                     timeout: 60s
                 {% endfor %}
           http_filters:
@@ -375,10 +375,10 @@ static_resources:
                     headers:
                       - name: "x-arch-llm-provider"
                         string_match:
-                          exact: {{ provider.name }}
+                          exact: {{ provider.provider }}
                   route:
                     auto_host_rewrite: true
-                    cluster: {{ provider.name }}
+                    cluster: {{ provider.provider }}
                     timeout: 60s
                 {% endfor %}
                 - match:
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 5fb96ef4..d0c73c63 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -165,7 +165,18 @@ pub struct EmbeddingProviver {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum LlmProviderType {
     #[serde(rename = "openai")]
-    OpenAI
+    OpenAI,
+    #[serde(rename = "mistral")]
+    Mistral,
+}
+
+impl Display for LlmProviderType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LlmProviderType::OpenAI => write!(f, "openai"),
+            LlmProviderType::Mistral => write!(f, "mistral"),
+        }
+    }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 9bb87de8..fba443f0 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -174,7 +174,16 @@ impl HttpContext for StreamContext {
     // the lifecycle of the http request and response.
     fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
         self.select_llm_provider();
-        self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name);
+
+        // if endpoint is not set then use provider name as routing header so envoy can resolve the cluster name
+        if self.llm_provider().endpoint.is_none() {
+            self.add_http_request_header(
+                ARCH_ROUTING_HEADER,
+                &self.llm_provider().provider.to_string(),
+            );
+        } else {
+            self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name);
+        }
 
         if let Err(error) = self.modify_auth_headers() {
             // ensure that the provider has an endpoint if the access key is missing else return a bad request