diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 867836a0..53ec8e74 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -80,6 +80,7 @@ properties: - groq - mistral - openai + - gemini access_key: type: string model: diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 151c9b3f..53044770 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -450,6 +450,7 @@ static_resources: name: envoy.compression.brotli.compressor typed_config: "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli + chunk_size: 8192 - name: envoy.filters.http.compressor typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor @@ -483,7 +484,6 @@ static_resources: name: decompress typed_config: "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip" - window_bits: 9 chunk_size: 8192 # If this ratio is set too low, then body data will not be decompressed completely. max_inflate_ratio: 1000 @@ -494,6 +494,7 @@ static_resources: name: envoy.compression.brotli.decompressor typed_config: "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli + chunk_size: 8192 - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router diff --git a/arch/supervisord.conf b/arch/supervisord.conf index 7ef06b49..dfb4d0d2 100644 --- a/arch/supervisord.conf +++ b/arch/supervisord.conf @@ -2,7 +2,7 @@ nodaemon=true [program:brightstaff] -command=sh -c "/app/brightstaff 2>&1 | tee /var/log/brightstaff.log" +command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log" stdout_logfile=/dev/stdout redirect_stderr=true stdout_logfile_maxbytes=0 diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 3018f679..0dbd0b70 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -158,6 +158,8 @@ pub enum LlmProviderType { Mistral, #[serde(rename = "openai")] OpenAI, + #[serde(rename = "gemini")] + Gemini, } impl Display for LlmProviderType { @@ -167,6 +169,7 @@ impl Display for LlmProviderType { LlmProviderType::Claude => write!(f, "claude"), LlmProviderType::Deepseek => write!(f, "deepseek"), LlmProviderType::Groq => write!(f, "groq"), + LlmProviderType::Gemini => write!(f, "gemini"), LlmProviderType::Mistral => write!(f, "mistral"), LlmProviderType::OpenAI => write!(f, "openai"), } diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 9eee4693..3ff2ce5e 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -11,8 +11,7 @@ pub const MODEL_SERVER_NAME: &str = "model_server"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint"; -pub const CHAT_COMPLETIONS_PATH: [&str; 2] = - ["/v1/chat/completions", "/openai/v1/chat/completions"]; +pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions"; pub const HEALTHZ_PATH: &str = "/healthz"; pub const X_ARCH_STATE_HEADER: &str = "x-arch-state"; pub const X_ARCH_API_RESPONSE: &str = "x-arch-api-response-message"; diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index bf40f337..050afce9 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -90,13 +90,26 @@ impl StreamContext { provider_hint, )); - if self.llm_provider.as_ref().unwrap().provider_interface == LlmProviderType::Groq { - if let Some(path) = self.get_http_request_header(":path") { - if path.starts_with("/v1/") { - let new_path = format!("/openai{}", path); - self.set_http_request_header(":path", Some(new_path.as_str())); + match self.llm_provider.as_ref().unwrap().provider_interface { + LlmProviderType::Groq => { + if let Some(path) = self.get_http_request_header(":path") { + if path.starts_with("/v1/") { + let new_path = format!("/openai{}", path); + self.set_http_request_header(":path", Some(new_path.as_str())); + } } } + LlmProviderType::Gemini => { + if let Some(path) = self.get_http_request_header(":path") { + if path == "/v1/chat/completions" { + self.set_http_request_header( + ":path", + Some("/v1beta/openai/chat/completions"), + ); + } + } + } + _ => {} } debug!( @@ -202,6 +215,8 @@ impl HttpContext for StreamContext { return Action::Continue; } + self.is_chat_completions_request = CHAT_COMPLETIONS_PATH == request_path; + let use_agent_orchestrator = match self.overrides.as_ref() { Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(), None => false, @@ -242,9 +257,6 @@ impl HttpContext for StreamContext { self.delete_content_length_header(); self.save_ratelimit_header(); - let request_path = self.get_http_request_header(":path").unwrap_or_default(); - self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str()); - self.request_id = self.get_http_request_header(REQUEST_ID_HEADER); self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER); @@ -392,10 +404,10 @@ impl HttpContext for StreamContext { Action::Continue } - fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { + fn on_http_response_headers(&mut self, _num_headers: usize, end_of_stream: bool) -> Action { debug!( "on_http_response_headers [S={}] end_stream={}", - self.context_id, _end_of_stream + self.context_id, end_of_stream ); self.set_property( @@ -542,6 +554,13 @@ impl HttpContext for StreamContext { } }; + if log::log_enabled!(log::Level::Debug) { + debug!( + "response data (converted to utf8): {}", + String::from_utf8_lossy(&body) + ); + } + let llm_provider_str = self.llm_provider().provider_interface.to_string(); let hermes_llm_provider = Provider::from(llm_provider_str.as_str()); diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index d6cf001f..80c5e5da 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -54,8 +54,6 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { .returning(Some("selector-key")) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key")) .returning(Some("selector-value")) - .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) - .returning(Some("/v1/chat/completions")) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id")) .returning(None) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent")) diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index 0d38335e..46acdc07 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -45,5 +45,10 @@ llm_providers: provider_interface: groq model: llama-3.1-8b-instant + - name: gemini + access_key: $GEMINI_API_KEY + provider_interface: gemini + model: gemini-1.5-pro-latest + tracing: random_sampling: 100