Fixed an issue with Groq LLMs, which require an `/openai` prefix in front of the `/v1/chat/completions` path. My first change.

2026-06-17 15:25:17 +02:00 · 2025-04-12 16:24:30 -07:00 · 2025-04-12 16:24:30 -07:00 · 630081ebd3
commit 630081ebd3
parent 4d2d8bd7a1
5 changed files with 27 additions and 12 deletions
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@ -11,7 +11,7 @@ pub const MODEL_SERVER_NAME: &str = "model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
-pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
+pub const CHAT_COMPLETIONS_PATH: [&str; 2] = ["/v1/chat/completions", "/openai/v1/chat/completions"];
 pub const HEALTHZ_PATH: &str = "/healthz";
 pub const ARCH_STATE_HEADER: &str = "x-arch-state";
 pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B";
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -89,6 +89,23 @@ impl StreamContext {
            provider_hint,
        ));

+        // Check if we need to modify the path based on the provider's base_url
+        let needs_openai_prefix = self
+            .llm_provider
+            .as_ref()
+            .and_then(|provider| provider.endpoint.as_ref())
+            .map(|url| url.contains("api.groq.com"))
+            .unwrap_or(false);
+
+        if needs_openai_prefix {
+            if let Some(path) = self.get_http_request_header(":path") {
+                if path.starts_with("/v1/") {
+                    let new_path = format!("/openai{}", path);
+                    self.set_http_request_header(":path", Some(new_path.as_str()));
+                }
+            }
+        }
+
        debug!(
            "request received: llm provider hint: {}, selected llm: {}, model: {}",
            self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
@ -237,8 +254,8 @@ impl HttpContext for StreamContext {
        self.delete_content_length_header();
        self.save_ratelimit_header();

-        self.is_chat_completions_request =
-            self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH;
+        let request_path = self.get_http_request_header(":path").unwrap_or_default();
+        self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());

        self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
        self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@ -61,7 +61,7 @@ impl HttpContext for StreamContext {
            return Action::Continue;
        }

-        self.is_chat_completions_request = request_path == CHAT_COMPLETIONS_PATH;
+        self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());

        debug!(
            "on_http_request_headers S[{}] req_headers={:?}",
--- a/demos/samples_python/weather_forecast/arch_config.yaml
+++ b/demos/samples_python/weather_forecast/arch_config.yaml
@ -17,17 +17,13 @@ overrides:
  prompt_target_intent_matching_threshold: 0.6

 llm_providers:
-  - name: gpt-4o-mini
-    access_key: $OPENAI_API_KEY
+  - name: groq
+    access_key: $GROQ_API_KEY
    provider_interface: openai
-    model: gpt-4o-mini
+    model: llama-3.2-3b-preview
+    base_url: https://api.groq.com
    default: true

-  - name: gpt-3.5-turbo-0125
-    access_key: $OPENAI_API_KEY
-    provider_interface: openai
-    model: gpt-3.5-turbo-0125
-
  - name: gpt-4o
    access_key: $OPENAI_API_KEY
    provider_interface: openai
--- a/demos/samples_python/weather_forecast/docker-compose.yaml
+++ b/demos/samples_python/weather_forecast/docker-compose.yaml
@ -19,3 +19,5 @@ services:
      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
    extra_hosts:
      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml