fixed issue with groq LLMs that require the openai in the /v1/chat/co… (#460)

* fixed issue with groq LLMs that require the /openai prefix in the /v1/chat/completions path. My first change * updated the GH actions with keys for Groq * adding missing groq API keys * add llama-3.2-3b-preview to the model based on adding groq to the demo --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
2026-04-25 00:36:34 +02:00 · 2025-04-13 14:00:16 -07:00 · 2025-04-13 14:00:16 -07:00 · f31aa59fac
commit f31aa59fac
parent e7b0de2a72
9 changed files with 35 additions and 16 deletions
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@ -30,6 +30,7 @@ jobs:
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
        run: |
          docker compose up | tee &> archgw.logs &

@ -55,5 +56,6 @@ jobs:
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
        run: |
          docker compose down
--- a/.github/workflows/e2e_test_demos.yml
+++ b/.github/workflows/e2e_test_demos.yml
@ -48,6 +48,7 @@ jobs:
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
        run: |
          source venv/bin/activate
          cd demos/shared/test_runner && sh run_demo_tests.sh
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@ -29,6 +29,7 @@ jobs:
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
        run: |
          python -mvenv venv
          source venv/bin/activate && cd tests/e2e && bash run_e2e_tests.sh
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@ -11,7 +11,7 @@ pub const MODEL_SERVER_NAME: &str = "model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
-pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
+pub const CHAT_COMPLETIONS_PATH: [&str; 2] = ["/v1/chat/completions", "/openai/v1/chat/completions"];
 pub const HEALTHZ_PATH: &str = "/healthz";
 pub const ARCH_STATE_HEADER: &str = "x-arch-state";
 pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B";
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -89,6 +89,23 @@ impl StreamContext {
            provider_hint,
        ));

+        // Check if we need to modify the path based on the provider's base_url
+        let needs_openai_prefix = self
+            .llm_provider
+            .as_ref()
+            .and_then(|provider| provider.endpoint.as_ref())
+            .map(|url| url.contains("api.groq.com"))
+            .unwrap_or(false);
+
+        if needs_openai_prefix {
+            if let Some(path) = self.get_http_request_header(":path") {
+                if path.starts_with("/v1/") {
+                    let new_path = format!("/openai{}", path);
+                    self.set_http_request_header(":path", Some(new_path.as_str()));
+                }
+            }
+        }
+
        debug!(
            "request received: llm provider hint: {}, selected llm: {}, model: {}",
            self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
@ -237,8 +254,8 @@ impl HttpContext for StreamContext {
        self.delete_content_length_header();
        self.save_ratelimit_header();

-        self.is_chat_completions_request =
-            self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH;
+        let request_path = self.get_http_request_header(":path").unwrap_or_default();
+        self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());

        self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
        self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@ -61,7 +61,7 @@ impl HttpContext for StreamContext {
            return Action::Continue;
        }

-        self.is_chat_completions_request = request_path == CHAT_COMPLETIONS_PATH;
+        self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());

        debug!(
            "on_http_request_headers S[{}] req_headers={:?}",
--- a/demos/samples_python/weather_forecast/arch_config.yaml
+++ b/demos/samples_python/weather_forecast/arch_config.yaml
@ -17,17 +17,13 @@ overrides:
  prompt_target_intent_matching_threshold: 0.6

 llm_providers:
-  - name: gpt-4o-mini
-    access_key: $OPENAI_API_KEY
+  - name: groq
+    access_key: $GROQ_API_KEY
    provider_interface: openai
-    model: gpt-4o-mini
+    model: llama-3.2-3b-preview
+    base_url: https://api.groq.com
    default: true

-  - name: gpt-3.5-turbo-0125
-    access_key: $OPENAI_API_KEY
-    provider_interface: openai
-    model: gpt-3.5-turbo-0125
-
  - name: gpt-4o
    access_key: $OPENAI_API_KEY
    provider_interface: openai
--- a/demos/samples_python/weather_forecast/docker-compose.yaml
+++ b/demos/samples_python/weather_forecast/docker-compose.yaml
@ -19,3 +19,5 @@ services:
      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
    extra_hosts:
      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml
--- a/tests/e2e/test_prompt_gateway.py
+++ b/tests/e2e/test_prompt_gateway.py
@ -62,7 +62,7 @@ def test_prompt_gateway(stream):

        # third..end chunk is summarization (role = assistant)
        response_json = json.loads(chunks[2])
-        assert response_json.get("model").startswith("gpt-4o-mini")
+        assert response_json.get("model").startswith("llama-3.2-3b-preview")
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        assert "role" in choices[0]["delta"]
@ -71,7 +71,7 @@ def test_prompt_gateway(stream):

    else:
        response_json = response.json()
-        assert response_json.get("model").startswith("gpt-4o-mini")
+        assert response_json.get("model").startswith("llama-3.2-3b-preview")
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        assert "role" in choices[0]["message"]
@ -231,7 +231,7 @@ def test_prompt_gateway_param_tool_call(stream):

        # third..end chunk is summarization (role = assistant)
        response_json = json.loads(chunks[2])
-        assert response_json.get("model").startswith("gpt-4o-mini")
+        assert response_json.get("model").startswith("llama-3.2-3b-preview")
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        assert "role" in choices[0]["delta"]
@ -240,7 +240,7 @@ def test_prompt_gateway_param_tool_call(stream):

    else:
        response_json = response.json()
-        assert response_json.get("model").startswith("gpt-4o-mini")
+        assert response_json.get("model").startswith("llama-3.2-3b-preview")
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        assert "role" in choices[0]["message"]