mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
fixed issue with groq LLMs that require the openai in the /v1/chat/co… (#460)
* fixed issue with Groq LLMs that require the /openai prefix in the /v1/chat/completions path. My first change * updated the GH actions with keys for Groq * adding missing Groq API keys * add llama-3.2-3b-preview to the model based on adding Groq to the demo --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
e7b0de2a72
commit
f31aa59fac
9 changed files with 35 additions and 16 deletions
2
.github/workflows/e2e_archgw.yml
vendored
2
.github/workflows/e2e_archgw.yml
vendored
|
|
@ -30,6 +30,7 @@ jobs:
|
|||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
docker compose up | tee &> archgw.logs &
|
||||
|
||||
|
|
@ -55,5 +56,6 @@ jobs:
|
|||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
docker compose down
|
||||
|
|
|
|||
1
.github/workflows/e2e_test_demos.yml
vendored
1
.github/workflows/e2e_test_demos.yml
vendored
|
|
@ -48,6 +48,7 @@ jobs:
|
|||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
source venv/bin/activate
|
||||
cd demos/shared/test_runner && sh run_demo_tests.sh
|
||||
|
|
|
|||
1
.github/workflows/e2e_tests.yml
vendored
1
.github/workflows/e2e_tests.yml
vendored
|
|
@ -29,6 +29,7 @@ jobs:
|
|||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
|
||||
run: |
|
||||
python -mvenv venv
|
||||
source venv/bin/activate && cd tests/e2e && bash run_e2e_tests.sh
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ pub const MODEL_SERVER_NAME: &str = "model_server";
|
|||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
pub const MESSAGES_KEY: &str = "messages";
|
||||
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
||||
pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
|
||||
pub const CHAT_COMPLETIONS_PATH: [&str; 2] = ["/v1/chat/completions", "/openai/v1/chat/completions"];
|
||||
pub const HEALTHZ_PATH: &str = "/healthz";
|
||||
pub const ARCH_STATE_HEADER: &str = "x-arch-state";
|
||||
pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B";
|
||||
|
|
|
|||
|
|
@ -89,6 +89,23 @@ impl StreamContext {
|
|||
provider_hint,
|
||||
));
|
||||
|
||||
// Check if we need to modify the path based on the provider's base_url
|
||||
let needs_openai_prefix = self
|
||||
.llm_provider
|
||||
.as_ref()
|
||||
.and_then(|provider| provider.endpoint.as_ref())
|
||||
.map(|url| url.contains("api.groq.com"))
|
||||
.unwrap_or(false);
|
||||
|
||||
if needs_openai_prefix {
|
||||
if let Some(path) = self.get_http_request_header(":path") {
|
||||
if path.starts_with("/v1/") {
|
||||
let new_path = format!("/openai{}", path);
|
||||
self.set_http_request_header(":path", Some(new_path.as_str()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!(
|
||||
"request received: llm provider hint: {}, selected llm: {}, model: {}",
|
||||
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
|
||||
|
|
@ -237,8 +254,8 @@ impl HttpContext for StreamContext {
|
|||
self.delete_content_length_header();
|
||||
self.save_ratelimit_header();
|
||||
|
||||
self.is_chat_completions_request =
|
||||
self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH;
|
||||
let request_path = self.get_http_request_header(":path").unwrap_or_default();
|
||||
self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());
|
||||
|
||||
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
|
||||
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ impl HttpContext for StreamContext {
|
|||
return Action::Continue;
|
||||
}
|
||||
|
||||
self.is_chat_completions_request = request_path == CHAT_COMPLETIONS_PATH;
|
||||
self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());
|
||||
|
||||
debug!(
|
||||
"on_http_request_headers S[{}] req_headers={:?}",
|
||||
|
|
|
|||
|
|
@ -17,17 +17,13 @@ overrides:
|
|||
prompt_target_intent_matching_threshold: 0.6
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
- name: groq
|
||||
access_key: $GROQ_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o-mini
|
||||
model: llama-3.2-3b-preview
|
||||
base_url: https://api.groq.com
|
||||
default: true
|
||||
|
||||
- name: gpt-3.5-turbo-0125
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-3.5-turbo-0125
|
||||
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
|
|
|
|||
|
|
@ -19,3 +19,5 @@ services:
|
|||
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
- ./arch_config.yaml:/app/arch_config.yaml
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ def test_prompt_gateway(stream):
|
|||
|
||||
# third..end chunk is summarization (role = assistant)
|
||||
response_json = json.loads(chunks[2])
|
||||
assert response_json.get("model").startswith("gpt-4o-mini")
|
||||
assert response_json.get("model").startswith("llama-3.2-3b-preview")
|
||||
choices = response_json.get("choices", [])
|
||||
assert len(choices) > 0
|
||||
assert "role" in choices[0]["delta"]
|
||||
|
|
@ -71,7 +71,7 @@ def test_prompt_gateway(stream):
|
|||
|
||||
else:
|
||||
response_json = response.json()
|
||||
assert response_json.get("model").startswith("gpt-4o-mini")
|
||||
assert response_json.get("model").startswith("llama-3.2-3b-preview")
|
||||
choices = response_json.get("choices", [])
|
||||
assert len(choices) > 0
|
||||
assert "role" in choices[0]["message"]
|
||||
|
|
@ -231,7 +231,7 @@ def test_prompt_gateway_param_tool_call(stream):
|
|||
|
||||
# third..end chunk is summarization (role = assistant)
|
||||
response_json = json.loads(chunks[2])
|
||||
assert response_json.get("model").startswith("gpt-4o-mini")
|
||||
assert response_json.get("model").startswith("llama-3.2-3b-preview")
|
||||
choices = response_json.get("choices", [])
|
||||
assert len(choices) > 0
|
||||
assert "role" in choices[0]["delta"]
|
||||
|
|
@ -240,7 +240,7 @@ def test_prompt_gateway_param_tool_call(stream):
|
|||
|
||||
else:
|
||||
response_json = response.json()
|
||||
assert response_json.get("model").startswith("gpt-4o-mini")
|
||||
assert response_json.get("model").startswith("llama-3.2-3b-preview")
|
||||
choices = response_json.get("choices", [])
|
||||
assert len(choices) > 0
|
||||
assert "role" in choices[0]["message"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue