Fixed an issue with Groq LLMs, which require an /openai prefix on the /v1/chat/completions path. My first change.

This commit is contained in:
Salman Paracha 2025-04-12 16:24:30 -07:00
parent 4d2d8bd7a1
commit 630081ebd3
5 changed files with 27 additions and 12 deletions

View file

@ -11,7 +11,7 @@ pub const MODEL_SERVER_NAME: &str = "model_server";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const MESSAGES_KEY: &str = "messages";
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
pub const CHAT_COMPLETIONS_PATH: [&str; 2] = ["/v1/chat/completions", "/openai/v1/chat/completions"];
pub const HEALTHZ_PATH: &str = "/healthz";
pub const ARCH_STATE_HEADER: &str = "x-arch-state";
pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B";

View file

@ -89,6 +89,23 @@ impl StreamContext {
provider_hint,
));
// Check if we need to modify the path based on the provider's base_url
let needs_openai_prefix = self
.llm_provider
.as_ref()
.and_then(|provider| provider.endpoint.as_ref())
.map(|url| url.contains("api.groq.com"))
.unwrap_or(false);
if needs_openai_prefix {
if let Some(path) = self.get_http_request_header(":path") {
if path.starts_with("/v1/") {
let new_path = format!("/openai{}", path);
self.set_http_request_header(":path", Some(new_path.as_str()));
}
}
}
debug!(
"request received: llm provider hint: {}, selected llm: {}, model: {}",
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
@ -237,8 +254,8 @@ impl HttpContext for StreamContext {
self.delete_content_length_header();
self.save_ratelimit_header();
self.is_chat_completions_request =
self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH;
let request_path = self.get_http_request_header(":path").unwrap_or_default();
self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);

View file

@ -61,7 +61,7 @@ impl HttpContext for StreamContext {
return Action::Continue;
}
self.is_chat_completions_request = request_path == CHAT_COMPLETIONS_PATH;
self.is_chat_completions_request = CHAT_COMPLETIONS_PATH.contains(&request_path.as_str());
debug!(
"on_http_request_headers S[{}] req_headers={:?}",

View file

@ -17,17 +17,13 @@ overrides:
prompt_target_intent_matching_threshold: 0.6
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
- name: groq
access_key: $GROQ_API_KEY
provider_interface: openai
model: gpt-4o-mini
model: llama-3.2-3b-preview
base_url: https://api.groq.com
default: true
- name: gpt-3.5-turbo-0125
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-3.5-turbo-0125
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai

View file

@ -19,3 +19,5 @@ services:
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- ./arch_config.yaml:/app/arch_config.yaml