add support for claude and add first class support for groq and deepseek (#479)

This commit is contained in:
Adil Hafeez 2025-05-22 22:55:46 -07:00 committed by GitHub
parent 27c0f2fdce
commit f5e77bbe65
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 212 additions and 49 deletions

View file

@ -70,8 +70,11 @@ properties:
provider_interface:
type: string
enum:
- openai
- claude
- deepseek
- groq
- mistral
- openai
access_key:
type: string
model:

View file

@ -451,6 +451,128 @@ static_resources:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
clusters:
- name: claude
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: claude
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.anthropic.com
port_value: 443
hostname: "api.anthropic.com"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.anthropic.com
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: deepseek
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: deepseek
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.deepseek.com
port_value: 443
hostname: "api.deepseek.com"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.deepseek.com
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: gemini
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: gemini
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: generativelanguage.googleapis.com
port_value: 443
hostname: "generativelanguage.googleapis.com"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: generativelanguage.googleapis.com
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: groq
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: groq
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.groq.com
port_value: 443
hostname: "api.groq.com"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.groq.com
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: mistral
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: mistral
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.mistral.ai
port_value: 443
hostname: "api.mistral.ai"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.mistral.ai
- name: openai
connect_timeout: 0.5s
type: LOGICAL_DNS
@ -475,26 +597,7 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: mistral
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: mistral
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.mistral.ai
port_value: 443
hostname: "api.mistral.ai"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.mistral.ai
{% for internal_cluster in ["arch_fc", "model_server"] %}
- name: {{ internal_cluster }}
connect_timeout: 0.5s

View file

@ -103,6 +103,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
# Check if timeout is reached
if elapsed_time > log_timeout:
log.info(f"stopping log monitoring after {log_timeout} seconds.")
stream_gateway_logs(follow=False)
sys.exit(1)
if prompt_gateway_health_check_status or llm_gateway_health_check_status:

View file

@ -330,6 +330,7 @@ impl TryFrom<&str> for ChatCompletionStreamResponseServerEvents {
let response_chunks: VecDeque<ChatCompletionStreamResponse> = value
.lines()
.filter(|line| line.starts_with("data: "))
.filter(|line| !line.starts_with(r#"data: {"type": "ping"}"#))
.map(|line| line.get(6..).unwrap())
.filter(|data_chunk| *data_chunk != "[DONE]")
.map(serde_json::from_str::<ChatCompletionStreamResponse>)
@ -677,4 +678,37 @@ data: [DONE]
"Hello! How can I assist you today?"
);
}
/// Parses a Claude-model stream that carries a `{"type": "ping"}` line between
/// completion chunks and checks both the number of parsed events and the text
/// obtained by rendering them with `to_string()`.
#[test]
fn stream_chunk_parse_claude() {
    // Payload layout: one role chunk, one ping line, six content chunks,
    // one finish chunk, and the closing `[DONE]` marker.
    const CHUNK_RESPONSE: &str = r#"data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"role":"assistant"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"type": "ping"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":"Hello!"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":" How can I assist you today? Whether"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":" you have a question, need information"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":", or just want to chat about"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":" something, I'm here to help. What woul"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":"d you like to talk about?"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"}
data: [DONE]
"#;
    const EXPECTED_TEXT: &str = "Hello! How can I assist you today? Whether you have a question, need information, or just want to chat about something, I'm here to help. What would you like to talk about?";

    let server_events =
        ChatCompletionStreamResponseServerEvents::try_from(CHUNK_RESPONSE).unwrap();

    // Eight events remain: every `data:` line except the ping and `[DONE]`.
    assert_eq!(server_events.events.len(), 8);
    assert_eq!(server_events.to_string(), EXPECTED_TEXT);
}
}

View file

@ -143,19 +143,28 @@ pub struct EmbeddingProviver {
pub model: String,
}
/// Provider interface kinds that can be stored in a provider's `provider_interface` field.
///
/// Each variant carries a `#[serde(rename = "...")]` attribute giving the lowercase
/// name it is serialized to and deserialized from.
// NOTE(review): the derive attribute and the `OpenAI` variant each appear twice in this
// listing. That looks like removed and added diff lines shown together rather than the
// real file contents — confirm against the checked-out source before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum LlmProviderType {
#[serde(rename = "openai")]
OpenAI,
#[serde(rename = "claude")]
Claude,
#[serde(rename = "deepseek")]
Deepseek,
#[serde(rename = "groq")]
Groq,
#[serde(rename = "mistral")]
Mistral,
#[serde(rename = "openai")]
OpenAI,
}
/// Writes a provider type as its lowercase name: `openai`, `claude`, `deepseek`,
/// `groq`, or `mistral`.
impl Display for LlmProviderType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
// NOTE(review): the `OpenAI` arm is listed both first and last; presumably the old and
// new positions from the diff shown together — confirm against the checked-out source.
LlmProviderType::OpenAI => write!(f, "openai"),
LlmProviderType::Claude => write!(f, "claude"),
LlmProviderType::Deepseek => write!(f, "deepseek"),
LlmProviderType::Groq => write!(f, "groq"),
LlmProviderType::Mistral => write!(f, "mistral"),
LlmProviderType::OpenAI => write!(f, "openai"),
}
}
}
@ -175,6 +184,23 @@ pub struct LlmProvider {
pub usage: Option<String>,
}
impl Default for LlmProvider {
fn default() -> Self {
Self {
name: "openai".to_string(),
provider_interface: LlmProviderType::OpenAI,
access_key: None,
model: None,
default: Some(true),
stream: Some(false),
endpoint: None,
port: None,
rate_limits: None,
usage: None,
}
}
}
impl Display for LlmProvider {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.name)

View file

@ -89,15 +89,7 @@ impl StreamContext {
provider_hint,
));
// Check if we need to modify the path based on the provider's base_url
let needs_openai_prefix = self
.llm_provider
.as_ref()
.and_then(|provider| provider.endpoint.as_ref())
.map(|url| url.contains("api.groq.com"))
.unwrap_or(false);
if needs_openai_prefix {
if self.llm_provider.as_ref().unwrap().provider_interface == LlmProviderType::Groq {
if let Some(path) = self.get_http_request_header(":path") {
if path.starts_with("/v1/") {
let new_path = format!("/openai{}", path);
@ -221,14 +213,7 @@ impl HttpContext for StreamContext {
self.llm_provider = Some(Rc::new(LlmProvider {
name: routing_header_value.to_string(),
provider_interface: LlmProviderType::OpenAI,
access_key: None,
endpoint: None,
model: None,
default: None,
stream: None,
port: None,
rate_limits: None,
usage: None,
..Default::default()
}));
} else {
self.select_llm_provider();
@ -539,6 +524,9 @@ impl HttpContext for StreamContext {
}
streaming_chunk
} else {
if body_size == 0 {
return Action::Continue;
}
debug!("non streaming response bytes read: 0:{}", body_size);
match self.get_http_response_body(0, body_size) {
Some(body) => body,

View file

@ -19,9 +19,8 @@ overrides:
llm_providers:
- name: groq
access_key: $GROQ_API_KEY
provider_interface: openai
provider_interface: groq
model: llama-3.2-3b-preview
base_url: https://api.groq.com
- name: gpt-4o
access_key: $OPENAI_API_KEY

View file

@ -17,6 +17,12 @@ do
docker compose up -d 2>&1 > /dev/null
echo "starting hurl tests"
hurl --test hurl_tests/*.hurl
if [ $? -ne 0 ]; then
echo "Hurl tests failed for $demo"
echo "docker logs for archgw:"
docker logs archgw
exit 1
fi
echo "stopping docker containers and archgw"
archgw down
docker compose down -v

View file

@ -12,7 +12,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o-mini
default: true
- name: gpt-4o
access_key: $OPENAI_API_KEY
@ -24,17 +23,21 @@ llm_providers:
provider_interface: mistral
model: ministral-3b-latest
- name: claude-sonnet
access_key: $ANTHROPY_API_KEY
provider_interface: claude
model: claude-3-7-sonnet-latest
default: true
- name: deepseek
access_key: $DEEPSEEK_API_KEY
provider_interface: openai
provider_interface: deepseek
model: deepseek-reasoner
base_url: https://api.deepseek.com/
- name: groq
access_key: $GROQ_API_KEY
provider_interface: openai
provider_interface: groq
model: llama-3.1-8b-instant
base_url: https://api.groq.com
tracing:
random_sampling: 100