diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index a72db695..b30282a0 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -70,8 +70,11 @@ properties: provider_interface: type: string enum: - - openai + - claude + - deepseek + - groq - mistral + - openai access_key: type: string model: diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 3a2856b6..c1373335 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -451,6 +451,128 @@ static_resources: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router clusters: + + - name: claude + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: claude + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.anthropic.com + port_value: 443 + hostname: "api.anthropic.com" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.anthropic.com + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + + - name: deepseek + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: deepseek + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.deepseek.com + port_value: 443 + hostname: "api.deepseek.com" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.deepseek.com + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + + - name: gemini + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + 
cluster_name: gemini + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: generativelanguage.googleapis.com + port_value: 443 + hostname: "generativelanguage.googleapis.com" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: generativelanguage.googleapis.com + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + + - name: groq + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: groq + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.groq.com + port_value: 443 + hostname: "api.groq.com" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.groq.com + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + + - name: mistral + connect_timeout: 0.5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: mistral + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.mistral.ai + port_value: 443 + hostname: "api.mistral.ai" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.mistral.ai + - name: openai connect_timeout: 0.5s type: LOGICAL_DNS @@ -475,26 +597,7 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - - name: mistral - connect_timeout: 0.5s - type: LOGICAL_DNS - dns_lookup_family: V4_ONLY - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: mistral - endpoints: - - lb_endpoints: - - 
endpoint: - address: - socket_address: - address: api.mistral.ai - port_value: 443 - hostname: "api.mistral.ai" - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - sni: api.mistral.ai + {% for internal_cluster in ["arch_fc", "model_server"] %} - name: {{ internal_cluster }} connect_timeout: 0.5s diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py index 47590f2f..59d42ab4 100644 --- a/arch/tools/cli/core.py +++ b/arch/tools/cli/core.py @@ -103,6 +103,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): # Check if timeout is reached if elapsed_time > log_timeout: log.info(f"stopping log monitoring after {log_timeout} seconds.") + stream_gateway_logs(follow=False) sys.exit(1) if prompt_gateway_health_check_status or llm_gateway_health_check_status: diff --git a/crates/common/src/api/open_ai.rs b/crates/common/src/api/open_ai.rs index 7b3cf66c..6db30190 100644 --- a/crates/common/src/api/open_ai.rs +++ b/crates/common/src/api/open_ai.rs @@ -330,6 +330,7 @@ impl TryFrom<&str> for ChatCompletionStreamResponseServerEvents { let response_chunks: VecDeque = value .lines() .filter(|line| line.starts_with("data: ")) + .filter(|line| !line.starts_with(r#"data: {"type": "ping"}"#)) .map(|line| line.get(6..).unwrap()) .filter(|data_chunk| *data_chunk != "[DONE]") .map(serde_json::from_str::) @@ -677,4 +678,37 @@ data: [DONE] "Hello! How can I assist you today?" 
); } + + #[test] + fn stream_chunk_parse_claude() { + const CHUNK_RESPONSE: &str = r#"data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"role":"assistant"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"type": "ping"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":"Hello!"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":" How can I assist you today? Whether"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":" you have a question, need information"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":", or just want to chat about"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":" something, I'm here to help. 
What woul"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{"content":"d you like to talk about?"}}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: {"id":"msg_01DZDMxYSgq8aPQxMQoBv6Kb","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"created":1747685264,"model":"claude-3-7-sonnet-latest","object":"chat.completion.chunk"} + +data: [DONE] +"#; + + let sever_events: ChatCompletionStreamResponseServerEvents = + ChatCompletionStreamResponseServerEvents::try_from(CHUNK_RESPONSE).unwrap(); + assert_eq!(sever_events.events.len(), 8); + + assert_eq!( + sever_events.to_string(), + "Hello! How can I assist you today? Whether you have a question, need information, or just want to chat about something, I'm here to help. What would you like to talk about?" + ); + } } diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 71c13f8b..87293583 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -143,19 +143,28 @@ pub struct EmbeddingProviver { pub model: String, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum LlmProviderType { - #[serde(rename = "openai")] - OpenAI, + #[serde(rename = "claude")] + Claude, + #[serde(rename = "deepseek")] + Deepseek, + #[serde(rename = "groq")] + Groq, #[serde(rename = "mistral")] Mistral, + #[serde(rename = "openai")] + OpenAI, } impl Display for LlmProviderType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - LlmProviderType::OpenAI => write!(f, "openai"), + LlmProviderType::Claude => write!(f, "claude"), + LlmProviderType::Deepseek => write!(f, "deepseek"), + LlmProviderType::Groq => write!(f, "groq"), LlmProviderType::Mistral => write!(f, "mistral"), + LlmProviderType::OpenAI 
=> write!(f, "openai"), } } } @@ -175,6 +184,23 @@ pub struct LlmProvider { pub usage: Option, } +impl Default for LlmProvider { + fn default() -> Self { + Self { + name: "openai".to_string(), + provider_interface: LlmProviderType::OpenAI, + access_key: None, + model: None, + default: Some(true), + stream: Some(false), + endpoint: None, + port: None, + rate_limits: None, + usage: None, + } + } +} + impl Display for LlmProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index c5cdfe32..dd86109a 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -89,15 +89,7 @@ impl StreamContext { provider_hint, )); - // Check if we need to modify the path based on the provider's base_url - let needs_openai_prefix = self - .llm_provider - .as_ref() - .and_then(|provider| provider.endpoint.as_ref()) - .map(|url| url.contains("api.groq.com")) - .unwrap_or(false); - - if needs_openai_prefix { + if self.llm_provider.as_ref().unwrap().provider_interface == LlmProviderType::Groq { if let Some(path) = self.get_http_request_header(":path") { if path.starts_with("/v1/") { let new_path = format!("/openai{}", path); @@ -221,14 +213,7 @@ impl HttpContext for StreamContext { self.llm_provider = Some(Rc::new(LlmProvider { name: routing_header_value.to_string(), provider_interface: LlmProviderType::OpenAI, - access_key: None, - endpoint: None, - model: None, - default: None, - stream: None, - port: None, - rate_limits: None, - usage: None, + ..Default::default() })); } else { self.select_llm_provider(); @@ -539,6 +524,9 @@ impl HttpContext for StreamContext { } streaming_chunk } else { + if body_size == 0 { + return Action::Continue; + } debug!("non streaming response bytes read: 0:{}", body_size); match self.get_http_response_body(0, body_size) { Some(body) => body, diff --git 
a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml index b6463594..8585bc6f 100644 --- a/demos/samples_python/weather_forecast/arch_config.yaml +++ b/demos/samples_python/weather_forecast/arch_config.yaml @@ -19,9 +19,8 @@ overrides: llm_providers: - name: groq access_key: $GROQ_API_KEY - provider_interface: openai + provider_interface: groq model: llama-3.2-3b-preview - base_url: https://api.groq.com - name: gpt-4o access_key: $OPENAI_API_KEY diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh index 94b26e6d..bcf4c608 100644 --- a/demos/shared/test_runner/run_demo_tests.sh +++ b/demos/shared/test_runner/run_demo_tests.sh @@ -17,6 +17,12 @@ do docker compose up -d 2>&1 > /dev/null echo "starting hurl tests" hurl --test hurl_tests/*.hurl + if [ $? -ne 0 ]; then + echo "Hurl tests failed for $demo" + echo "docker logs for archgw:" + docker logs archgw + exit 1 + fi echo "stopping docker containers and archgw" archgw down docker compose down -v diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index 11087d3a..0cf8e8f3 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -12,7 +12,6 @@ llm_providers: access_key: $OPENAI_API_KEY provider_interface: openai model: gpt-4o-mini - default: true - name: gpt-4o access_key: $OPENAI_API_KEY @@ -24,17 +23,21 @@ llm_providers: provider_interface: mistral model: ministral-3b-latest + - name: claude-sonnet + access_key: $ANTHROPIC_API_KEY + provider_interface: claude + model: claude-3-7-sonnet-latest + default: true + - name: deepseek access_key: $DEEPSEEK_API_KEY - provider_interface: openai + provider_interface: deepseek model: deepseek-reasoner - base_url: https://api.deepseek.com/ - name: groq access_key: $GROQ_API_KEY - provider_interface: openai + provider_interface: groq model: llama-3.1-8b-instant - 
base_url: https://api.groq.com tracing: random_sampling: 100