From 5fcdaa640ef69a1928b6aed1cb1d77225f74dc57 Mon Sep 17 00:00:00 2001
From: Adil Hafeez
Date: Fri, 16 May 2025 13:05:50 -0700
Subject: [PATCH] multi llm support

---
Review notes (ignored by git am/apply, not part of the commit message):
* arch/Dockerfile: prompt_gateway and llm_gateway are now built by a single
  workspace-level cargo invocation (-p ... -p ...), and a brightstaff build
  is added.
  NOTE(review): brightstaff is also built with --target wasm32-wasip1;
  confirm that target is intended for that crate.
* arch/envoy.template.yaml: adds three upstream clusters (anthropic, gemini,
  groq), each resolving its API hostname via LOGICAL_DNS on port 443 with an
  UpstreamTlsContext whose SNI matches the hostname and TLS 1.2-1.3 bounds.
* demos/use_cases/llm_routing/arch_config.yaml: adds the claude-sonnet
  provider (key read from $ANTHROPIC_API_KEY) and switches the deepseek and
  groq entries from provider_interface "openai" to their own interfaces.

 arch/Dockerfile                              |  4 +-
 arch/envoy.template.yaml                     | 76 ++++++++++++++++++++
 demos/use_cases/llm_routing/arch_config.yaml |  9 ++-
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/arch/Dockerfile b/arch/Dockerfile
index 7f933da5..9d0a4d13 100644
--- a/arch/Dockerfile
+++ b/arch/Dockerfile
@@ -4,8 +4,8 @@ RUN rustup -v target add wasm32-wasip1
 WORKDIR /arch
 COPY crates .
 
-RUN cd prompt_gateway && cargo build --release --target wasm32-wasip1
-RUN cd llm_gateway && cargo build --release --target wasm32-wasip1
+RUN cargo build --release --target wasm32-wasip1 -p prompt_gateway -p llm_gateway
+RUN cargo build --release --target wasm32-wasip1 -p brightstaff
 
 # copy built filter into envoy image
 FROM docker.io/envoyproxy/envoy:v1.32-latest as envoy
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index cac17187..549526bb 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -477,6 +477,82 @@ static_resources:
             tls_params:
               tls_minimum_protocol_version: TLSv1_2
               tls_maximum_protocol_version: TLSv1_3
+
+    - name: anthropic
+      connect_timeout: 0.5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: anthropic
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: api.anthropic.com
+                      port_value: 443
+                  hostname: "api.anthropic.com"
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          sni: api.anthropic.com
+          common_tls_context:
+            tls_params:
+              tls_minimum_protocol_version: TLSv1_2
+              tls_maximum_protocol_version: TLSv1_3
+
+    - name: gemini
+      connect_timeout: 0.5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: gemini
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: generativelanguage.googleapis.com
+                      port_value: 443
+                  hostname: "generativelanguage.googleapis.com"
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          sni: generativelanguage.googleapis.com
+          common_tls_context:
+            tls_params:
+              tls_minimum_protocol_version: TLSv1_2
+              tls_maximum_protocol_version: TLSv1_3
+
+    - name: groq
+      connect_timeout: 0.5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: groq
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: api.groq.com
+                      port_value: 443
+                  hostname: "api.groq.com"
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          sni: api.groq.com
+          common_tls_context:
+            tls_params:
+              tls_minimum_protocol_version: TLSv1_2
+              tls_maximum_protocol_version: TLSv1_3
+
     - name: mistral
       connect_timeout: 0.5s
       type: LOGICAL_DNS
diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml
index 11087d3a..9dcb11b0 100644
--- a/demos/use_cases/llm_routing/arch_config.yaml
+++ b/demos/use_cases/llm_routing/arch_config.yaml
@@ -24,15 +24,20 @@ llm_providers:
     provider_interface: mistral
     model: ministral-3b-latest
 
+  - name: claude-sonnet
+    access_key: $ANTHROPIC_API_KEY
+    provider_interface: anthropic
+    model: claude-3-7-sonnet-latest
+
   - name: deepseek
     access_key: $DEEPSEEK_API_KEY
-    provider_interface: openai
+    provider_interface: deepseek
     model: deepseek-reasoner
     base_url: https://api.deepseek.com/
 
   - name: groq
     access_key: $GROQ_API_KEY
-    provider_interface: openai
+    provider_interface: groq
     model: llama-3.1-8b-instant
     base_url: https://api.groq.com
 