Add the ability to use LLM Providers from the Arch config (#112)

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
2026-04-30 03:16:28 +02:00 · 2024-10-03 10:57:01 -07:00 · 2024-10-03 10:57:01 -07:00 · 8ea917aae5
commit 8ea917aae5
parent 1b57a49c9d
16 changed files with 295 additions and 210 deletions
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -34,26 +34,18 @@ static_resources:
                          auto_host_rewrite: true
                          cluster: mistral_7b_instruct
                          timeout: 60s
+                    {% for provider in arch_llm_providers %}
                      - match:
-                          prefix: "/v1/chat/completions"
+                          prefix: "/"
                          headers:
                            - name: "x-arch-llm-provider"
                              string_match:
-                                exact: openai
+                                exact: "{{ provider.name }}"  # quoted: rendered name must stay a YAML string
                        route:
                          auto_host_rewrite: true
-                          cluster: openai
-                          timeout: 60s
-                      - match:
-                          prefix: "/v1/chat/completions"
-                          headers:
-                            - name: "x-arch-llm-provider"
-                              string_match:
-                                exact: mistral
-                        route:
-                          auto_host_rewrite: true
-                          cluster: mistral
+                          cluster: "{{ provider.provider }}"  # quoted: rendered cluster name must stay a YAML string
                          timeout: 60s
+                    {% endfor %}
              http_filters:
                - name: envoy.filters.http.wasm
                  typed_config:
@@ -65,7 +57,7 @@ static_resources:
                        configuration:
                          "@type": "type.googleapis.com/google.protobuf.StringValue"
                          value: |
-                              {{ katanemo_config | indent(30) }}
+                              {{ arch_config | indent(30) }}
                        vm_config:
                          runtime: "envoy.wasm.runtime.v8"
                          code:
@@ -75,9 +67,6 @@ static_resources:
                  typed_config:
                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  clusters:
-    # LLM Host
-    # Embedding Providers
-    # External LLM Providers
    - name: openai
      connect_timeout: 5s
      dns_lookup_family: V4_ONLY