plano/arch/envoy.template.yaml

admin:
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }
static_resources:
  listeners:
    - name: arch_listener_http
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      traffic_direction: INBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                stat_prefix: arch_listener_http
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                - name: envoy.access_loggers.file
                  typed_config:
                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                    path: "/var/log/access_ingress.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                      {% for provider in arch_llm_providers %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ provider.name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ provider.provider }}
                            timeout: 60s
                      {% endfor %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: arch_llm_listener
                          route:
                            auto_host_rewrite: true
                            cluster: arch_llm_listener
                            timeout: 60s
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider or x-arch-upstream header not set, cannot perform routing\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: prompt_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            value: |
                                {{ arch_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    - name: arch_internal
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 11000
      traffic_direction: OUTBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                stat_prefix: arch_internal
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                - name: envoy.access_loggers.file
                  typed_config:
                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                    path: "/var/log/access_internal.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: model_server
                          route:
                            auto_host_rewrite: true
                            cluster: model_server
                            timeout: 60s
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: arch_fc
                          route:
                            auto_host_rewrite: true
                            cluster: model_server
                            timeout: 60s
                        {% for _, cluster in arch_clusters.items() %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: {{ cluster.name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ cluster.name }}
                            timeout: 60s
                        {% endfor %}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    - name: arch_listener_llm
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 12000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                stat_prefix: arch_listener_http
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                - name: envoy.access_loggers.file
                  typed_config:
                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                    path: "/var/log/access_llm.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                      {% for provider in arch_llm_providers %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ provider.name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ provider.provider }}
                            timeout: 60s
                      {% endfor %}
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider header not set, cannot perform routing\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            value: |
                                {{ arch_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
    - name: mistral
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.mistral.ai
                      port_value: 443
                  hostname: "api.mistral.ai"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai
    - name: model_server
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: model_server
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 51000
                  hostname: "model_server"
    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"
    - name: arch_fc
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_fc
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 51000
                  hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %}
    - name: {{ cluster.name }}
      {% if cluster.connect_timeout -%}
      connect_timeout: {{ cluster.connect_timeout }}
      {% else -%}
      connect_timeout: 5s
      {% endif -%}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: {{ cluster.name }}
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: {{ cluster.endpoint }}
                      port_value: {{ cluster.port }}
                  hostname: {{ cluster.name }}
{% endfor %}
    - name: arch_internal
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_internal
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 11000
                  hostname: arch_internal

    - name: arch_llm_listener
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_llm_listener
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 12000
                  hostname: arch_llm_listener

{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
    - name: opentelemetry_collector
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: opentelemetry_collector
        endpoints:
        - lb_endpoints:
          - endpoint:
              address:
                socket_address:
                  address: host.docker.internal
                  port_value: 4317
{% endif %}