# Envoy bootstrap configuration, rendered through Jinja2 before Envoy loads it.
#
# Template inputs referenced below:
#   arch_tracing       - mapping; tracing is enabled when its "random_sampling"
#                        entry is present and greater than 0
#   arch_llm_providers - iterable of objects with `name` and `provider`
#   arch_clusters      - mapping whose values carry `name`, `endpoint`, `port`
#                        and an optional `connect_timeout`
#   arch_config        - text embedded as the prompt_gateway wasm configuration
#   arch_llm_config    - text embedded as the llm_gateway wasm configuration
#
# Templated string values are quoted so that empty, boolean-looking or
# numeric-looking values are still parsed as YAML strings.

# NOTE(review): the admin interface listens on every interface; confirm that
# port 9901 is not reachable from untrusted networks.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901

static_resources:
  listeners:
    # Listener on port 10000: runs the prompt_gateway wasm filter, then routes
    # on the x-arch-llm-provider / x-arch-upstream request headers.
    - name: arch_listener_http
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      traffic_direction: INBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
{% endif %}
                stat_prefix: arch_listener_http
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_ingress.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        # One route per configured LLM provider, selected by
                        # the x-arch-llm-provider header.
{% for provider in arch_llm_providers %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: "{{ provider.name }}"
                          route:
                            auto_host_rewrite: true
                            cluster: "{{ provider.provider }}"
                            timeout: 60s
{% endfor %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: arch_llm_listener
                          route:
                            auto_host_rewrite: true
                            cluster: arch_llm_listener
                            timeout: 60s
                        # Fallback: no routing header matched.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider or x-arch-upstream header not set, cannot perform routing\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: prompt_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The first line of the block scalar sits at column
                            # 32 so that it lines up with the indent(32) applied
                            # to the remaining lines.
                            value: |
                                {{ arch_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    # Listener on port 11000: no wasm filter; routes purely on the
    # x-arch-upstream header.
    - name: arch_internal
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 11000
      traffic_direction: OUTBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
{% endif %}
                stat_prefix: arch_internal
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_internal.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: model_server
                          route:
                            auto_host_rewrite: true
                            cluster: model_server
                            timeout: 60s
                        # NOTE(review): requests tagged arch_fc are sent to the
                        # model_server cluster, not the arch_fc cluster. Both
                        # clusters point at host.docker.internal:51000 below, so
                        # the destination is the same; confirm this is intended.
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: arch_fc
                          route:
                            auto_host_rewrite: true
                            cluster: model_server
                            timeout: 60s
                        # One route per user-configured cluster.
{% for _, cluster in arch_clusters.items() %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: "{{ cluster.name }}"
                          route:
                            auto_host_rewrite: true
                            cluster: "{{ cluster.name }}"
                            timeout: 60s
{% endfor %}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    # Listener on port 12000: runs the llm_gateway wasm filter, then routes on
    # the x-arch-llm-provider header.
    - name: arch_listener_llm
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 12000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
{% endif %}
                # Distinct prefix so this listener's connection-manager stats
                # are not merged with those of arch_listener_http.
                stat_prefix: arch_listener_llm
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_llm.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
{% for provider in arch_llm_providers %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: "{{ provider.name }}"
                          route:
                            auto_host_rewrite: true
                            cluster: "{{ provider.provider }}"
                            timeout: 60s
{% endfor %}
                        # Fallback: no provider header matched.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider header not set, cannot perform routing\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The first line of the block scalar sits at column
                            # 32 so that it lines up with the indent(32) applied
                            # to the remaining lines.
                            value: |
                                {{ arch_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
    # Hosted LLM providers, reached over TLS on port 443.
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3

    - name: mistral
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.mistral.ai
                      port_value: 443
                  hostname: "api.mistral.ai"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai

    # Plain-text upstreams addressed by hostname.
    - name: model_server
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: model_server
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 51000
                  hostname: "model_server"

    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"

    - name: arch_fc
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_fc
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 51000
                  hostname: "arch_fc"

    # One cluster per user-configured entry. connect_timeout falls back to 5s
    # when the entry does not set one (or sets an empty value).
{% for _, cluster in arch_clusters.items() %}
    - name: "{{ cluster.name }}"
      connect_timeout: "{{ cluster.connect_timeout | default('5s', true) }}"
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: "{{ cluster.name }}"
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: "{{ cluster.endpoint }}"
                      port_value: {{ cluster.port }}
                  hostname: "{{ cluster.name }}"
{% endfor %}

    # Loop-back clusters that target the arch_internal (11000) and
    # arch_listener_llm (12000) listeners defined above.
    # NOTE(review): 0.0.0.0 is used as the upstream address; this depends on the
    # host OS treating it as a local address. 127.0.0.1 may be more portable;
    # confirm before changing.
    - name: arch_internal
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_internal
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 11000
                  hostname: arch_internal

    - name: arch_llm_listener
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_llm_listener
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 12000
                  hostname: arch_llm_listener

    # Trace collector cluster; only emitted when tracing is enabled. Configured
    # for HTTP/2 upstream connections (used by the envoy_grpc tracer above).
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
    - name: opentelemetry_collector
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: opentelemetry_collector
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 4317
{% endif %}