# Envoy bootstrap configuration, written as a Jinja template.
#
# Template inputs referenced below:
#   katanemo_config - text embedded as the WASM filter's configuration string
#   arch_clusters   - mapping whose values expose `name`, `address` and `port`;
#                     one STRICT_DNS cluster is emitted per entry
admin:
  # NOTE(review): the admin endpoint listens on all interfaces; confirm that
  # port 9901 is not reachable from untrusted networks.
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901

static_resources:
  # `listeners` is a repeated field, so it is written as a sequence.
  listeners:
    - address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                codec_type: AUTO
                # Overwrite the request scheme with https.
                scheme_header_transformation:
                  scheme_to_overwrite: https
                route_config:
                  name: local_routes
                  virtual_hosts:
                    # Requests addressed to api.openai.com go straight to the
                    # openai cluster.
                    - name: openai
                      domains:
                        - "api.openai.com"
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: openai

                    # Every other host. Routes are evaluated in order and the
                    # first match wins, so the order below is significant.
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        # NOTE(review): no prefix_rewrite here, so the upstream
                        # receives the path including "/mistral" - confirm that
                        # is intended.
                        - match:
                            prefix: "/mistral/v1/chat/completions"
                          route:
                            auto_host_rewrite: true
                            cluster: mistral_7b_instruct
                            timeout: 60s

                        # The matched prefix is replaced with
                        # /v1/chat/completions before forwarding.
                        - match:
                            prefix: "/bolt_fc_1b/v1/chat/completions"
                          route:
                            prefix_rewrite: /v1/chat/completions
                            auto_host_rewrite: true
                            cluster: bolt_fc_1b
                            timeout: 120s

                        # Chat completions carrying an Authorization header go
                        # to openai. This route must stay above the
                        # unconditional one that follows.
                        - match:
                            prefix: "/v1/chat/completions"
                            # `headers` is a repeated field: one matcher per
                            # sequence item.
                            headers:
                              - name: "Authorization"
                                present_match: true
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                            timeout: 60s

                        # Chat completions without an Authorization header.
                        - match:
                            prefix: "/v1/chat/completions"
                          route:
                            auto_host_rewrite: true
                            cluster: mistral_7b_instruct
                            timeout: 60s

                        - match:
                            prefix: "/embeddings"
                          route:
                            cluster: model_server

                        # Catch-all: answer locally with a fixed 200 response.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 200
                            body:
                              inline_string: "Inspect the HTTP header: custom-header.\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The block scalar content starts at column 30 so
                            # that it lines up with the continuation lines
                            # produced by indent(30). Keep the two in sync when
                            # re-indenting this file.
                            value: |
                              {{ katanemo_config | indent(30) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
    # LLM Host
    # Embedding Providers
    # External LLM Providers
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      # Explicitly speak HTTP/2 to this upstream.
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      # NOTE(review): no validation_context (trusted CA bundle) is configured,
      # so the upstream certificate does not appear to be verified - consider
      # adding one that matches the CA path in the deployment image.
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3

    - name: model_server
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: model_server
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: model_server
                      port_value: 80
                  hostname: "model_server"

    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"

    # The cluster is named bolt_fc_1b but resolves the host function_resolver.
    - name: bolt_fc_1b
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: bolt_fc_1b
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: function_resolver
                      port_value: 80
                  hostname: "bolt_fc_1b"

    # One cluster per entry of `arch_clusters`. String values are quoted so
    # that an empty, boolean-like or numeric-looking expansion is still parsed
    # as a string; port_value stays unquoted because it must be an integer.
{% for _, cluster in arch_clusters.items() %}
    - name: "{{ cluster.name }}"
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: "{{ cluster.name }}"
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: "{{ cluster.address }}"
                      port_value: {{ cluster.port }}
                  hostname: "{{ cluster.address }}"
{% endfor %}