# plano/envoyfilter/envoy.template.yaml
# Jinja2 template that is rendered into an Envoy bootstrap configuration (YAML).

# Envoy admin interface.
# NOTE(review): this binds the admin endpoint on all interfaces (0.0.0.0);
# confirm that port 9901 is not reachable from untrusted networks.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901
# Statically configured listeners and upstream clusters.
static_resources:
  # `listeners` is a repeated field, so it is written as a sequence.
  listeners:
    # Ingress HTTP listener on port 10000.
    - address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                codec_type: AUTO
                # Overwrite the request scheme with https.
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/arch_access.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        # Path-prefixed routes to the named model clusters.
                        - match:
                            prefix: "/mistral/v1/chat/completions"
                          route:
                            auto_host_rewrite: true
                            cluster: mistral_7b_instruct
                            timeout: 60s
                        - match:
                            prefix: "/bolt_fc_1b/v1/chat/completions"
                          route:
                            # Strip the /bolt_fc_1b prefix before forwarding.
                            prefix_rewrite: /v1/chat/completions
                            auto_host_rewrite: true
                            cluster: bolt_fc_1b
                            timeout: 120s
                        # /v1/chat/completions is dispatched on the value of
                        # the x-bolt-llm-provider request header. No catch-all
                        # route is defined in this virtual host.
                        - match:
                            prefix: "/v1/chat/completions"
                            headers:
                              - name: "x-bolt-llm-provider"
                                string_match:
                                  exact: openai
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                            timeout: 60s
                        - match:
                            prefix: "/v1/chat/completions"
                            headers:
                              - name: "x-bolt-llm-provider"
                                string_match:
                                  exact: mistral
                          route:
                            auto_host_rewrite: true
                            cluster: mistral
                            timeout: 60s
                http_filters:
                  # Wasm HTTP filter loaded from a local .wasm file; runs
                  # before the router filter.
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The rendered katanemo_config is embedded as a
                            # literal block scalar. The body sits at column 30
                            # and the filter is indent(30) so continuation
                            # lines of a multi-line value line up with the
                            # first one; keep the two numbers in sync.
                            value: |
                              {{ katanemo_config | indent(30) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  clusters:
    # External LLM providers, reached over TLS on port 443 using HTTP/2.
    # NOTE(review): neither TLS context below sets a validation_context;
    # confirm whether upstream certificate verification is configured.
    - name: openai
      connect_timeout: 5s
      dns_lookup_family: V4_ONLY
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
    - name: mistral
      connect_timeout: 5s
      dns_lookup_family: V4_ONLY
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: mistral
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.mistral.ai
                      port_value: 443
                  hostname: "api.mistral.ai"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          # NOTE(review): unlike the openai cluster, no tls_params are pinned
          # here; confirm whether that difference is intentional.
          sni: api.mistral.ai
    # Plain-HTTP clusters resolved by name via STRICT_DNS.
    - name: model_server
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: model_server
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: model_server
                      port_value: 80
                  hostname: "model_server"
    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"
    # The bolt_fc_1b cluster resolves the function_resolver host but
    # advertises hostname bolt_fc_1b.
    - name: bolt_fc_1b
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: bolt_fc_1b
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: function_resolver
                      port_value: 80
                  hostname: "bolt_fc_1b"
    # One STRICT_DNS cluster per entry in arch_clusters. String values are
    # quoted so the YAML parser cannot retype them (e.g. a name that looks
    # like a boolean or number); port_value stays a bare integer.
    {% for _, cluster in arch_clusters.items() %}
    - name: "{{ cluster.name }}"
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: "{{ cluster.name }}"
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: "{{ cluster.address }}"
                      port_value: {{ cluster.port }}
                  hostname: "{{ cluster.address }}"
    {% endfor %}