# Mirror of https://github.com/katanemo/plano.git
# Synced 2026-04-25 00:36:34 +02:00 (501 lines, 21 KiB, YAML)
# Envoy admin endpoint: bound to every interface (0.0.0.0) on port 9901.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901
|
|
# Static resources; the listener list opens with arch_listener_http.
static_resources:
  listeners:
    # HTTP listener on 0.0.0.0:10000. Its single route (prefix "/") sends
    # every request to the arch_prompt_gateway_listener cluster, 60s timeout.
    - name: arch_listener_http
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      traffic_direction: INBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Tracing is rendered only when arch_tracing has a
                # random_sampling entry greater than 0; spans are reported
                # under the service name arch_gateway.
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: arch_gateway
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                stat_prefix: arch_listener_http
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                # Access log is written to a file inside the container.
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_ingress.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: arch_prompt_gateway_listener
                            timeout: 60s
                # Only the router filter runs on this listener.
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# ---- listener: arch_prompt_gateway_listener ----
    # HTTP listener on 0.0.0.0:10001. One route is generated per entry of
    # arch_llm_providers; a request is matched on the x-arch-llm-provider
    # header (exact match on provider.name) and sent to the cluster named by
    # provider.provider. No catch-all route is defined on this listener.
    - name: arch_prompt_gateway_listener
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 10001
      traffic_direction: INBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Tracing is rendered only when arch_tracing has a
                # random_sampling entry greater than 0; spans are reported
                # under the service name prompt_processor.
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: prompt_processor
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                stat_prefix: arch_prompt_gateway_listener
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_ingress_prompt.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        {% for provider in arch_llm_providers %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ provider.name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ provider.provider }}
                            timeout: 60s
                        {% endfor %}
                # Filters in listed order: gzip compressor, prompt_gateway
                # wasm plugin, llm_gateway wasm plugin, gzip decompressor,
                # router.
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: prompt_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The placeholder below must stay at column 32:
                            # the indent(32) filter pads every line after the
                            # first by 32 spaces, and all lines of the block
                            # scalar need the same indentation.
                            value: |
                                {{ arch_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # Same column-32 alignment rule as the
                            # prompt_gateway plugin configuration.
                            value: |
                                {{ arch_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          window_bits: 9
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# ---- listener: arch_internal ----
    # HTTP listener on 0.0.0.0:11000 (traffic_direction OUTBOUND). Requests
    # are dispatched on the x-arch-upstream header: first to the fixed list
    # of model-server clusters, then to every cluster in arch_clusters. The
    # header value is also the name of the target cluster.
    - name: arch_internal
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 11000
      traffic_direction: OUTBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Tracing is rendered only when arch_tracing has a
                # random_sampling entry greater than 0.
                # NOTE(review): service_name is prompt_processor, the same
                # value arch_prompt_gateway_listener reports under - confirm
                # this is intended.
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: prompt_processor
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                stat_prefix: arch_internal
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_internal.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        # Routes for the fixed set of internal clusters.
                        {% for internal_cluster in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: {{ internal_cluster }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ internal_cluster }}
                            timeout: 60s
                        {% endfor %}

                        # Routes for the clusters supplied in arch_clusters.
                        {% for _, cluster in arch_clusters.items() %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: {{ cluster.name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ cluster.name }}
                            timeout: 60s
                        {% endfor %}
                # Only the router filter runs on this listener.
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# ---- listener: arch_listener_llm ----
    # HTTP listener on 0.0.0.0:12000. Routes, in order:
    #   1. prefix /healthz              -> openai cluster
    #   2. one route per arch_llm_providers entry, matched on the
    #      x-arch-llm-provider header   -> cluster named by provider.provider
    #   3. catch-all "/"                -> direct 400 response
    # NOTE(review): unlike the other listeners in this file, no
    # traffic_direction is set here - confirm that is intended.
    - name: arch_listener_llm
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 12000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Tracing is rendered only when arch_tracing has a
                # random_sampling entry greater than 0; spans are reported
                # under the service name llm_gateway.
                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: llm_gateway
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
                # Was arch_listener_http, the prefix already used by the
                # listener of that name, which merged the HTTP stats of the
                # two listeners. Every other listener in this file uses its
                # own name as the prefix; this one now does as well.
                stat_prefix: arch_listener_llm
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_llm.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        # NOTE(review): /healthz is forwarded to the openai
                        # cluster rather than answered locally - confirm.
                        - match:
                            prefix: "/healthz"
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                            timeout: 60s
                        {% for provider in arch_llm_providers %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ provider.name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ provider.provider }}
                            timeout: 60s
                        {% endfor %}
                        # Listed last so it only applies when no route above
                        # matched.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
                # Filters in listed order: gzip compressor, llm_gateway wasm
                # plugin, gzip decompressor, router.
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The placeholder below must stay at column 32:
                            # the indent(32) filter pads every line after the
                            # first by 32 spaces, and all lines of the block
                            # scalar need the same indentation.
                            value: |
                                {{ arch_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          window_bits: 9
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# ---- upstream clusters ----
  clusters:
    # openai: api.openai.com:443 over TLS (SNI api.openai.com), with the
    # negotiated protocol version limited to TLS 1.2 through TLS 1.3.
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
|
|
# ---- cluster: mistral ----
    # mistral: api.mistral.ai:443 over TLS (SNI api.mistral.ai). No
    # tls_params are set, so the TLS version range is left at the defaults.
    - name: mistral
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.mistral.ai
                      port_value: 443
                  hostname: "api.mistral.ai"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai
|
|
# ---- clusters: embeddings, zeroshot, guard, arch_fc, hallucination ----
    # One cluster per name in the list below. All of them point at the same
    # endpoint, host.docker.internal:51000; only the cluster name and the
    # endpoint hostname differ.
    {% for internal_cluster in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
    - name: {{ internal_cluster }}
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: {{ internal_cluster }}
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 51000
                  hostname: {{ internal_cluster }}
    {% endfor %}
|
|
# ---- cluster: mistral_7b_instruct ----
    # mistral_7b_instruct: plaintext upstream reached by the DNS name
    # mistral_7b_instruct on port 10001.
    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"
|
|
# ---- clusters generated from arch_clusters ----
    # One cluster per value of arch_clusters, built from that entry's name,
    # endpoint and port. connect_timeout comes from the entry when it is set
    # and falls back to 5s otherwise.
    {% for _, cluster in arch_clusters.items() %}
    - name: {{ cluster.name }}
      # The if/else/endif tags end with a whitespace-trimming dash, so the
      # text after each tag is pulled up to the indentation written in front
      # of the tag. Keep the tags at the same column as the keys around them.
      {% if cluster.connect_timeout -%}
      connect_timeout: {{ cluster.connect_timeout }}
      {% else -%}
      connect_timeout: 5s
      {% endif -%}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: {{ cluster.name }}
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: {{ cluster.endpoint }}
                      port_value: {{ cluster.port }}
                  hostname: {{ cluster.name }}
    {% endfor %}
|
|
# ---- cluster: arch_internal ----
    # arch_internal: targets port 11000, the port the arch_internal listener
    # in this file binds.
    # NOTE(review): the destination address is 0.0.0.0; connecting to the
    # wildcard address is platform dependent - confirm whether a loopback
    # address was meant.
    - name: arch_internal
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_internal
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 11000
                  hostname: arch_internal
|
|
|
|
# ---- cluster: arch_prompt_gateway_listener ----
    # arch_prompt_gateway_listener: targets port 10001, the port the listener
    # of the same name in this file binds.
    # NOTE(review): the destination address is 0.0.0.0; connecting to the
    # wildcard address is platform dependent - confirm whether a loopback
    # address was meant.
    - name: arch_prompt_gateway_listener
      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_prompt_gateway_listener
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 10001
                  hostname: arch_prompt_gateway_listener
|
|
|
|
# ---- cluster: opentelemetry_collector (only when tracing is enabled) ----
    # Rendered under the same condition as the tracing sections of the
    # listeners: arch_tracing has a random_sampling entry greater than 0.
    # The collector is reached at host.docker.internal:4317 and the cluster
    # is explicitly configured for HTTP/2. No connect_timeout is set here.
    {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
    - name: opentelemetry_collector
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: opentelemetry_collector
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 4317
    {% endif %}
|