mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 08:46:24 +02:00
1079 lines
46 KiB
YAML
1079 lines
46 KiB
YAML
# Envoy admin endpoint: bound to every interface (0.0.0.0) on port 9901.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901
|
|
|
|
# Custom histogram buckets applied to the stat whose name is exactly
# `wasmcustom.time_to_first_token`. The unit of the bucket bounds is not
# stated in this file.
stats_config:
  histogram_bucket_settings:
    match:
      exact: 'wasmcustom.time_to_first_token'
    buckets: [100, 500, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200,
              2400, 3000, 3500, 4000, 4500, 5000, 6000, 10000, 60000, 180000]
|
|
static_resources:
|
|
listeners:
|
|
# Inbound listener `ingress_traffic`.
# Bind address, port and route timeout come from `prompt_gateway_listener`.
# Every request is forwarded to the `arch_prompt_gateway_listener` cluster.
- name: ingress_traffic
  traffic_direction: INBOUND
  address:
    socket_address:
      address: {{ prompt_gateway_listener.address }}
      port_value: {{ prompt_gateway_listener.port }}
  filter_chains:
    - filters:
        - name: envoy.filters.network.http_connection_manager
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
            # Tracing is emitted only when a positive sampling rate is configured.
            {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
            generate_request_id: true
            tracing:
              provider:
                name: envoy.tracers.opentelemetry
                typed_config:
                  "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                  service_name: plano(inbound)
                  grpc_service:
                    timeout: 0.250s
                    envoy_grpc:
                      cluster_name: opentelemetry_collector
              random_sampling:
                value: {{ plano_tracing.random_sampling }}
              operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
            {% endif %}
            stat_prefix: plano(inbound)
            codec_type: AUTO
            scheme_header_transformation:
              scheme_to_overwrite: https
            access_log:
              - name: envoy.access_loggers.file
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                  path: '/var/log/access_ingress.log'
                  format: |
                    [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
            route_config:
              name: local_routes
              virtual_hosts:
                - name: local_service
                  domains: ['*']
                  routes:
                    # Single catch-all route.
                    - match:
                        prefix: '/'
                      route:
                        cluster: arch_prompt_gateway_listener
                        auto_host_rewrite: true
                        timeout: {{ prompt_gateway_listener.timeout }}
            http_filters:
              - name: envoy.filters.http.router
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# Inbound listener `ingress_traffic_prompt` on 0.0.0.0:10001.
# Requests are routed by the exact value of the `x-arch-llm-provider` header:
# one route per entry of `plano_model_providers`, plus one for
# `agent_orchestrator` when that variable is set.
# HTTP filter order: gzip compressor -> prompt_gateway wasm -> llm_gateway wasm
# -> gzip decompressor -> router.
- name: ingress_traffic_prompt
  traffic_direction: INBOUND
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 10001
  filter_chains:
    - filters:
        - name: envoy.filters.network.http_connection_manager
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
            stat_prefix: ingress_traffic
            codec_type: AUTO
            scheme_header_transformation:
              scheme_to_overwrite: https
            access_log:
              - name: envoy.access_loggers.file
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                  path: "/var/log/access_ingress_prompt.log"
                  format: |
                    [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
            route_config:
              name: local_routes
              virtual_hosts:
                - name: local_service
                  domains:
                    - "*"
                  routes:
                    {% for provider in plano_model_providers %}
                    # A provider with an explicit endpoint uses its own cluster;
                    # otherwise the cluster named after its provider interface.
                    {% set llm_cluster_name = provider.cluster_name if provider.endpoint else provider.provider_interface %}
                    - match:
                        prefix: "/"
                        headers:
                          - name: "x-arch-llm-provider"
                            string_match:
                              exact: {{ llm_cluster_name }}
                      route:
                        auto_host_rewrite: true
                        cluster: {{ llm_cluster_name }}
                        timeout: 300s
                    {% endfor %}

                    {% if agent_orchestrator %}
                    - match:
                        prefix: "/"
                        headers:
                          - name: "x-arch-llm-provider"
                            string_match:
                              exact: {{ agent_orchestrator }}
                      route:
                        auto_host_rewrite: true
                        cluster: {{ agent_orchestrator }}
                        timeout: 300s
                    {% endif %}
            http_filters:
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: compress
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                      memory_level: 3
                      window_bits: 10
              - name: envoy.filters.http.wasm_prompt
                typed_config:
                  "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                  type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                  value:
                    config:
                      name: "http_config"
                      root_id: prompt_gateway
                      configuration:
                        "@type": "type.googleapis.com/google.protobuf.StringValue"
                        # The expression below must start at column 32 so that
                        # its first line lines up with the indent(32) applied
                        # to the following lines.
                        value: |
                                {{ plano_config | indent(32) }}
                      vm_config:
                        runtime: "envoy.wasm.runtime.v8"
                        code:
                          local:
                            filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
              - name: envoy.filters.http.wasm_llm
                typed_config:
                  "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                  type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                  value:
                    config:
                      name: "http_config"
                      root_id: llm_gateway
                      configuration:
                        "@type": "type.googleapis.com/google.protobuf.StringValue"
                        # Same column-32 alignment requirement as above.
                        value: |
                                {{ plano_llm_config | indent(32) }}
                      vm_config:
                        runtime: "envoy.wasm.runtime.v8"
                        code:
                          local:
                            filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: decompress
                    typed_config:
                      "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                      # window_bits is deliberately left at Envoy's default (15).
                      # The previous override of 9 was smaller than the gzip
                      # compressor window configured above (10); a decompressor
                      # window must be at least as large as the compressor's.
                      chunk_size: 8192
                      # If this ratio is set too low, then body data will not be decompressed completely.
                      max_inflate_ratio: 1000
              - name: envoy.filters.http.router
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# Outbound listener `outbound_api_traffic` on 0.0.0.0:11000.
# Requests are routed by the exact value of the `x-arch-upstream` header:
# a fixed route for `bright_staff`, then one route per key of `plano_clusters`.
- name: outbound_api_traffic
  traffic_direction: OUTBOUND
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 11000
  filter_chains:
    - filters:
        - name: envoy.filters.network.http_connection_manager
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
            {# Tracing is disabled for this listener. The block below used to be
               hidden with YAML "#" comments only, which the template engine
               ignores, so its tags were still evaluated on every render.
               It is kept here, inert, for reference:

               {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
               generate_request_id: true
               tracing:
                 provider:
                   name: envoy.tracers.opentelemetry
                   typed_config:
                     "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                     grpc_service:
                       envoy_grpc:
                         cluster_name: opentelemetry_collector
                       timeout: 0.250s
                     service_name: tools
                 random_sampling:
                   value: {{ plano_tracing.random_sampling }}
               {% endif %}
            #}
            stat_prefix: outbound_api_traffic
            codec_type: AUTO
            scheme_header_transformation:
              scheme_to_overwrite: https
            access_log:
              - name: envoy.access_loggers.file
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                  path: "/var/log/access_internal.log"
                  format: |
                    [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
            route_config:
              name: local_routes
              virtual_hosts:
                - name: local_service
                  domains:
                    - "*"
                  routes:
                    - match:
                        prefix: "/"
                        headers:
                          - name: "x-arch-upstream"
                            string_match:
                              exact: bright_staff
                      route:
                        auto_host_rewrite: true
                        cluster: bright_staff
                        timeout: 300s
                    {% for cluster_name, cluster in plano_clusters.items() %}
                    - match:
                        prefix: "/"
                        headers:
                          - name: "x-arch-upstream"
                            string_match:
                              exact: {{ cluster_name }}
                      route:
                        auto_host_rewrite: true
                        cluster: {{ cluster_name }}
                        timeout: 300s
                    {% endfor %}
            http_filters:
              - name: envoy.filters.http.router
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# One listener is generated for every entry of `listeners` that has `agents`.
# Each listener binds 0.0.0.0 on the entry's port, tags requests with the
# `x-arch-agent-listener-name` header, answers `/healthz` directly with 200
# and sends everything else to `bright_staff` with the path prefix rewritten
# to `/agents/`.
{% for listener in listeners %}

{% if listener.agents %}

# agent listeners
- name: {{ listener.name | replace(" ", "_") }}
  address:
    socket_address:
      address: 0.0.0.0
      port_value: {{ listener.port }}
  filter_chains:
    - filters:
        - name: envoy.filters.network.http_connection_manager
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
            {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
            generate_request_id: true
            tracing:
              provider:
                name: envoy.tracers.opentelemetry
                typed_config:
                  "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                  grpc_service:
                    envoy_grpc:
                      cluster_name: opentelemetry_collector
                    timeout: 0.250s
                  service_name: plano(inbound)
              random_sampling:
                value: {{ plano_tracing.random_sampling }}
              operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
            {% endif %}
            stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic
            codec_type: AUTO
            scheme_header_transformation:
              scheme_to_overwrite: https
            access_log:
              - name: envoy.access_loggers.file
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                  path: "/var/log/access_agent.log"
            route_config:
              name: local_routes
              request_headers_to_add:
                - header:
                    key: "x-arch-agent-listener-name"
                    value: "{{ listener.name }}"
              virtual_hosts:
                - name: local_service
                  domains:
                    - "*"
                  routes:
                    - match:
                        prefix: "/healthz"
                      direct_response:
                        status: 200
                    - match:
                        prefix: "/"
                      route:
                        auto_host_rewrite: true
                        prefix_rewrite: "/agents/"
                        cluster: bright_staff
                        timeout: {{ listener.timeout | default('30s') }}
            http_filters:
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: envoy.compression.brotli.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: compress
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                      memory_level: 3
                      window_bits: 10
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: decompress
                    typed_config:
                      "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                      # window_bits is deliberately left at Envoy's default (15).
                      # The previous override of 9 was smaller than the gzip
                      # compressor window configured above (10); a decompressor
                      # window must be at least as large as the compressor's.
                      chunk_size: 8192
                      # If this ratio is set too low, then body data will not be decompressed completely.
                      max_inflate_ratio: 1000
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: envoy.compression.brotli.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
              - name: envoy.filters.http.router
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

{% endif %}
{% endfor %}
|
|
|
|
# Outbound listener `egress_traffic`.
# Bind address, port and route timeout come from `llm_gateway_listener`.
# `/healthz` is answered directly with 200; everything else goes to the
# `bright_staff` cluster.
- name: egress_traffic
  traffic_direction: OUTBOUND
  address:
    socket_address:
      address: {{ llm_gateway_listener.address }}
      port_value: {{ llm_gateway_listener.port }}
  filter_chains:
    - filters:
        - name: envoy.filters.network.http_connection_manager
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
            stat_prefix: egress_traffic
            codec_type: AUTO
            scheme_header_transformation:
              scheme_to_overwrite: https
            access_log:
              - name: envoy.access_loggers.file
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                  path: "/var/log/access_llm.log"
                  format: |
                    [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
            route_config:
              name: local_routes
              virtual_hosts:
                - name: local_service
                  domains:
                    - "*"
                  routes:
                    - match:
                        prefix: "/healthz"
                      direct_response:
                        status: 200
                    - match:
                        prefix: "/"
                      route:
                        auto_host_rewrite: true
                        cluster: bright_staff
                        timeout: {{ llm_gateway_listener.timeout }}
            http_filters:
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: envoy.compression.brotli.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: compress
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                      memory_level: 3
                      window_bits: 10
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: decompress
                    typed_config:
                      "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                      # window_bits is deliberately left at Envoy's default (15).
                      # The previous override of 9 was smaller than the gzip
                      # compressor window configured above (10); a decompressor
                      # window must be at least as large as the compressor's.
                      chunk_size: 8192
                      # If this ratio is set too low, then body data will not be decompressed completely.
                      max_inflate_ratio: 1000
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: envoy.compression.brotli.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
              - name: envoy.filters.http.router
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
# Listener `egress_traffic_llm` on 0.0.0.0:12001.
# Routes by the exact value of the `x-arch-llm-provider` header, one route per
# entry of `plano_model_providers`; requests matching none of them get a 400.
# A retry policy is attached to each provider route only when
# `llm_gateway_listener.max_retries` is set.
# NOTE(review): `stat_prefix` and the access-log path below are the same values
# the `egress_traffic` listener uses - confirm that sharing is intended.
- name: egress_traffic_llm
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 12001
  filter_chains:
    - filters:
        - name: envoy.filters.network.http_connection_manager
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
            # Tracing is emitted only when a positive sampling rate is configured.
            {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
            generate_request_id: true
            tracing:
              provider:
                name: envoy.tracers.opentelemetry
                typed_config:
                  "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                  service_name: plano(outbound)
                  grpc_service:
                    timeout: 0.250s
                    envoy_grpc:
                      cluster_name: opentelemetry_collector
              random_sampling:
                value: {{ plano_tracing.random_sampling }}
              operation: "%REQ(:METHOD)% %REQ(:AUTHORITY)%%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
            {% endif %}
            stat_prefix: egress_traffic
            codec_type: AUTO
            scheme_header_transformation:
              scheme_to_overwrite: https
            access_log:
              - name: envoy.access_loggers.file
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                  path: '/var/log/access_llm.log'
                  format: |
                    [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
            route_config:
              name: local_routes
              virtual_hosts:
                - name: local_service
                  domains: ['*']
                  routes:
                    {% for provider in plano_model_providers %}
                    # A provider with an explicit endpoint uses its own cluster;
                    # otherwise the cluster named after its provider interface.
                    {% set llm_cluster_name = provider.cluster_name if provider.endpoint else provider.provider_interface %}
                    - match:
                        prefix: '/'
                        headers:
                          - name: 'x-arch-llm-provider'
                            string_match:
                              exact: {{ llm_cluster_name }}
                      route:
                        cluster: {{ llm_cluster_name }}
                        auto_host_rewrite: true
                        timeout: 300s
                        {% if llm_gateway_listener.max_retries %}
                        retry_policy:
                          num_retries: {{ llm_gateway_listener.max_retries }}
                          per_try_timeout: 30s
                          retry_on: '5xx,connect-failure,refused-stream,reset,retriable-status-codes'
                          retriable_status_codes:
                            - 429
                            - 500
                            - 502
                            - 503
                            - 504
                          retry_back_off:
                            base_interval: 0.5s
                            max_interval: 5s
                        {% endif %}
                    {% endfor %}
                    # Fallback when no provider route matched.
                    - match:
                        prefix: '/'
                      direct_response:
                        status: 400
                        body:
                          inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
            http_filters:
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: envoy.compression.brotli.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
                      chunk_size: 8192
              - name: envoy.filters.http.compressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                  compressor_library:
                    name: compress
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                      window_bits: 10
                      memory_level: 3
              - name: envoy.filters.http.wasm
                typed_config:
                  "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                  type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                  value:
                    config:
                      name: 'http_config'
                      root_id: llm_gateway
                      vm_config:
                        runtime: 'envoy.wasm.runtime.v8'
                        code:
                          local:
                            filename: '/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm'
                      configuration:
                        "@type": 'type.googleapis.com/google.protobuf.StringValue'
                        # The expression below must start at column 32 so that
                        # its first line lines up with the indent(32) applied
                        # to the following lines.
                        value: |
                                {{ plano_llm_config | indent(32) }}
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: decompress
                    typed_config:
                      "@type": 'type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip'
                      chunk_size: 8192
                      # If this ratio is set too low, then body data will not be decompressed completely.
                      max_inflate_ratio: 1000
              - name: envoy.filters.http.decompressor
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                  decompressor_library:
                    name: envoy.compression.brotli.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
                      chunk_size: 8192
              - name: envoy.filters.http.router
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
clusters:
|
|
|
|
# Upstream cluster `arch`: archfc.katanemo.dev:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: arch
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: arch
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'archfc.katanemo.dev'
              address:
                socket_address:
                  address: archfc.katanemo.dev
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: archfc.katanemo.dev
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `anthropic`: api.anthropic.com:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: anthropic
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: anthropic
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.anthropic.com'
              address:
                socket_address:
                  address: api.anthropic.com
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.anthropic.com
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `deepseek`: api.deepseek.com:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: deepseek
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: deepseek
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.deepseek.com'
              address:
                socket_address:
                  address: api.deepseek.com
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.deepseek.com
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `xai`: api.x.ai:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: xai
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: xai
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.x.ai'
              address:
                socket_address:
                  address: api.x.ai
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.x.ai
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `moonshotai`: api.moonshot.ai:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: moonshotai
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: moonshotai
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.moonshot.ai'
              address:
                socket_address:
                  address: api.moonshot.ai
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.moonshot.ai
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `zhipu`: open.bigmodel.cn:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: zhipu
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: zhipu
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'open.bigmodel.cn'
              address:
                socket_address:
                  address: open.bigmodel.cn
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: open.bigmodel.cn
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `together_ai`: api.together.xyz:443 over TLS 1.2-1.3, with
# the trusted CA file taken from `upstream_tls_ca_path` (system bundle by
# default).
- name: together_ai
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  load_assignment:
    # Was `xai` (copy-paste slip); every other cluster in this file repeats
    # its own name here.
    cluster_name: together_ai
    endpoints:
      - lb_endpoints:
          - endpoint:
              address:
                socket_address:
                  address: api.together.xyz
                  port_value: 443
              hostname: "api.together.xyz"
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.together.xyz
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `gemini`: generativelanguage.googleapis.com:443 over
# TLS 1.2-1.3, with the trusted CA file taken from `upstream_tls_ca_path`
# (system bundle by default).
- name: gemini
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: gemini
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'generativelanguage.googleapis.com'
              address:
                socket_address:
                  address: generativelanguage.googleapis.com
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: generativelanguage.googleapis.com
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `groq`: api.groq.com:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: groq
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: groq
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.groq.com'
              address:
                socket_address:
                  address: api.groq.com
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.groq.com
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `mistral`: api.mistral.ai:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: mistral
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: mistral
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.mistral.ai'
              address:
                socket_address:
                  address: api.mistral.ai
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.mistral.ai
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
|
|
# Upstream cluster `openai`: api.openai.com:443 over TLS 1.2-1.3, with the
# trusted CA file taken from `upstream_tls_ca_path` (system bundle by default).
- name: openai
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: openai
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'api.openai.com'
              address:
                socket_address:
                  address: api.openai.com
                  port_value: 443
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.openai.com
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
|
# Upstream cluster `mistral_7b_instruct`: plaintext connection to host
# `mistral_7b_instruct` on port 10001, resolved with STRICT_DNS.
- name: mistral_7b_instruct
  type: STRICT_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: 0.5s
  load_assignment:
    cluster_name: mistral_7b_instruct
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'mistral_7b_instruct'
              address:
                socket_address:
                  address: mistral_7b_instruct
                  port_value: 10001
|
|
# One cluster per entry of `plano_clusters`.
# connect_timeout: the entry's own value when set, else `upstream_connect_timeout`
# (default 5s). hostname: the entry's `http_host` when set, else its endpoint.
# A TLS transport socket is added only when the entry's protocol is "https".
{% for cluster_name, cluster in plano_clusters.items() %}
- name: {{ cluster_name }}
  connect_timeout: {{ cluster.connect_timeout or (upstream_connect_timeout | default('5s')) }}
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  load_assignment:
    cluster_name: {{ cluster_name }}
    endpoints:
      - lb_endpoints:
          - endpoint:
              address:
                socket_address:
                  address: {{ cluster.endpoint }}
                  port_value: {{ cluster.port }}
              hostname: {{ cluster.http_host or cluster.endpoint }}
  {% if cluster.protocol == "https" %}
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: {{ cluster.endpoint }}
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
  {% endif %}
{% endfor %}
|
|
|
|
# One cluster per entry of `local_llms`, named after the entry's `cluster_name`.
# hostname: the entry's `http_host` when set, else its endpoint.
# A TLS transport socket is added only when the entry's protocol is "https".
{% for local_llm_provider in local_llms %}
- name: {{ local_llm_provider.cluster_name }}
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: {{ upstream_connect_timeout | default('5s') }}
  load_assignment:
    cluster_name: {{ local_llm_provider.cluster_name }}
    endpoints:
      - lb_endpoints:
          - endpoint:
              address:
                socket_address:
                  address: {{ local_llm_provider.endpoint }}
                  port_value: {{ local_llm_provider.port }}
              hostname: {{ local_llm_provider.http_host or local_llm_provider.endpoint }}
  {% if local_llm_provider.protocol == "https" %}
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: {{ local_llm_provider.endpoint }}
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        validation_context:
          trusted_ca:
            filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
  {% endif %}

{% endfor %}
|
|
# Cluster `arch_internal`: points at 0.0.0.0:11000, the same port the
# `outbound_api_traffic` listener binds.
- name: arch_internal
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: 0.5s
  load_assignment:
    cluster_name: arch_internal
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: arch_internal
              address:
                socket_address:
                  address: 0.0.0.0
                  port_value: 11000
|
|
|
|
# Cluster `bright_staff`: points at 0.0.0.0:9091 and sends `localhost` as the
# endpoint hostname.
- name: bright_staff
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: 0.5s
  load_assignment:
    cluster_name: bright_staff
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: localhost
              address:
                socket_address:
                  address: 0.0.0.0
                  port_value: 9091
|
|
|
|
# Cluster `arch_prompt_gateway_listener`: points at 0.0.0.0:10001, the same
# port the `ingress_traffic_prompt` listener binds.
- name: arch_prompt_gateway_listener
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: 0.5s
  load_assignment:
    cluster_name: arch_prompt_gateway_listener
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: arch_prompt_gateway_listener
              address:
                socket_address:
                  address: 0.0.0.0
                  port_value: 10001
|
|
|
|
# Cluster `arch_listener_llm`: points at 0.0.0.0:12001, the same port the
# `egress_traffic_llm` listener binds.
- name: arch_listener_llm
  type: LOGICAL_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  connect_timeout: 0.5s
  load_assignment:
    cluster_name: arch_listener_llm
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: arch_listener_llm
              address:
                socket_address:
                  address: 0.0.0.0
                  port_value: 12001
|
|
|
|
# Cluster `opentelemetry_collector`, emitted only when a positive tracing
# sampling rate is configured. It speaks HTTP/2 to the collector.
# The target comes from `plano_tracing.opentracing_grpc_endpoint` (default
# host.docker.internal:4317) with any http:// or https:// prefix removed.
# When the endpoint carries no ":port" part, port 4317 (the port of the
# default endpoint) is used instead of echoing the host name as the port.
{% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
{% set _otel_endpoint = plano_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
{% set _otel_parts = _otel_endpoint.split(":") %}
- name: opentelemetry_collector
  type: STRICT_DNS
  dns_lookup_family: V4_ONLY
  lb_policy: ROUND_ROBIN
  typed_extension_protocol_options:
    envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
      "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
      explicit_http_config:
        http2_protocol_options: {}
  load_assignment:
    cluster_name: opentelemetry_collector
    endpoints:
      - lb_endpoints:
          - endpoint:
              address:
                socket_address:
                  address: {{ _otel_parts | first }}
                  port_value: {{ (_otel_parts | last) if (_otel_parts | length) > 1 else 4317 }}
{% endif %}
|