# Envoy bootstrap configuration, rendered through Jinja2 before Envoy loads it.
#
# Template variables read by this file:
#   prompt_gateway_listener  address / port / timeout of the ingress listener
#   llm_gateway_listener     address / port / timeout / max_retries of the egress listener
#   listeners                listener definitions; entries with `agents` get their own listener
#   plano_model_providers    providers routed on the x-arch-llm-provider header
#   agent_orchestrator       optional extra x-arch-llm-provider route target
#   plano_clusters           user-defined upstream clusters, keyed by cluster name
#   local_llms               self-hosted LLM upstreams
#   plano_tracing            tracing settings (random_sampling, opentracing_grpc_endpoint)
#   plano_config             payload handed to the prompt_gateway wasm filter
#   plano_llm_config         payload handed to the llm_gateway wasm filter
#   upstream_connect_timeout optional, defaults to 5s
#   upstream_tls_ca_path     optional, defaults to /etc/ssl/certs/ca-certificates.crt
#
# Listener overview (static ports are hard-coded below):
#   ingress_traffic         prompt_gateway_listener address:port -> arch_prompt_gateway_listener
#   ingress_traffic_prompt  0.0.0.0:10001, wasm prompt + llm filters, routes on x-arch-llm-provider
#   outbound_api_traffic    0.0.0.0:11000, routes on x-arch-upstream
#   <agent listeners>       one per entry in `listeners` that has agents -> bright_staff
#   egress_traffic          llm_gateway_listener address:port -> bright_staff
#   egress_traffic_llm      0.0.0.0:12001, wasm llm filter, routes on x-arch-llm-provider

admin:
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

stats_config:
  histogram_bucket_settings:
    match:
      exact: "wasmcustom.time_to_first_token"
    # Bucket upper bounds for the time-to-first-token histogram.
    # NOTE(review): values look like milliseconds - confirm against the wasm plugin.
    buckets:
      - 100
      - 500
      - 800
      - 1000
      - 1200
      - 1400
      - 1600
      - 1800
      - 2000
      - 2200
      - 2400
      - 3000
      - 3500
      - 4000
      - 4500
      - 5000
      - 6000
      - 10000
      - 60000
      - 180000

static_resources:
  listeners:
    # Public ingress: forwards every request to the ingress_traffic_prompt
    # listener via the arch_prompt_gateway_listener cluster.
    - name: ingress_traffic
      address:
        socket_address:
          address: {{ prompt_gateway_listener.address }}
          port_value: {{ prompt_gateway_listener.port }}
      traffic_direction: INBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: plano(inbound)
                  random_sampling:
                    value: {{ plano_tracing.random_sampling }}
                  operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                {% endif %}
                stat_prefix: plano(inbound)
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_ingress.log"
                      format: |
                        [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: arch_prompt_gateway_listener
                            timeout: {{ prompt_gateway_listener.timeout }}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    # Internal prompt listener: runs the prompt_gateway and llm_gateway wasm
    # filters, then routes on the x-arch-llm-provider header.
    - name: ingress_traffic_prompt
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 10001
      traffic_direction: INBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_ingress_prompt.log"
                      format: |
                        [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                      {% for provider in plano_model_providers %}
                        # if endpoint is set then use custom cluster for upstream llm
                        {% if provider.endpoint %}
                        {% set llm_cluster_name = provider.cluster_name %}
                        {% else %}
                        {% set llm_cluster_name = provider.provider_interface %}
                        {% endif %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ llm_cluster_name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ llm_cluster_name }}
                            timeout: 300s
                      {% endfor %}
                      {% if agent_orchestrator %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ agent_orchestrator }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ agent_orchestrator }}
                            timeout: 300s
                      {% endif %}
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.wasm_prompt
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: prompt_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The payload must sit at column 32 so that the first line
                            # lines up with the continuation lines produced by indent(32).
                            value: |
                                {{ plano_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
                  - name: envoy.filters.http.wasm_llm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # Same column-32 alignment requirement as the prompt filter above.
                            value: |
                                {{ plano_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          window_bits: 9
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    # Outbound API listener: picks the upstream cluster from the
    # x-arch-upstream header (bright_staff or any entry of plano_clusters).
    - name: outbound_api_traffic
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 11000
      traffic_direction: OUTBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Tracing is disabled on this listener. The Jinja tags below are still
                # evaluated at render time; they only ever emit YAML comment lines.
                # {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
                # generate_request_id: true
                # tracing:
                #   provider:
                #     name: envoy.tracers.opentelemetry
                #     typed_config:
                #       "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                #       grpc_service:
                #         envoy_grpc:
                #           cluster_name: opentelemetry_collector
                #         timeout: 0.250s
                #       service_name: tools
                #   random_sampling:
                #     value: {{ plano_tracing.random_sampling }}
                # {% endif %}
                stat_prefix: outbound_api_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_internal.log"
                      format: |
                        [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: bright_staff
                          route:
                            auto_host_rewrite: true
                            cluster: bright_staff
                            timeout: 300s
                      {% for cluster_name, cluster in plano_clusters.items() %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
                                  exact: {{ cluster_name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ cluster_name }}
                            timeout: 300s
                      {% endfor %}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    {% for listener in listeners %}
    {% if listener.agents %}
    # agent listeners
    # Each one tags requests with x-arch-agent-listener-name, answers /healthz
    # directly, and rewrites everything else to /agents/ on bright_staff.
    - name: {{ listener.name | replace(" ", "_") }}
      address:
        socket_address:
          address: 0.0.0.0
          port_value: {{ listener.port }}
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: plano(inbound)
                  random_sampling:
                    value: {{ plano_tracing.random_sampling }}
                  operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                {% endif %}
                stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_agent.log"
                route_config:
                  name: local_routes
                  request_headers_to_add:
                    - header:
                        key: "x-arch-agent-listener-name"
                        value: "{{ listener.name }}"
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/healthz"
                          direct_response:
                            status: 200
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            prefix_rewrite: "/agents/"
                            cluster: bright_staff
                            timeout: {{ listener.timeout | default('30s') }}
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: envoy.compression.brotli.compressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          window_bits: 9
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: envoy.compression.brotli.decompressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
    {% endif %}
    {% endfor %}

    # LLM gateway entry point: answers /healthz directly and hands every
    # other request to bright_staff.
    - name: egress_traffic
      address:
        socket_address:
          address: {{ llm_gateway_listener.address }}
          port_value: {{ llm_gateway_listener.port }}
      traffic_direction: OUTBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: egress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_llm.log"
                      format: |
                        [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/healthz"
                          direct_response:
                            status: 200
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: bright_staff
                            timeout: {{ llm_gateway_listener.timeout }}
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: envoy.compression.brotli.compressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          window_bits: 9
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: envoy.compression.brotli.decompressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

    # Provider-facing listener: runs the llm_gateway wasm filter and routes on
    # x-arch-llm-provider; requests without a matching header get a 400.
    - name: egress_traffic_llm
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 12001
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: plano(outbound)
                  random_sampling:
                    value: {{ plano_tracing.random_sampling }}
                  operation: "%REQ(:METHOD)% %REQ(:AUTHORITY)%%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                {% endif %}
                stat_prefix: egress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_llm.log"
                      format: |
                        [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                      {% for provider in plano_model_providers %}
                        # if endpoint is set then use custom cluster for upstream llm
                        {% if provider.endpoint %}
                        {% set llm_cluster_name = provider.cluster_name %}
                        {% else %}
                        {% set llm_cluster_name = provider.provider_interface %}
                        {% endif %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: {{ llm_cluster_name }}
                          route:
                            auto_host_rewrite: true
                            cluster: {{ llm_cluster_name }}
                            timeout: 300s
                            {% if llm_gateway_listener.max_retries %}
                            # Retries are only configured when max_retries is set on the listener.
                            retry_policy:
                              retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
                              num_retries: {{ llm_gateway_listener.max_retries }}
                              per_try_timeout: 30s
                              retriable_status_codes: [429, 500, 502, 503, 504]
                              retry_back_off:
                                base_interval: 0.5s
                                max_interval: 5s
                            {% endif %}
                      {% endfor %}
                        # Catch-all: no provider route matched the request.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: envoy.compression.brotli.compressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
                          chunk_size: 8192
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The payload must sit at column 32 so that the first line
                            # lines up with the continuation lines produced by indent(32).
                            value: |
                                {{ plano_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: envoy.compression.brotli.decompressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
                          chunk_size: 8192
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
    # Hosted providers reached over TLS on port 443. Every one of these clusters
    # has the same shape, so they are generated from (cluster name, host) pairs.
    # The name is used for both the cluster and its load_assignment, which
    # removes the copy-paste slip where together_ai was assigned to "xai".
    {% set _hosted_llm_clusters = [
      ("plano", "archfc.katanemo.dev"),
      ("anthropic", "api.anthropic.com"),
      ("deepseek", "api.deepseek.com"),
      ("xai", "api.x.ai"),
      ("moonshotai", "api.moonshot.ai"),
      ("zhipu", "open.bigmodel.cn"),
      ("together_ai", "api.together.xyz"),
      ("gemini", "generativelanguage.googleapis.com"),
      ("groq", "api.groq.com"),
      ("mistral", "api.mistral.ai"),
      ("openai", "api.openai.com"),
    ] %}
    {% for _hosted_name, _hosted_host in _hosted_llm_clusters %}
    - name: {{ _hosted_name }}
      connect_timeout: {{ upstream_connect_timeout | default('5s') }}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: {{ _hosted_name }}
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: {{ _hosted_host }}
                      port_value: 443
                  hostname: "{{ _hosted_host }}"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: {{ _hosted_host }}
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
            validation_context:
              trusted_ca:
                filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
    {% endfor %}

    # Plain-text upstream addressed by its DNS name.
    - name: mistral_7b_instruct
      connect_timeout: 0.5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"

    # User-defined upstreams; TLS is enabled only when protocol is "https".
    {% for cluster_name, cluster in plano_clusters.items() %}
    - name: {{ cluster_name }}
      # Per-cluster timeout when set (truthy), else the global one, else 5s.
      connect_timeout: {{ cluster.connect_timeout | default(upstream_connect_timeout | default('5s'), true) }}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: {{ cluster_name }}
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: {{ cluster.endpoint }}
                      port_value: {{ cluster.port }}
                  {% if cluster.http_host %}
                  hostname: {{ cluster.http_host }}
                  {% else %}
                  hostname: {{ cluster.endpoint }}
                  {% endif %}
      {% if cluster.protocol == "https" %}
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: {{ cluster.endpoint }}
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
            validation_context:
              trusted_ca:
                filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
      {% endif %}
    {% endfor %}

    # Self-hosted LLM upstreams; TLS is enabled only when protocol is "https".
    {% for local_llm_provider in local_llms %}
    - name: {{ local_llm_provider.cluster_name }}
      connect_timeout: {{ upstream_connect_timeout | default('5s') }}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: {{ local_llm_provider.cluster_name }}
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: {{ local_llm_provider.endpoint }}
                      port_value: {{ local_llm_provider.port }}
                  {% if local_llm_provider.http_host %}
                  hostname: {{ local_llm_provider.http_host }}
                  {% else %}
                  hostname: {{ local_llm_provider.endpoint }}
                  {% endif %}
      {% if local_llm_provider.protocol == "https" %}
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: {{ local_llm_provider.endpoint }}
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
            validation_context:
              trusted_ca:
                filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
      {% endif %}
    {% endfor %}

    # Loopback clusters pointing back at this proxy's own listeners
    # (11000, 10001, 12001) and at the service on port 9091.
    - name: arch_internal
      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_internal
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 11000
                  hostname: arch_internal

    - name: bright_staff
      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: bright_staff
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 9091
                  hostname: localhost

    - name: arch_prompt_gateway_listener
      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_prompt_gateway_listener
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 10001
                  hostname: arch_prompt_gateway_listener

    - name: arch_listener_llm
      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: arch_listener_llm
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: 0.0.0.0
                      port_value: 12001
                  hostname: arch_listener_llm

    {% if "random_sampling" in plano_tracing and plano_tracing["random_sampling"] > 0 %}
    # OTLP/gRPC collector used by the tracing providers above (HTTP/2 upstream).
    - name: opentelemetry_collector
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: opentelemetry_collector
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      {% set _otel_endpoint = plano_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
                      {% set _otel_parts = _otel_endpoint.split(":") %}
                      address: {{ _otel_parts | first }}
                      # Use 4317 (the port of the default endpoint) when the
                      # configured endpoint carries no explicit port.
                      port_value: {{ (_otel_parts | last) if (_otel_parts | length) > 1 else 4317 }}
    {% endif %}