more changes

This commit is contained in:
Adil Hafeez 2025-09-15 16:01:48 -07:00
parent a016212588
commit e57000000d
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
7 changed files with 168 additions and 12 deletions

View file

@ -29,6 +29,7 @@ stats_config:
- 180000
static_resources:
listeners:
## begin - legacy listeners
- name: ingress_traffic
address:
socket_address:
@ -214,7 +215,10 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: egress_api_traffic
## end - legacy listeners
# Listener for outbound API traffic to services and clusters
- name: outbound_api_traffic
address:
socket_address:
address: 0.0.0.0
@ -236,11 +240,11 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: egress_api_traffic
service_name: outbound_api_traffic
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: egress_api_traffic
stat_prefix: outbound_api_traffic
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -288,12 +292,16 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: agents_traffic
# Listeners for LLM agents
{% for listener in listeners %}
{% if listener.agents %}
- name: {{ listener.name | replace(" ", "_") }}
address:
socket_address:
address: 0.0.0.0
port_value: 8001
traffic_direction: OUTBOUND
port_value: {{ listener.port }}
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
@ -325,6 +333,10 @@ static_resources:
path: "/var/log/access_llm.log"
route_config:
name: local_routes
request_headers_to_add:
- header:
key: "x-arch-agent-listener-name"
value: "{{ listener.name }}"
virtual_hosts:
- name: local_service
domains:
@ -380,7 +392,141 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% endif %}
{% endfor %}
# Listeners for LLMs
#
# Renders one Envoy listener for every entry of `listeners` that defines
# `llm_providers`. Each listener binds to the entry's own address/port and
# routes on the `x-arch-llm-provider` request header: the header value must
# exactly equal the target cluster name, which is `provider.name` when the
# provider sets `endpoint` and `provider.provider_interface` otherwise.
# Requests that match no provider get a 400 direct response.
#
# Templated string values are double-quoted so that a name which looks like
# a number or boolean, is empty, or contains a YAML indicator still renders
# as a string. Numeric values (port, sampling rate) stay unquoted.
#
# Template tags are kept at column 0 so that the rendered indentation does
# not depend on the template engine's whitespace-trimming options.
{% for listener in listeners %}
{% if listener.llm_providers %}
    - name: "{{ listener.name | replace(' ', '_') }}"
      address:
        socket_address:
          address: "{{ listener.address }}"
          port_value: {{ listener.port }}
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Tracing is only emitted when a positive sampling rate is configured.
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
                generate_request_id: true
                tracing:
                  provider:
                    name: envoy.tracers.opentelemetry
                    typed_config:
                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
                      grpc_service:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
                      service_name: egress_traffic_llm
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
{% endif %}
                # NOTE(review): stat_prefix is a fixed literal, so every listener
                # rendered by this loop shares it - confirm merged stats are intended.
                stat_prefix: egress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/access_llm.log"
                route_config:
                  name: local_routes
                  virtual_hosts:
                    - name: local_service
                      domains:
                        - "*"
                      routes:
{% for provider in listener.llm_providers %}
                        # If an endpoint is set, use the provider's own cluster for the
                        # upstream LLM; otherwise use the cluster of its provider interface.
{% if provider.endpoint %}
{% set llm_cluster_name = provider.name %}
{% else %}
{% set llm_cluster_name = provider.provider_interface %}
{% endif %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-llm-provider"
                                string_match:
                                  exact: "{{ llm_cluster_name }}"
                          route:
                            auto_host_rewrite: true
                            cluster: "{{ llm_cluster_name }}"
                            timeout: 60s
{% endfor %}
                        # Catch-all: reached only when no provider route above matched.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 400
                            body:
                              inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: envoy.compression.brotli.compressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
                          chunk_size: 8192
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
                      compressor_library:
                        name: compress
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          root_id: llm_gateway
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # The plugin configuration is embedded as a literal block.
                            # The expression below must start at column 32 so that its
                            # first line lines up with the continuation lines produced
                            # by the indent filter.
                            value: |
                                {{ arch_llm_config | indent(32) }}
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: decompress
                        typed_config:
                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
                          chunk_size: 8192
                          # If this ratio is set too low, body data will not be decompressed completely.
                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
                      decompressor_library:
                        name: envoy.compression.brotli.decompressor
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
                          chunk_size: 8192
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% endif %}
{% endfor %}
# begin - legacy llm listeners
- name: egress_traffic
address:
@ -595,6 +741,7 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
# end - legacy llm listeners
clusters:
- name: arch

View file

@ -126,6 +126,7 @@ def validate_and_render_schema():
model_name_keys = set()
model_usage_name_keys = set()
# legacy listeners
# check if type is array or object
# if it's a dict, it's the legacy format, so convert it to an array
prompt_gateway_listener = {
@ -344,6 +345,7 @@ def validate_and_render_schema():
"arch_tracing": arch_tracing,
"local_llms": llms_with_endpoint,
"agent_orchestrator": agent_orchestrator,
"listeners": config_yaml["listeners"].copy(),
}
rendered = template.render(data)