mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
more changes
This commit is contained in:
parent
a016212588
commit
e57000000d
7 changed files with 168 additions and 12 deletions
|
|
@ -29,6 +29,7 @@ stats_config:
|
|||
- 180000
|
||||
static_resources:
|
||||
listeners:
|
||||
## begin - legacy listeners
|
||||
- name: ingress_traffic
|
||||
address:
|
||||
socket_address:
|
||||
|
|
@ -214,7 +215,10 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: egress_api_traffic
|
||||
## end - legacy listeners
|
||||
|
||||
# Listener for outbound API traffic to services and clusters
|
||||
- name: outbound_api_traffic
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
|
|
@ -236,11 +240,11 @@ static_resources:
|
|||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: egress_api_traffic
|
||||
service_name: outbound_api_traffic
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: egress_api_traffic
|
||||
stat_prefix: outbound_api_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -288,12 +292,16 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: agents_traffic
|
||||
# Listeners for LLM agents
|
||||
{% for listener in listeners %}
|
||||
|
||||
{% if listener.agents %}
|
||||
|
||||
- name: {{ listener.name | replace(" ", "_") }}
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 8001
|
||||
traffic_direction: OUTBOUND
|
||||
port_value: {{ listener.port }}
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
|
|
@ -325,6 +333,10 @@ static_resources:
|
|||
path: "/var/log/access_llm.log"
|
||||
route_config:
|
||||
name: local_routes
|
||||
request_headers_to_add:
|
||||
- header:
|
||||
key: "x-arch-agent-listener-name"
|
||||
value: "{{ listener.name }}"
|
||||
virtual_hosts:
|
||||
- name: local_service
|
||||
domains:
|
||||
|
|
@ -380,7 +392,141 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
# Listeners for LLMs
|
||||
{% for listener in listeners %}
|
||||
|
||||
{% if listener.llm_providers %}
|
||||
|
||||
- name: {{ listener.name | replace(" ", "_") }}
|
||||
address:
|
||||
socket_address:
|
||||
address: {{ listener.address }}
|
||||
port_value: {{ listener.port }}
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
generate_request_id: true
|
||||
tracing:
|
||||
provider:
|
||||
name: envoy.tracers.opentelemetry
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||
grpc_service:
|
||||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: egress_traffic_llm
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: egress_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
access_log:
|
||||
- name: envoy.access_loggers.file
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||
path: "/var/log/access_llm.log"
|
||||
route_config:
|
||||
name: local_routes
|
||||
virtual_hosts:
|
||||
- name: local_service
|
||||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
{% for provider in listener.llm_providers %}
|
||||
# if endpoint is set then use custom cluster for upstream llm
|
||||
{% if provider.endpoint %}
|
||||
{% set llm_cluster_name = provider.name %}
|
||||
{% else %}
|
||||
{% set llm_cluster_name = provider.provider_interface %}
|
||||
{% endif %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
headers:
|
||||
- name: "x-arch-llm-provider"
|
||||
string_match:
|
||||
exact: {{ llm_cluster_name }}
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 60s
|
||||
{% endfor %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
direct_response:
|
||||
status: 400
|
||||
body:
|
||||
inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: envoy.compression.brotli.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
|
||||
chunk_size: 8192
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: compress
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
|
||||
value:
|
||||
config:
|
||||
name: "http_config"
|
||||
root_id: llm_gateway
|
||||
configuration:
|
||||
"@type": "type.googleapis.com/google.protobuf.StringValue"
|
||||
value: |
|
||||
{{ arch_llm_config | indent(32) }}
|
||||
vm_config:
|
||||
runtime: "envoy.wasm.runtime.v8"
|
||||
code:
|
||||
local:
|
||||
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: decompress
|
||||
typed_config:
|
||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: envoy.compression.brotli.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
|
||||
chunk_size: 8192
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
# begin - legacy llm listeners
|
||||
|
||||
- name: egress_traffic
|
||||
address:
|
||||
|
|
@ -595,6 +741,7 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
# end - legacy llm listeners
|
||||
clusters:
|
||||
|
||||
- name: arch
|
||||
|
|
|
|||
|
|
@ -126,6 +126,7 @@ def validate_and_render_schema():
|
|||
model_name_keys = set()
|
||||
model_usage_name_keys = set()
|
||||
|
||||
# legacy listeners
|
||||
# check if type is array or object
|
||||
# if its dict its legacy format let's convert it to array
|
||||
prompt_gateway_listener = {
|
||||
|
|
@ -344,6 +345,7 @@ def validate_and_render_schema():
|
|||
"arch_tracing": arch_tracing,
|
||||
"local_llms": llms_with_endpoint,
|
||||
"agent_orchestrator": agent_orchestrator,
|
||||
"listeners": config_yaml["listeners"].copy(),
|
||||
}
|
||||
|
||||
rendered = template.render(data)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue