mirror of
https://github.com/katanemo/plano.git
synced 2026-05-02 04:12:56 +02:00
add support for agents (#564)
This commit is contained in:
parent
f8991a3c4b
commit
96e0732089
41 changed files with 3571 additions and 856 deletions
|
|
@ -128,7 +128,7 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
{% for provider in arch_llm_providers %}
|
||||
{% for provider in arch_model_providers %}
|
||||
# if endpoint is set then use custom cluster for upstream llm
|
||||
{% if provider.endpoint %}
|
||||
{% set llm_cluster_name = provider.cluster_name %}
|
||||
|
|
@ -218,7 +218,7 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: egress_api_traffic
|
||||
- name: outbound_api_traffic
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
|
|
@ -240,11 +240,11 @@ static_resources:
|
|||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: egress_api_traffic
|
||||
service_name: outbound_api_traffic
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: egress_api_traffic
|
||||
stat_prefix: outbound_api_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -292,6 +292,108 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
{% for listener in listeners %}
|
||||
|
||||
{% if listener.agents %}
|
||||
|
||||
- name: {{ listener.name | replace(" ", "_") }}
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: {{ listener.port }}
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
generate_request_id: true
|
||||
tracing:
|
||||
provider:
|
||||
name: envoy.tracers.opentelemetry
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||
grpc_service:
|
||||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: arch_gateway
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
access_log:
|
||||
- name: envoy.access_loggers.file
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||
path: "/var/log/access_llm.log"
|
||||
route_config:
|
||||
name: local_routes
|
||||
request_headers_to_add:
|
||||
- header:
|
||||
key: "x-arch-agent-listener-name"
|
||||
value: "{{ listener.name }}"
|
||||
virtual_hosts:
|
||||
- name: local_service
|
||||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
prefix: "/healthz"
|
||||
direct_response:
|
||||
status: 200
|
||||
- match:
|
||||
prefix: "/"
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
prefix_rewrite: "/agents/"
|
||||
cluster: bright_staff
|
||||
timeout: {{ llm_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: envoy.compression.brotli.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: compress
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: decompress
|
||||
typed_config:
|
||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||
window_bits: 9
|
||||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: envoy.compression.brotli.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
- name: egress_traffic
|
||||
address:
|
||||
socket_address:
|
||||
|
|
@ -428,7 +530,7 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
{% for provider in arch_llm_providers %}
|
||||
{% for provider in arch_model_providers %}
|
||||
# if endpoint is set then use custom cluster for upstream llm
|
||||
{% if provider.endpoint %}
|
||||
{% set llm_cluster_name = provider.cluster_name %}
|
||||
|
|
@ -796,7 +898,7 @@ static_resources:
|
|||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: $MODEL_SERVER_PORT
|
||||
port_value: 51000
|
||||
hostname: {{ internal_cluster }}
|
||||
{% endfor %}
|
||||
- name: mistral_7b_instruct
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue