This commit is contained in:
Adil Hafeez 2024-11-07 10:13:18 -08:00
parent 23ed25342a
commit 7b99379275
11 changed files with 761 additions and 392 deletions

View file

@@ -26,4 +26,4 @@ COPY arch/envoy.template.yaml .
COPY arch/arch_config_schema.yaml .
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:trace"]
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug"]

View file

@@ -3,6 +3,7 @@ services:
image: katanemo/archgw:latest
ports:
- "10000:10000"
- "10001:10001"
- "11000:11000"
- "12000:12000"
- "19901:9901"

View file

@@ -3,6 +3,7 @@ services:
image: katanemo/archgw:latest
ports:
- "10000:10000"
- "10001:10001"
- "11000:11000"
- "12000:12000"
- "19901:9901"

View file

@@ -3,6 +3,7 @@ services:
image: katanemo/archgw:latest
ports:
- "10000:10000"
- "10001:10001"
- "11000:11000"
- "12000:12000"
- "19901:9901"

View file

@@ -98,12 +98,18 @@ static_resources:
domains:
- "*"
routes:
{% for provider in arch_llm_providers %}
- match:
prefix: "/"
headers:
- name: "x-arch-llm-provider"
string_match:
exact: {{ provider.name }}
route:
auto_host_rewrite: true
cluster: arch_llm_listener
cluster: {{ provider.provider }}
timeout: 60s
{% endfor %}
http_filters:
- name: envoy.filters.http.compressor
typed_config:
@@ -131,6 +137,23 @@ static_resources:
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
value:
config:
name: "http_config"
root_id: llm_gateway
configuration:
"@type": "type.googleapis.com/google.protobuf.StringValue"
value: |
{{ arch_llm_config | indent(32) }}
vm_config:
runtime: "envoy.wasm.runtime.v8"
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
- name: envoy.filters.http.decompressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
@@ -146,7 +169,6 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_internal
address:
socket_address:
@@ -457,22 +479,6 @@ static_resources:
port_value: 10001
hostname: arch_prompt_gateway_listener
- name: arch_llm_listener
connect_timeout: 5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: arch_llm_listener
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: 0.0.0.0
port_value: 12000
hostname: arch_llm_listener
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: opentelemetry_collector
type: STRICT_DNS