move custom tracer to llm filter

This commit is contained in:
Adil Hafeez 2024-11-13 16:55:23 -08:00
parent d1dd8710a4
commit 7c8205abc2
16 changed files with 308 additions and 126 deletions

View file

@ -13,16 +13,20 @@ FROM envoyproxy/envoy:v1.32-latest as envoy
#Build config generator, so that we have a single build image for both Rust and Python
FROM python:3.12-slim as arch
RUN apt-get update && apt-get install -y gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y gettext-base curl supervisor && apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
WORKDIR /config
WORKDIR /app
COPY arch/requirements.txt .
RUN pip install -r requirements.txt
COPY arch/tools/cli/config_generator.py .
COPY arch/envoy.template.yaml .
COPY arch/arch_config_schema.yaml .
COPY arch/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY arch/stream_traces.py .
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
RUN pip install requests
ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View file

@ -242,11 +242,66 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_llm
- name: arch_listener_http_llm
address:
socket_address:
address: 0.0.0.0
port_value: 12000
traffic_direction: INBOUND
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: arch_gateway
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_listener_http
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
access_log:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: "/var/log/access_llm.log"
route_config:
name: local_routes
virtual_hosts:
- name: local_service
domains:
- "*"
routes:
- match:
prefix: "/"
route:
auto_host_rewrite: true
cluster: arch_listener_llm
timeout: 60s
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_llm
address:
socket_address:
address: 0.0.0.0
port_value: 12001
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
@ -479,6 +534,23 @@ static_resources:
port_value: 10001
hostname: arch_prompt_gateway_listener
- name: arch_listener_llm
connect_timeout: 5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: arch_listener_llm
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: 0.0.0.0
port_value: 12001
hostname: arch_listener_llm
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: opentelemetry_collector
type: STRICT_DNS

40
arch/stream_traces.py Normal file
View file

@ -0,0 +1,40 @@
import os
import time
import requests
import logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
otel_tracing_endpoint = os.getenv(
"OTEL_TRACING_HTTP_ENDPOINT", "http://localhost:4318/v1/traces"
)
envoy_log_path = os.getenv("ENVOY_LOG_PATH", "/var/log/envoy.log")
logging.info(f"Using otel-tracing host: {otel_tracing_endpoint}")
logging.info(f"Using envoy log path: {envoy_log_path}")
def process_log_line(line):
try:
response = requests.post(
url=otel_tracing_endpoint,
data=line,
headers={"Content-Type": "application/json"},
)
logging.info(f"Sent trace to otel-tracing: {response.status_code}")
except Exception as e:
logging.error(f"Failed to send trace to otel-tracing: {e}")
with open(envoy_log_path, "r") as f:
while True:
line = f.readline()
if not line:
time.sleep(1)
continue
tokens = line.split("gateway: upstream_llm trace details: ")
if len(tokens) > 1:
process_log_line(tokens[1])

25
arch/supervisord.conf Normal file
View file

@ -0,0 +1,25 @@
[supervisord]
nodaemon=true
[program:trace_streamer]
command=python stream_traces.py
autostart=true
autorestart=false
startretries=3
priority=1
stdout_logfile=/dev/stdout
stderr_logfile=/dev/stderr
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
[program:envoy]
command=sh -c "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"
autostart=true
autorestart=true
startretries=3
priority=2
stdout_logfile=/dev/stdout
stderr_logfile=/dev/stderr
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0