mirror of
https://github.com/katanemo/plano.git
synced 2026-06-14 15:15:15 +02:00
move custom tracer to llm filter (#267)
This commit is contained in:
parent
1d229cba8f
commit
d3c17c7abd
22 changed files with 335 additions and 133 deletions
|
|
@ -13,16 +13,21 @@ FROM envoyproxy/envoy:v1.32-latest as envoy
|
|||
#Build config generator, so that we have a single build image for both Rust and Python
|
||||
FROM python:3.12-slim as arch
|
||||
|
||||
RUN apt-get update && apt-get install -y gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN apt-get update && apt-get install -y gettext-base curl supervisor && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||
COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
|
||||
WORKDIR /config
|
||||
WORKDIR /app
|
||||
COPY arch/requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
COPY arch/tools/cli/config_generator.py .
|
||||
COPY arch/envoy.template.yaml .
|
||||
COPY arch/arch_config_schema.yaml .
|
||||
COPY arch/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
COPY arch/stream_traces.py .
|
||||
|
||||
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
|
||||
RUN pip install requests
|
||||
RUN touch /var/log/envoy.log
|
||||
|
||||
ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||
|
|
|
|||
|
|
@ -8,11 +8,11 @@ services:
|
|||
- "12000:12000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
|
||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ./envoy.template.yaml:/config/envoy.template.yaml
|
||||
- ./arch_config_schema.yaml:/config/arch_config_schema.yaml
|
||||
- ./tools/cli/config_generator.py:/config/config_generator.py
|
||||
- ./envoy.template.yaml:/app/envoy.template.yaml
|
||||
- ./arch_config_schema.yaml:/app/arch_config_schema.yaml
|
||||
- ./tools/cli/config_generator.py:/app/config_generator.py
|
||||
- ../crates/target/wasm32-wasip1/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||
- ../crates/target/wasm32-wasip1/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||
- ~/archgw_logs:/var/log/
|
||||
|
|
@ -21,3 +21,4 @@ services:
|
|||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||
|
|
|
|||
|
|
@ -16,3 +16,4 @@ services:
|
|||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||
|
|
|
|||
|
|
@ -8,11 +8,13 @@ services:
|
|||
- "12000:12000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
|
||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ~/archgw_logs:/var/log/
|
||||
env_file:
|
||||
- env.list
|
||||
environment:
|
||||
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
healthcheck:
|
||||
|
|
|
|||
|
|
@ -242,11 +242,66 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: arch_listener_llm
|
||||
|
||||
- name: arch_listener_http_llm
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 12000
|
||||
traffic_direction: INBOUND
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
generate_request_id: true
|
||||
tracing:
|
||||
provider:
|
||||
name: envoy.tracers.opentelemetry
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||
grpc_service:
|
||||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: arch_gateway
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: arch_listener_http
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
access_log:
|
||||
- name: envoy.access_loggers.file
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||
path: "/var/log/access_llm.log"
|
||||
route_config:
|
||||
name: local_routes
|
||||
virtual_hosts:
|
||||
- name: local_service
|
||||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
prefix: "/"
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: arch_listener_llm
|
||||
timeout: 60s
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
- name: arch_listener_llm
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 12001
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
|
|
@ -479,6 +534,23 @@ static_resources:
|
|||
port_value: 10001
|
||||
hostname: arch_prompt_gateway_listener
|
||||
|
||||
- name: arch_listener_llm
|
||||
connect_timeout: 5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: arch_listener_llm
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 12001
|
||||
hostname: arch_listener_llm
|
||||
|
||||
|
||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
- name: opentelemetry_collector
|
||||
type: STRICT_DNS
|
||||
|
|
|
|||
37
arch/stream_traces.py
Normal file
37
arch/stream_traces.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
import logging
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
|
||||
|
||||
otel_tracing_endpoint = os.getenv(
|
||||
"OTEL_TRACING_HTTP_ENDPOINT", "http://localhost:4318/v1/traces"
|
||||
)
|
||||
envoy_log_path = os.getenv("ENVOY_LOG_PATH", "/var/log/envoy.log")
|
||||
|
||||
logging.info(f"Using otel-tracing host: {otel_tracing_endpoint}")
|
||||
logging.info(f"Using envoy log path: {envoy_log_path}")
|
||||
|
||||
|
||||
def process_log_line(line):
|
||||
try:
|
||||
response = requests.post(
|
||||
url=otel_tracing_endpoint,
|
||||
data=line,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
logging.info(f"Sent trace to otel-tracing: {response.status_code}")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to send trace to otel-tracing: {e}")
|
||||
|
||||
|
||||
for line in sys.stdin:
|
||||
if line:
|
||||
tokens = line.split("gateway: upstream_llm trace details: ")
|
||||
if len(tokens) > 1:
|
||||
process_log_line(tokens[1])
|
||||
25
arch/supervisord.conf
Normal file
25
arch/supervisord.conf
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
[supervisord]
|
||||
nodaemon=true
|
||||
|
||||
[program:trace_streamer]
|
||||
command=sh -c "tail -F /var/log/envoy.log | python stream_traces.py"
|
||||
autostart=true
|
||||
autorestart=false
|
||||
startretries=3
|
||||
priority=1
|
||||
stdout_logfile=/dev/stdout
|
||||
stderr_logfile=/dev/stderr
|
||||
stdout_logfile_maxbytes = 0
|
||||
stderr_logfile_maxbytes = 0
|
||||
|
||||
|
||||
[program:envoy]
|
||||
command=sh -c "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"
|
||||
autostart=true
|
||||
autorestart=true
|
||||
startretries=3
|
||||
priority=2
|
||||
stdout_logfile=/dev/stdout
|
||||
stderr_logfile=/dev/stderr
|
||||
stdout_logfile_maxbytes = 0
|
||||
stderr_logfile_maxbytes = 0
|
||||
|
|
@ -6,7 +6,7 @@ from jsonschema import validate
|
|||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
|
||||
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
|
||||
)
|
||||
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/config/arch_config.yaml")
|
||||
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
|
||||
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
|
||||
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue