move custom tracer to llm filter

2026-06-17 15:25:17 +02:00 · 2024-11-13 16:55:23 -08:00 · 2024-11-13 16:55:23 -08:00 · 7c8205abc2
commit 7c8205abc2
parent d1dd8710a4
16 changed files with 308 additions and 126 deletions
--- a/arch/Dockerfile
+++ b/arch/Dockerfile
@ -13,16 +13,20 @@ FROM envoyproxy/envoy:v1.32-latest as envoy
 #Build config generator, so that we have a single build image for both Rust and Python
 FROM python:3.12-slim as arch

-RUN apt-get update && apt-get install -y gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y gettext-base curl supervisor && apt-get clean && rm -rf /var/lib/apt/lists/*

 COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
 COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
 COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
-WORKDIR /config
+WORKDIR /app
 COPY arch/requirements.txt .
 RUN pip install -r requirements.txt
 COPY arch/tools/cli/config_generator.py .
 COPY arch/envoy.template.yaml .
 COPY arch/arch_config_schema.yaml .
+COPY arch/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+COPY arch/stream_traces.py .

-ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
+# requests is imported by stream_traces.py to POST traces over HTTP.
+RUN pip install requests
+
+# supervisord starts the programs declared in supervisord.conf (envoy and the trace streamer).
+ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -242,11 +242,66 @@ static_resources:
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-    - name: arch_listener_llm
+
+    # Listener on port 12000 that routes every request to the arch_listener_llm
+    # cluster; OpenTelemetry tracing is configured here only when
+    # arch_tracing.random_sampling is greater than 0.
+    - name: arch_listener_http_llm
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 12000
+      traffic_direction: INBOUND
+      filter_chains:
+        - filters:
+            - name: envoy.filters.network.http_connection_manager
+              typed_config:
+                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
+                generate_request_id: true
+                tracing:
+                  provider:
+                    name: envoy.tracers.opentelemetry
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
+                      grpc_service:
+                        envoy_grpc:
+                          cluster_name: opentelemetry_collector
+                        timeout: 0.250s
+                      service_name: arch_gateway
+                  random_sampling:
+                    value: {{ arch_tracing.random_sampling }}
+                {% endif %}
+                stat_prefix: arch_listener_http
+                codec_type: AUTO
+                scheme_header_transformation:
+                  scheme_to_overwrite: https
+                access_log:
+                - name: envoy.access_loggers.file
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                    path: "/var/log/access_llm.log"
+                route_config:
+                  name: local_routes
+                  virtual_hosts:
+                    - name: local_service
+                      domains:
+                        - "*"
+                      routes:
+                        - match:
+                            prefix: "/"
+                          route:
+                            auto_host_rewrite: true
+                            cluster: arch_listener_llm
+                            timeout: 60s
+                http_filters:
+                  - name: envoy.filters.http.router
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+
+
+    - name: arch_listener_llm
+      address:
+        socket_address:
+          address: 0.0.0.0
+          port_value: 12001
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
@ -479,6 +534,23 @@ static_resources:
                      port_value: 10001
                  hostname: arch_prompt_gateway_listener

+    # Cluster targeted by the arch_listener_http_llm route; its single endpoint
+    # is port 12001, the port the arch_listener_llm listener binds.
+    # NOTE(review): 0.0.0.0 as an upstream address relies on the OS treating it
+    # as loopback; consider 127.0.0.1 - confirm against the other local clusters.
+    - name: arch_listener_llm
+      connect_timeout: 5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: arch_listener_llm
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: 0.0.0.0
+                      port_value: 12001
+                  hostname: arch_listener_llm
+
+
 {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
    - name: opentelemetry_collector
      type: STRICT_DNS
--- a/arch/stream_traces.py
+++ b/arch/stream_traces.py
@ -0,0 +1,40 @@
+import os
+import time
+import requests
+import logging
+
+# Timestamped, INFO-level logging for this script.
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+# URL that traces are POSTed to; override with OTEL_TRACING_HTTP_ENDPOINT.
+otel_tracing_endpoint = os.getenv(
+    "OTEL_TRACING_HTTP_ENDPOINT", "http://localhost:4318/v1/traces"
+)
+# Log file that is followed for trace lines; override with ENVOY_LOG_PATH.
+envoy_log_path = os.getenv("ENVOY_LOG_PATH", "/var/log/envoy.log")
+
+# Record the effective settings once at startup.
+logging.info(f"Using otel-tracing host: {otel_tracing_endpoint}")
+logging.info(f"Using envoy log path: {envoy_log_path}")
+
+
+def process_log_line(line):
+    """Forward one trace payload to the OTel HTTP endpoint.
+
+    Args:
+        line: The text that followed the trace marker on an envoy log line.
+            It is sent unchanged as the request body with a JSON content
+            type.
+
+    Delivery is best effort: any failure is logged and never raised, so a
+    collector outage cannot stop the log-following loop.
+    """
+    # http.client encodes a str body as ISO-8859-1 and raises on characters
+    # outside it; encode explicitly so non-Latin-1 trace content is delivered.
+    payload = line.encode("utf-8") if isinstance(line, str) else line
+    try:
+        response = requests.post(
+            url=otel_tracing_endpoint,
+            data=payload,
+            headers={"Content-Type": "application/json"},
+            # requests has no default timeout; without one a stalled
+            # collector would block the log-following loop indefinitely.
+            timeout=5,
+        )
+        logging.info(f"Sent trace to otel-tracing: {response.status_code}")
+    except Exception as e:
+        logging.error(f"Failed to send trace to otel-tracing: {e}")
+
+
+# The log file is created by whatever writes it (envoy's output via tee), which
+# may start after this script; wait for it instead of crashing on open().
+if not os.path.exists(envoy_log_path):
+    logging.info(f"Waiting for envoy log file: {envoy_log_path}")
+    while not os.path.exists(envoy_log_path):
+        time.sleep(1)
+
+# Follow the log forever, forwarding the payload of every trace line.
+with open(envoy_log_path, "r") as f:
+    pending = ""  # holds a line whose trailing newline has not been written yet
+    while True:
+        chunk = f.readline()
+        if not chunk:
+            # At end of file: wait for the writer to append more.
+            time.sleep(1)
+            continue
+        pending += chunk
+        if not pending.endswith("\n"):
+            # readline() hit EOF mid-line; keep buffering so the payload is
+            # not forwarded truncated.
+            continue
+        line, pending = pending, ""
+        tokens = line.split("gateway: upstream_llm trace details: ")
+        if len(tokens) > 1:
+            process_log_line(tokens[1])
--- a/arch/supervisord.conf
+++ b/arch/supervisord.conf
@ -0,0 +1,25 @@
+[supervisord]
+; Run in the foreground; supervisord is the container ENTRYPOINT.
+nodaemon=true
+
+; Runs stream_traces.py, which follows the envoy log and forwards trace lines.
+[program:trace_streamer]
+command=python stream_traces.py
+autostart=true
+; NOTE(review): with autorestart=false this program is not restarted if it
+; exits after it has started successfully - confirm that is intended.
+autorestart=false
+startretries=3
+priority=1
+; Send program output to the container's stdout/stderr; maxbytes = 0 turns
+; off size-based log rotation for those targets.
+stdout_logfile=/dev/stdout
+stderr_logfile=/dev/stderr
+stdout_logfile_maxbytes = 0
+stderr_logfile_maxbytes = 0
+
+
+; Runs config_generator.py, passes /etc/envoy/envoy.yaml through envsubst, then
+; starts envoy with its combined output copied to /var/log/envoy.log via tee.
+[program:envoy]
+command=sh -c "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"
+autostart=true
+autorestart=true
+startretries=3
+priority=2
+; Send program output to the container's stdout/stderr; maxbytes = 0 turns
+; off size-based log rotation for those targets.
+stdout_logfile=/dev/stdout
+stderr_logfile=/dev/stderr
+stdout_logfile_maxbytes = 0
+stderr_logfile_maxbytes = 0