use standard tracing and logging in brightstaff (#721)

This commit is contained in:
Adil Hafeez 2026-02-09 13:33:27 -08:00 committed by GitHub
parent 4d9ed74b68
commit 46de89590b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 1494 additions and 2432 deletions

View file

@@ -54,6 +54,7 @@ static_resources:
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
{% endif %}
stat_prefix: plano(inbound)
codec_type: AUTO
@@ -206,7 +207,7 @@ static_resources:
- name: outbound_api_traffic
address:
socket_address:
address: 127.0.0.1
address: 0.0.0.0
port_value: 11000
traffic_direction: OUTBOUND
filter_chains:
@@ -303,6 +304,7 @@ static_resources:
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
{% endif %}
stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic
codec_type: AUTO
@@ -388,21 +390,6 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: plano(outbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: egress_traffic
codec_type: AUTO
scheme_header_transformation:
@@ -470,50 +457,6 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: otel_collector_proxy
address:
socket_address:
address: 127.0.0.1
port_value: 9903
traffic_direction: OUTBOUND
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: otel_proxy
codec_type: AUTO
# access_log:
# - name: envoy.access_loggers.file
# typed_config:
# "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
# path: "/var/log/access_otel.log"
# format: |
# [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
route_config:
name: otel_route
virtual_hosts:
- name: otel_backend
domains: ["*"]
routes:
- match:
prefix: "/v1/traces"
route:
cluster: opentelemetry_collector_http
timeout: 5s
retry_policy:
retry_on: "5xx,connect-failure,refused-stream,reset"
num_retries: 3
per_try_timeout: 2s
host_selection_retry_max_attempts: 5
retriable_status_codes: [500, 502, 503, 504]
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% endif %}
- name: egress_traffic_llm
address:
socket_address:
@@ -524,6 +467,22 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: plano(outbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
operation: "%REQ(:METHOD)% %REQ(:AUTHORITY)%%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
{% endif %}
stat_prefix: egress_traffic
codec_type: AUTO
scheme_header_transformation:
@@ -1071,35 +1030,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 4317
- name: opentelemetry_collector_http
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
typed_extension_protocol_options:
load_assignment:
cluster_name: opentelemetry_collector_http
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 4318
# Circuit breaker configuration to prevent overwhelming OTEL collector
circuit_breakers:
thresholds:
- priority: DEFAULT
max_connections: 100
max_pending_requests: 100
max_requests: 100
max_retries: 3
# Health checking and outlier detection
outlier_detection:
consecutive_5xx: 5
interval: 10s
base_ejection_time: 30s
max_ejection_percent: 50
enforcing_consecutive_5xx: 100
{% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
address: {{ _otel_endpoint.split(":") | first }}
port_value: {{ _otel_endpoint.split(":") | last }}
{% endif %}