fix tracing flags

This commit is contained in:
Adil Hafeez 2026-02-07 15:16:12 -08:00
parent 509976dec6
commit 31f3d38fad
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
11 changed files with 61 additions and 48 deletions

View file

@ -6,6 +6,7 @@ import yaml
from jsonschema import validate
from urllib.parse import urlparse
from copy import deepcopy
from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
SUPPORTED_PROVIDERS_WITH_BASE_URL = [
@ -158,6 +159,29 @@ def validate_and_render_schema():
arch_tracing = config_yaml.get("tracing", {})
opentracing_grpc_endpoint = arch_tracing.get(
"opentracing_grpc_endpoint", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
)
arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
# resolve env vars in opentracing_grpc_endpoint if present
if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint:
opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint)
print(
f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables"
)
arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
# ensure that opentracing_grpc_endpoint, if present, is a valid URL that uses the http scheme and has no path (a bare trailing "/" is tolerated)
if opentracing_grpc_endpoint:
urlparse_result = urlparse(opentracing_grpc_endpoint)
if urlparse_result.scheme != "http":
raise Exception(
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http"
)
if urlparse_result.path and urlparse_result.path != "/":
raise Exception(
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
)
llms_with_endpoint = []
llms_with_endpoint_cluster_names = set()
updated_model_providers = []

View file

@ -3,3 +3,4 @@ import os
# Service name constant for the gateway.
SERVICE_NAME_ARCHGW = "plano"
# Docker name for Plano (presumably the container name -- confirm against callers).
PLANO_DOCKER_NAME = "plano"
# Docker image reference; the PLANO_DOCKER_IMAGE environment variable overrides the pinned default.
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.4")
# Fallback tracing endpoint (http scheme, port 4317 on host.docker.internal), used when the
# config's tracing section does not set opentracing_grpc_endpoint.
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317"

View file

@ -116,11 +116,18 @@ def stream_gateway_logs(follow, service="plano"):
def docker_validate_plano_schema(arch_config_file):
import os
env = os.environ.copy()
env.pop("PATH", None)
env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
result = subprocess.run(
[
"docker",
"run",
"--rm",
*env_args,
"-v",
f"{arch_config_file}:/app/arch_config.yaml:ro",
"--entrypoint",

View file

@ -26,6 +26,7 @@ from planoai.core import (
start_cli_agent,
)
from planoai.consts import (
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
PLANO_DOCKER_IMAGE,
PLANO_DOCKER_NAME,
SERVICE_NAME_ARCHGW,
@ -153,7 +154,7 @@ def up(file, path, foreground):
# Set the ARCH_CONFIG_FILE environment variable
env_stage = {
"OTEL_TRACING_GRPC_ENDPOINT": "http://host.docker.internal:4317",
"OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
}
env = os.environ.copy()
# Remove PATH variable if present

View file

@ -382,6 +382,8 @@ properties:
type: integer
trace_arch_internal:
type: boolean
opentracing_grpc_endpoint:
type: string
additionalProperties: false
mode:
type: string

View file

@ -1030,35 +1030,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 4317
- name: opentelemetry_collector_http
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
typed_extension_protocol_options:
load_assignment:
cluster_name: opentelemetry_collector_http
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 4318
# Circuit breaker configuration to prevent overwhelming OTEL collector
circuit_breakers:
thresholds:
- priority: DEFAULT
max_connections: 100
max_pending_requests: 100
max_requests: 100
max_retries: 3
# Health checking and outlier detection
outlier_detection:
consecutive_5xx: 5
interval: 10s
base_ejection_time: 30s
max_ejection_percent: 50
enforcing_consecutive_5xx: 100
{% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
address: {{ _otel_endpoint.split(":") | first }}
port_value: {{ _otel_endpoint.split(":") | last }}
{% endif %}

View file

@ -50,13 +50,12 @@ fn empty() -> BoxBody<Bytes, hyper::Error> {
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let _tracer_provider = init_tracer();
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
// loading arch_config.yaml file
// loading arch_config.yaml file (before tracing init so we can read tracing config)
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
info!(path = %arch_config_path, "loading arch_config.yaml");
eprintln!("loading arch_config.yaml from {}", arch_config_path);
let config_contents =
fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml");
@ -64,6 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let config: Configuration =
serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml");
// Initialize tracing using config.yaml tracing section
let _tracer_provider = init_tracer(config.tracing.as_ref());
info!(path = %arch_config_path, "loaded arch_config.yaml");
let arch_config = Arc::new(config);
// combine agents and filters into a single list of agents

View file

@ -12,6 +12,7 @@ use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::EnvFilter;
use crate::tracing::ServiceNameOverrideExporter;
use common::configuration::Tracing;
struct BracketedTime;
@ -80,18 +81,20 @@ use tracing_subscriber::fmt::FormattedFields;
static INIT_LOGGER: OnceLock<SdkTracerProvider> = OnceLock::new();
pub fn init_tracer() -> &'static SdkTracerProvider {
pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider {
INIT_LOGGER.get_or_init(|| {
global::set_text_map_propagator(TraceContextPropagator::new());
// Get OTEL collector URL from environment
let otel_endpoint = std::env::var("OTEL_TRACING_GRPC_ENDPOINT")
.unwrap_or_else(|_| "http://localhost:4317".to_string());
// Get OTEL endpoint and sampling from config.yaml tracing section
let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone());
let tracing_enabled = std::env::var("OTEL_TRACING_ENABLED")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(false);
let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0);
let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some();
eprintln!(
"initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}",
tracing_enabled, otel_endpoint, random_sampling
);
// Create OTLP exporter to send spans to collector
if tracing_enabled {
@ -103,7 +106,7 @@ pub fn init_tracer() -> &'static SdkTracerProvider {
// Create ServiceNameOverrideExporter to support per-span service names
// This allows spans to have different service names (e.g., plano(orchestrator),
// plano(filter), plano(llm)) by setting the "service.name.override" attribute
let exporter = ServiceNameOverrideExporter::new(&otel_endpoint);
let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap());
let provider = SdkTracerProvider::builder()
.with_batch_exporter(exporter)

View file

@ -90,6 +90,8 @@ pub struct Overrides {
/// Tracing options taken from the `tracing` section of the parsed configuration.
///
/// Every field is optional, so a config may set any subset of them.
pub struct Tracing {
/// Optional floating-point sampling rate.
/// NOTE(review): not read by the tracer initialization visible alongside this struct; confirm it is still used.
pub sampling_rate: Option<f64>,
/// Optional boolean flag.
/// NOTE(review): presumably toggles tracing of internal gateway traffic; usage is not visible here, confirm.
pub trace_arch_internal: Option<bool>,
/// Sampling setting read by `init_tracer`: a missing value is treated as 0, and tracing is
/// only enabled when this is greater than 0 and an endpoint is set.
/// NOTE(review): the sample config uses 100, so this is presumably a percentage; confirm.
pub random_sampling: Option<u32>,
/// Endpoint passed to the span exporter when tracing is enabled; when `None`, tracing stays disabled.
pub opentracing_grpc_endpoint: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)]

View file

@ -55,3 +55,4 @@ listeners:
tracing:
random_sampling: 100
opentracing_grpc_endpoint: $OTEL_TRACING_GRPC_ENDPOINT

View file

@ -6,12 +6,10 @@ services:
dockerfile: Dockerfile
ports:
- "8001:8001"
- "11000:11000"
- "12001:12001"
environment:
- ARCH_CONFIG_PATH=/app/arch_config.yaml
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
- OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
- OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317
volumes:
- ./config.yaml:/app/arch_config.yaml:ro
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
@ -60,4 +58,3 @@ services:
ports:
- "16686:16686" # Jaeger UI
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver