From 31f3d38fad5ddad34f92ff44e36551f8d0dcee9f Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Sat, 7 Feb 2026 15:16:12 -0800 Subject: [PATCH] fix tracing flags --- cli/planoai/config_generator.py | 24 +++++++++++++ cli/planoai/consts.py | 1 + cli/planoai/docker_cli.py | 7 ++++ cli/planoai/main.py | 3 +- config/arch_config_schema.yaml | 2 ++ config/envoy.template.yaml | 34 ++----------------- crates/brightstaff/src/main.rs | 9 +++-- crates/brightstaff/src/utils/tracing.rs | 21 +++++++----- crates/common/src/configuration.rs | 2 ++ .../config.yaml | 1 + .../docker-compose.yaml | 5 +-- 11 files changed, 61 insertions(+), 48 deletions(-) diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 2157b714..e77e9255 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -6,6 +6,7 @@ import yaml from jsonschema import validate from urllib.parse import urlparse from copy import deepcopy +from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT SUPPORTED_PROVIDERS_WITH_BASE_URL = [ @@ -158,6 +159,29 @@ def validate_and_render_schema(): arch_tracing = config_yaml.get("tracing", {}) + opentracing_grpc_endpoint = arch_tracing.get( + "opentracing_grpc_endpoint", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT + ) + arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint + # resolve env vars in opentracing_grpc_endpoint if present + if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint: + opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint) + print( + f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables" + ) + arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint + # ensure that opentracing_grpc_endpoint is a valid URL if present and start with http and must not have any path + if opentracing_grpc_endpoint: + urlparse_result = urlparse(opentracing_grpc_endpoint) + if urlparse_result.scheme != "http": + raise Exception( + f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http" + ) + if urlparse_result.path and urlparse_result.path != "/": + raise Exception( + f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty" + ) + llms_with_endpoint = [] llms_with_endpoint_cluster_names = set() updated_model_providers = [] diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index 52c92130..e51466f3 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -3,3 +3,4 @@ import os SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.4") +DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317" diff --git a/cli/planoai/docker_cli.py b/cli/planoai/docker_cli.py index 518606a6..599d57a1 100644 --- a/cli/planoai/docker_cli.py +++ b/cli/planoai/docker_cli.py @@ -116,11 +116,18 @@ def stream_gateway_logs(follow, service="plano"): def docker_validate_plano_schema(arch_config_file): + import os + + env = os.environ.copy() + env.pop("PATH", None) + env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]] + result = subprocess.run( [ "docker", "run", "--rm", + *env_args, "-v", f"{arch_config_file}:/app/arch_config.yaml:ro", "--entrypoint", diff --git a/cli/planoai/main.py b/cli/planoai/main.py index d0ef74c3..116b54cf 100644 --- a/cli/planoai/main.py +++ b/cli/planoai/main.py @@ -26,6 +26,7 @@ from planoai.core import ( start_cli_agent, ) from planoai.consts import ( + DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, PLANO_DOCKER_IMAGE, PLANO_DOCKER_NAME, SERVICE_NAME_ARCHGW, @@ -153,7 +154,7 @@ def up(file, path, foreground): # Set the ARCH_CONFIG_FILE environment variable env_stage = { - "OTEL_TRACING_GRPC_ENDPOINT": "http://host.docker.internal:4317", + "OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, } env = os.environ.copy() # Remove PATH variable if present diff --git a/config/arch_config_schema.yaml b/config/arch_config_schema.yaml index 003bb9b4..0f3cefb7 100644 --- a/config/arch_config_schema.yaml +++ b/config/arch_config_schema.yaml @@ -382,6 +382,8 @@ properties: type: integer trace_arch_internal: type: boolean + opentracing_grpc_endpoint: + type: string additionalProperties: false mode: type: string diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index aed44dae..308a35b5 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -1030,35 +1030,7 @@ static_resources: - endpoint: address: socket_address: - address: host.docker.internal - port_value: 4317 - - name: opentelemetry_collector_http - type: STRICT_DNS - dns_lookup_family: V4_ONLY - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - load_assignment: - cluster_name: opentelemetry_collector_http - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 4318 - # Circuit breaker configuration to prevent overwhelming OTEL collector - circuit_breakers: - thresholds: - - priority: DEFAULT - max_connections: 100 - max_pending_requests: 100 - max_requests: 100 - max_retries: 3 - # Health checking and outlier detection - outlier_detection: - consecutive_5xx: 5 - interval: 10s - base_ejection_time: 30s - max_ejection_percent: 50 - enforcing_consecutive_5xx: 100 + {% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %} + address: {{ _otel_endpoint.split(":") | first }} + port_value: {{ _otel_endpoint.split(":") | last }} {% endif %} diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index eb75d21a..3469d8bd 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -50,13 +50,12 @@ fn empty() -> BoxBody { #[tokio::main] async fn main() -> Result<(), Box> { - let _tracer_provider = init_tracer(); let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string()); - // loading arch_config.yaml file + // loading arch_config.yaml file (before tracing init so we can read tracing config) let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED") .unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string()); - info!(path = %arch_config_path, "loading arch_config.yaml"); + eprintln!("loading arch_config.yaml from {}", arch_config_path); let config_contents = fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml"); @@ -64,6 +63,10 @@ async fn main() -> Result<(), Box> { let config: Configuration = serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml"); + // Initialize tracing using config.yaml tracing section + let _tracer_provider = init_tracer(config.tracing.as_ref()); + info!(path = %arch_config_path, "loaded arch_config.yaml"); + let arch_config = Arc::new(config); // combine agents and filters into a single list of agents diff --git a/crates/brightstaff/src/utils/tracing.rs b/crates/brightstaff/src/utils/tracing.rs index eda78c43..62bf1ecc 100644 --- a/crates/brightstaff/src/utils/tracing.rs +++ b/crates/brightstaff/src/utils/tracing.rs @@ -12,6 +12,7 @@ use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::EnvFilter; use crate::tracing::ServiceNameOverrideExporter; +use common::configuration::Tracing; struct BracketedTime; @@ -80,18 +81,20 @@ use tracing_subscriber::fmt::FormattedFields; static INIT_LOGGER: OnceLock = OnceLock::new(); -pub fn init_tracer() -> &'static SdkTracerProvider { +pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider { INIT_LOGGER.get_or_init(|| { global::set_text_map_propagator(TraceContextPropagator::new()); - // Get OTEL collector URL from environment - let otel_endpoint = std::env::var("OTEL_TRACING_GRPC_ENDPOINT") - .unwrap_or_else(|_| "http://localhost:4317".to_string()); + // Get OTEL endpoint and sampling from config.yaml tracing section + let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone()); - let tracing_enabled = std::env::var("OTEL_TRACING_ENABLED") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(false); + let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0); + + let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some(); + eprintln!( + "initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}", + tracing_enabled, otel_endpoint, random_sampling + ); // Create OTLP exporter to send spans to collector if tracing_enabled { @@ -103,7 +106,7 @@ pub fn init_tracer() -> &'static SdkTracerProvider { // Create ServiceNameOverrideExporter to support per-span service names // This allows spans to have different service names (e.g., plano(orchestrator), // plano(filter), plano(llm)) by setting the "service.name.override" attribute - let exporter = ServiceNameOverrideExporter::new(&otel_endpoint); + let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap()); let provider = SdkTracerProvider::builder() .with_batch_exporter(exporter) diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index c600ed5d..ccca89c3 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -90,6 +90,8 @@ pub struct Overrides { pub struct Tracing { pub sampling_rate: Option, pub trace_arch_internal: Option, + pub random_sampling: Option, + pub opentracing_grpc_endpoint: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml b/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml index b3a204f3..5aa8cd70 100644 --- a/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml +++ b/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml @@ -55,3 +55,4 @@ listeners: tracing: random_sampling: 100 + opentracing_grpc_endpoint: $OTEL_TRACING_GRPC_ENDPOINT diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml b/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml index 5f49fd15..00875ae5 100644 --- a/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml +++ b/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml @@ -6,12 +6,10 @@ services: dockerfile: Dockerfile ports: - "8001:8001" - - "11000:11000" - - "12001:12001" environment: - ARCH_CONFIG_PATH=/app/arch_config.yaml - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 + - OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317 volumes: - ./config.yaml:/app/arch_config.yaml:ro - /etc/ssl/cert.pem:/etc/ssl/cert.pem @@ -60,4 +58,3 @@ services: ports: - "16686:16686" # Jaeger UI - "4317:4317" # OTLP gRPC receiver - - "4318:4318" # OTLP HTTP receiver