diff --git a/crates/brightstaff/src/app_state.rs b/crates/brightstaff/src/app_state.rs index 8b781ee9..20ed7ced 100644 --- a/crates/brightstaff/src/app_state.rs +++ b/crates/brightstaff/src/app_state.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common::configuration::{Agent, Listener, ModelAlias}; +use common::configuration::{Agent, Listener, ModelAlias, SpanAttributes}; use common::llm_providers::LlmProviders; use tokio::sync::RwLock; @@ -22,6 +22,7 @@ pub struct AppState { pub listeners: Arc>>, pub state_storage: Option>, pub llm_provider_url: String, + pub span_attributes: Arc>, /// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive). pub http_client: reqwest::Client, } diff --git a/crates/brightstaff/src/handlers/agents/orchestrator.rs b/crates/brightstaff/src/handlers/agents/orchestrator.rs index 5af69af1..9fa4ca63 100644 --- a/crates/brightstaff/src/handlers/agents/orchestrator.rs +++ b/crates/brightstaff/src/handlers/agents/orchestrator.rs @@ -19,7 +19,7 @@ use crate::app_state::AppState; use crate::handlers::errors::build_error_chain_response; use crate::handlers::request::extract_request_id; use crate::handlers::response::ResponseHandler; -use crate::tracing::{operation_component, set_service_name}; +use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name}; /// Main errors for agent chat completions #[derive(Debug, thiserror::Error)] @@ -41,6 +41,8 @@ pub async fn agent_chat( state: Arc, ) -> Result>, hyper::Error> { let request_id = extract_request_id(&request); + let custom_attrs = + collect_custom_trace_attributes(request.headers(), state.span_attributes.as_ref().as_ref()); // Create a span with request_id that will be included in all log lines let request_span = info_span!( @@ -56,7 +58,7 @@ pub async fn agent_chat( // Set service name for orchestrator operations set_service_name(operation_component::ORCHESTRATOR); - match handle_agent_chat_inner(request, state, request_id).await { + match handle_agent_chat_inner(request, state, request_id, custom_attrs).await { Ok(response) => Ok(response), Err(err) => { // Check if this is a client error from the pipeline that should be cascaded @@ -84,7 +86,7 @@ pub async fn agent_chat( let mut response = Response::new(ResponseHandler::create_full_body(json_string)); *response.status_mut() = hyper::StatusCode::from_u16(*status) - .unwrap_or(hyper::StatusCode::BAD_REQUEST); + .unwrap_or(hyper::StatusCode::INTERNAL_SERVER_ERROR); response.headers_mut().insert( hyper::header::CONTENT_TYPE, "application/json".parse().unwrap(), @@ -104,6 +106,7 @@ async fn handle_agent_chat_inner( request: Request, state: Arc, request_id: String, + custom_attrs: std::collections::HashMap, ) -> Result>, AgentFilterChainError> { // Initialize services let agent_selector = AgentSelector::new(Arc::clone(&state.orchestrator_service)); @@ -126,6 +129,9 @@ async fn handle_agent_chat_inner( get_active_span(|span| { span.update_name(listener.name.to_string()); + for (key, value) in &custom_attrs { + span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone())); + } }); info!(listener = %listener.name, "handling request"); @@ -276,6 +282,12 @@ async fn handle_agent_chat_inner( set_service_name(operation_component::AGENT); get_active_span(|span| { span.update_name(format!("{} /v1/chat/completions", agent_name)); + for (key, value) in &custom_attrs { + span.set_attribute(opentelemetry::KeyValue::new( + key.clone(), + value.clone(), + )); + } }); pipeline_processor diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs index bc4ff811..724bb6f2 100644 --- a/crates/brightstaff/src/handlers/llm/mod.rs +++ b/crates/brightstaff/src/handlers/llm/mod.rs @@ -29,7 +29,9 @@ use crate::state::response_state_processor::ResponsesStateProcessor; use crate::state::{ extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError, }; -use crate::tracing::{llm as tracing_llm, operation_component, set_service_name}; +use crate::tracing::{ + collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name, +}; use router::router_chat_get_upstream_model; fn full>(chunk: T) -> BoxBody { @@ -45,6 +47,8 @@ pub async fn llm_chat( let request_path = request.uri().path().to_string(); let request_headers = request.headers().clone(); let request_id = extract_request_id(&request); + let custom_attrs = + collect_custom_trace_attributes(&request_headers, state.span_attributes.as_ref().as_ref()); // Create a span with request_id that will be included in all log lines let request_span = info_span!( @@ -60,20 +64,33 @@ pub async fn llm_chat( ); // Execute the rest of the handler inside the span - llm_chat_inner(request, state, request_id, request_path, request_headers) - .instrument(request_span) - .await + llm_chat_inner( + request, + state, + custom_attrs, + request_id, + request_path, + request_headers, + ) + .instrument(request_span) + .await } async fn llm_chat_inner( request: Request, state: Arc, + custom_attrs: HashMap, request_id: String, request_path: String, mut request_headers: hyper::HeaderMap, ) -> Result>, hyper::Error> { // Set service name for LLM operations set_service_name(operation_component::LLM); + get_active_span(|span| { + for (key, value) in &custom_attrs { + span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone())); + } + }); let traceparent = extract_or_generate_traceparent(&request_headers); diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 540a96db..be25e383 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -72,11 +72,11 @@ fn cors_preflight() -> Result>, hyper::Err /// Load and parse the YAML configuration file. /// /// The path is read from `PLANO_CONFIG_PATH_RENDERED` (env) or falls back to -/// `./arch_config_rendered.yaml`. +/// `./plano_config_rendered.yaml`. fn load_config() -> Result> { let path = env::var("PLANO_CONFIG_PATH_RENDERED") - .unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string()); - eprintln!("loading arch_config.yaml from {}", path); + .unwrap_or_else(|_| "./plano_config_rendered.yaml".to_string()); + eprintln!("loading plano_config.yaml from {}", path); let contents = fs::read_to_string(&path).map_err(|e| format!("failed to read {path}: {e}"))?; @@ -136,6 +136,13 @@ async fn init_app_state( let state_storage = init_state_storage(config).await?; + let span_attributes = Arc::new( + config + .tracing + .as_ref() + .and_then(|tracing| tracing.span_attributes.clone()), + ); + Ok(AppState { router_service, orchestrator_service, @@ -145,6 +152,7 @@ async fn init_app_state( listeners: Arc::new(RwLock::new(config.listeners.clone())), state_storage, llm_provider_url, + span_attributes, http_client: reqwest::Client::new(), }) } diff --git a/crates/brightstaff/src/tracing/custom_attributes.rs b/crates/brightstaff/src/tracing/custom_attributes.rs index 24abc72b..7d4244d2 100644 --- a/crates/brightstaff/src/tracing/custom_attributes.rs +++ b/crates/brightstaff/src/tracing/custom_attributes.rs @@ -52,6 +52,7 @@ pub fn collect_custom_trace_attributes( attributes } +#[allow(dead_code)] pub fn append_span_attributes( mut span_builder: SpanBuilder, attributes: &HashMap, diff --git a/crates/brightstaff/src/tracing/init.rs b/crates/brightstaff/src/tracing/init.rs index 4f1ef302..ed351148 100644 --- a/crates/brightstaff/src/tracing/init.rs +++ b/crates/brightstaff/src/tracing/init.rs @@ -99,6 +99,9 @@ pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvid // Create OTLP exporter to send spans to collector. // Use `if let` to destructure the endpoint, avoiding an unwrap. if let Some(endpoint) = otel_endpoint.as_deref().filter(|_| tracing_enabled) { + if std::env::var("OTEL_SERVICE_NAME").is_err() { + std::env::set_var("OTEL_SERVICE_NAME", "plano"); + } // Create ServiceNameOverrideExporter to support per-span service names // This allows spans to have different service names (e.g., plano(orchestrator), // plano(filter), plano(llm)) by setting the "service.name.override" attribute diff --git a/crates/brightstaff/src/tracing/mod.rs b/crates/brightstaff/src/tracing/mod.rs index 929fbbe0..644db31a 100644 --- a/crates/brightstaff/src/tracing/mod.rs +++ b/crates/brightstaff/src/tracing/mod.rs @@ -1,10 +1,12 @@ mod constants; +mod custom_attributes; mod init; mod service_name_exporter; pub use constants::{ error, http, llm, operation_component, routing, signals, OperationNameBuilder, }; +pub use custom_attributes::collect_custom_trace_attributes; pub use init::init_tracer; pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};