From d53a1ab3d11a1a548ee961a972ed2465276c8d81 Mon Sep 17 00:00:00 2001
From: Musa <malikmusa1323@gmail.com>
Date: Tue, 17 Feb 2026 13:13:12 -0800
Subject: [PATCH] refactor: remove TraceCollector usage and enhance logging
 with structured attributes

---
 .../src/handlers/agent_chat_completions.rs    | 438 ++++++++----------
 crates/brightstaff/src/handlers/llm.rs        | 341 +++++++-------
 .../brightstaff/src/handlers/router_chat.rs   | 132 +-----
 crates/brightstaff/src/main.rs                |  37 +-
 crates/brightstaff/src/tracing/mod.rs         |  30 ++
 crates/common/src/configuration.rs            |   2 +
 .../travel_agents/config.yaml                 |   2 +-
 .../travel_agents/test.rest                   |  13 +-
 8 files changed, 440 insertions(+), 555 deletions(-)
diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs
index 09dc72a7..73f5cb2a 100644
--- a/crates/brightstaff/src/handlers/agent_chat_completions.rs
+++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs
@@ -1,10 +1,8 @@
 use std::sync::Arc;
-use std::time::{Instant, SystemTime};
+use std::time::Instant;
 
 use bytes::Bytes;
 use common::configuration::SpanAttributes;
-use common::consts::TRACE_PARENT_HEADER;
-use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind};
 use hermesllm::apis::OpenAIMessage;
 use hermesllm::clients::SupportedAPIsFromClient;
 use hermesllm::providers::request::ProviderRequest;
@@ -12,17 +10,15 @@ use hermesllm::ProviderRequestType;
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use hyper::{Request, Response};
+use opentelemetry::trace::get_active_span;
 use serde::ser::Error as SerError;
-use tracing::{debug, info, warn};
+use tracing::{debug, info, info_span, warn, Instrument};
 
 use super::agent_selector::{AgentSelectionError, AgentSelector};
 use super::pipeline_processor::{PipelineError, PipelineProcessor};
 use super::response_handler::ResponseHandler;
 use crate::router::plano_orchestrator::OrchestratorService;
-use crate::tracing::{
-    append_span_attributes, collect_custom_trace_attributes, http, operation_component,
-    OperationNameBuilder,
-};
+use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
 
 /// Main errors for agent chat completions
 #[derive(Debug, thiserror::Error)]
@@ -45,95 +41,127 @@ pub async fn agent_chat(
     _: String,
     agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
     listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
-    trace_collector: Arc<common::traces::TraceCollector>,
     span_attributes: Arc<Option<SpanAttributes>>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
-    match handle_agent_chat(
-        request,
-        orchestrator_service,
-        agents_list,
-        listeners,
-        trace_collector,
-        span_attributes,
-    )
-    .await
+    let custom_attrs =
+        collect_custom_trace_attributes(request.headers(), span_attributes.as_ref().as_ref());
+    // Extract request_id from headers or generate a new one
+    let request_id: String = match request
+        .headers()
+        .get(common::consts::REQUEST_ID_HEADER)
+        .and_then(|h| h.to_str().ok())
+        .map(|s| s.to_string())
     {
-        Ok(response) => Ok(response),
-        Err(err) => {
-            // Check if this is a client error from the pipeline that should be cascaded
-            if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
-                agent,
-                status,
-                body,
-            }) = &err
-            {
-                warn!(
-                    "Client error from agent '{}' (HTTP {}): {}",
-                    agent, status, body
-                );
+        Some(id) => id,
+        None => uuid::Uuid::new_v4().to_string(),
+    };
 
-                // Create error response with the original status code and body
+    // Create a request span whose request_id field is attached to every log line emitted inside it
+    let request_span = info_span!(
+        "(orchestrator)",
+        component = "orchestrator",
+        request_id = %request_id,
+        http.method = %request.method(),
+        http.path = %request.uri().path()
+    );
+
+    // Execute the handler inside the span
+    async {
+        // Set service name for orchestrator operations
+        set_service_name(operation_component::ORCHESTRATOR);
+
+        match handle_agent_chat_inner(
+            request,
+            orchestrator_service,
+            agents_list,
+            listeners,
+            request_id,
+            custom_attrs,
+        )
+        .await
+        {
+            Ok(response) => Ok(response),
+            Err(err) => {
+                // Check if this is a client error from the pipeline that should be cascaded
+                if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
+                    agent,
+                    status,
+                    body,
+                }) = &err
+                {
+                    warn!(
+                        agent = %agent,
+                        status = %status,
+                        body = %body,
+                        "client error from agent"
+                    );
+
+                    // Create error response with the original status code and body
+                    let error_json = serde_json::json!({
+                        "error": "ClientError",
+                        "agent": agent,
+                        "status": status,
+                        "agent_response": body
+                    });
+
+                    let json_string = error_json.to_string();
+                    let mut response =
+                        Response::new(ResponseHandler::create_full_body(json_string));
+                    *response.status_mut() = hyper::StatusCode::from_u16(*status)
+                        .unwrap_or(hyper::StatusCode::BAD_REQUEST);
+                    response.headers_mut().insert(
+                        hyper::header::CONTENT_TYPE,
+                        "application/json".parse().unwrap(),
+                    );
+                    return Ok(response);
+                }
+
+                // Log detailed error information with the full error chain for all other errors
+                let mut error_chain = Vec::new();
+                let mut current_error: &dyn std::error::Error = &err;
+
+                // Collect the full error chain
+                loop {
+                    error_chain.push(current_error.to_string());
+                    match current_error.source() {
+                        Some(source) => current_error = source,
+                        None => break,
+                    }
+                }
+
+                // Log the complete error chain
+                warn!(error_chain = ?error_chain, "agent chat error chain");
+                warn!(root_error = ?err, "root error");
+
+                // Create structured error response as JSON
                 let error_json = serde_json::json!({
-                    "error": "ClientError",
-                    "agent": agent,
-                    "status": status,
-                    "agent_response": body
+                    "error": {
+                        "type": "AgentFilterChainError",
+                        "message": err.to_string(),
+                        "error_chain": error_chain,
+                        "debug_info": format!("{:?}", err)
+                    }
                 });
 
-                let json_string = error_json.to_string();
-                let mut response = Response::new(ResponseHandler::create_full_body(json_string));
-                *response.status_mut() =
-                    hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST);
-                response.headers_mut().insert(
-                    hyper::header::CONTENT_TYPE,
-                    "application/json".parse().unwrap(),
-                );
-                return Ok(response);
+                // Log the error for debugging
+                info!(error = %error_json, "structured error info");
+
+                // Return JSON error response
+                Ok(ResponseHandler::create_json_error_response(&error_json))
             }
-
-            // Print detailed error information with full error chain for other errors
-            let mut error_chain = Vec::new();
-            let mut current_error: &dyn std::error::Error = &err;
-
-            // Collect the full error chain
-            loop {
-                error_chain.push(current_error.to_string());
-                match current_error.source() {
-                    Some(source) => current_error = source,
-                    None => break,
-                }
-            }
-
-            // Log the complete error chain
-            warn!("Agent chat error chain: {:#?}", error_chain);
-            warn!("Root error: {:?}", err);
-
-            // Create structured error response as JSON
-            let error_json = serde_json::json!({
-                "error": {
-                    "type": "AgentFilterChainError",
-                    "message": err.to_string(),
-                    "error_chain": error_chain,
-                    "debug_info": format!("{:?}", err)
-                }
-            });
-
-            // Log the error for debugging
-            info!("Structured error info: {}", error_json);
-
-            // Return JSON error response
-            Ok(ResponseHandler::create_json_error_response(&error_json))
         }
     }
+    .instrument(request_span)
+    .await
 }
 
-async fn handle_agent_chat(
+async fn handle_agent_chat_inner(
     request: Request<hyper::body::Incoming>,
     orchestrator_service: Arc<OrchestratorService>,
     agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
     listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
-    trace_collector: Arc<common::traces::TraceCollector>,
-    span_attributes: Arc<Option<SpanAttributes>>,
+    request_id: String,
+    custom_attrs: std::collections::HashMap<String, String>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
     // Initialize services
     let agent_selector = AgentSelector::new(orchestrator_service);
@@ -147,14 +175,21 @@ async fn handle_agent_chat(
         .and_then(|name| name.to_str().ok());
 
     // Find the appropriate listener
-    let listener = {
+    let listener: common::configuration::Listener = {
         let listeners = listeners.read().await;
         agent_selector
             .find_listener(listener_name, &listeners)
             .await?
     };
 
-    info!("Handling request for listener: {}", listener.name);
+    get_active_span(|span| {
+        span.update_name(listener.name.to_string());
+        for (key, value) in &custom_attrs {
+            span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
+        }
+    });
+
+    info!(listener = %listener.name, "handling request");
 
     // Parse request body
     let request_path = request
@@ -169,12 +204,8 @@ async fn handle_agent_chat(
         let mut headers = request.headers().clone();
         headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER);
 
+        // Set the request_id in headers if not already present
         if !headers.contains_key(common::consts::REQUEST_ID_HEADER) {
-            let request_id = uuid::Uuid::new_v4().to_string();
-            info!(
-                "Request id not found in headers, generated new request id: {}",
-                request_id
-            );
             headers.insert(
                 common::consts::REQUEST_ID_HEADER,
                 hyper::header::HeaderValue::from_str(&request_id).unwrap(),
@@ -183,16 +214,12 @@ async fn handle_agent_chat(
 
         headers
     };
-    let custom_attrs = collect_custom_trace_attributes(
-        &request_headers,
-        span_attributes.as_ref().as_ref(),
-    );
 
     let chat_request_bytes = request.collect().await?.to_bytes();
 
     debug!(
-        "Received request body (raw utf8): {}",
-        String::from_utf8_lossy(&chat_request_bytes)
+        body = %String::from_utf8_lossy(&chat_request_bytes),
+        "received request body"
     );
 
     // Determine the API type from the endpoint
@@ -206,7 +233,7 @@ async fn handle_agent_chat(
     let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
         Ok(request) => request,
         Err(err) => {
-            warn!("Failed to parse request as ProviderRequestType: {}", err);
+            warn!("failed to parse request as ProviderRequestType: {}", err);
             let err_msg = format!("Failed to parse request: {}", err);
             return Err(AgentFilterChainError::RequestParsing(
                 serde_json::Error::custom(err_msg),
@@ -216,12 +243,6 @@ async fn handle_agent_chat(
 
     let message: Vec<OpenAIMessage> = client_request.get_messages();
 
-    // Extract trace parent for routing
-    let traceparent = request_headers
-        .iter()
-        .find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
-        .map(|(_, value)| value.to_str().unwrap_or_default().to_string());
-
     let request_id = request_headers
         .get(common::consts::REQUEST_ID_HEADER)
         .and_then(|val| val.to_str().ok())
@@ -234,90 +255,58 @@ async fn handle_agent_chat(
         agent_selector.create_agent_map(agents)
     };
 
-    // Parse trace parent to get trace_id and parent_span_id
-    let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent {
-        parse_traceparent(tp)
-    } else {
-        (String::new(), None)
-    };
-
     // Select appropriate agents using arch orchestrator llm model
-    let selection_span_id = generate_random_span_id();
-    let selection_start_time = SystemTime::now();
-    let selection_start_instant = Instant::now();
-
+    let selection_start = Instant::now();
     let selected_agents = agent_selector
-        .select_agents(&message, &listener, traceparent.clone(), request_id.clone())
+        .select_agents(&message, &listener, request_id.clone())
         .await?;
 
-    // Record agent selection span
-    let selection_end_time = SystemTime::now();
-    let selection_elapsed = selection_start_instant.elapsed();
-    let selection_operation_name = OperationNameBuilder::new()
-        .with_method("POST")
-        .with_path("/agents/select")
-        .with_target(&listener.name)
-        .build();
+    // Record selection attributes on the current orchestrator span
+    let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0;
+    get_active_span(|span| {
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.listener",
+            listener.name.clone(),
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.agent_count",
+            selected_agents.len() as i64,
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.agents",
+            selected_agents
+                .iter()
+                .map(|a| a.id.as_str())
+                .collect::<Vec<_>>()
+                .join(","),
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.determination_ms",
+            format!("{:.2}", selection_elapsed_ms),
+        ));
+    });
 
-    let mut selection_span_builder = append_span_attributes(
-        SpanBuilder::new(&selection_operation_name)
-            .with_span_id(selection_span_id)
-            .with_kind(SpanKind::Internal)
-            .with_start_time(selection_start_time)
-            .with_end_time(selection_end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, "/agents/select")
-            .with_attribute("selection.listener", listener.name.clone())
-            .with_attribute("selection.agent_count", selected_agents.len().to_string())
-            .with_attribute(
-                "selection.agents",
-                selected_agents
-                    .iter()
-                    .map(|a| a.id.as_str())
-                    .collect::<Vec<_>>()
-                    .join(","),
-            )
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0),
-            ),
-        &custom_attrs,
+    info!(
+        count = selected_agents.len(),
+        "selected agents for execution"
     );
 
-    if !trace_id.is_empty() {
-        selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone());
-    }
-    if let Some(parent_id) = parent_span_id.clone() {
-        selection_span_builder = selection_span_builder.with_parent_span_id(parent_id);
-    }
-
-    let selection_span = selection_span_builder.build();
-    trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span);
-
-    info!("Selected {} agent(s) for execution", selected_agents.len());
-
     // Execute agents sequentially, passing output from one to the next
     let mut current_messages = message.clone();
     let agent_count = selected_agents.len();
 
     for (agent_index, selected_agent) in selected_agents.iter().enumerate() {
+        // Get agent name
+        let agent_name = selected_agent.id.clone();
         let is_last_agent = agent_index == agent_count - 1;
 
         debug!(
-            "Processing agent {}/{}: {}",
-            agent_index + 1,
-            agent_count,
-            selected_agent.id
+            agent_index = agent_index + 1,
+            total = agent_count,
+            agent = %agent_name,
+            "processing agent"
         );
 
-        // Record the start time for agent span
-        let agent_start_time = SystemTime::now();
-        let agent_start_instant = Instant::now();
-        let span_id = generate_random_span_id();
-
-        // Get agent name
-        let agent_name = selected_agent.id.clone();
-
         // Process the filter chain
         let chat_history = pipeline_processor
             .process_filter_chain(
@@ -325,91 +314,74 @@ async fn handle_agent_chat(
                 selected_agent,
                 &agent_map,
                 &request_headers,
-                Some(&trace_collector),
-                trace_id.clone(),
-                span_id.clone(),
             )
             .await?;
 
         // Get agent details and invoke
         let agent = agent_map.get(&agent_name).unwrap();
 
-        debug!("Invoking agent: {}", agent_name);
+        debug!(agent = %agent_name, "invoking agent");
 
-        let llm_response = pipeline_processor
-            .invoke_agent(
-                &chat_history,
-                client_request.clone(),
-                agent,
-                &request_headers,
-                trace_id.clone(),
-                span_id.clone(),
-            )
-            .await?;
-
-        // Record agent span
-        let agent_end_time = SystemTime::now();
-        let agent_elapsed = agent_start_instant.elapsed();
-        let full_path = format!("/agents{}", request_path);
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(&full_path)
-            .with_target(&agent_name)
-            .build();
-
-        let mut span_builder = append_span_attributes(
-            SpanBuilder::new(&operation_name)
-                .with_span_id(span_id)
-                .with_kind(SpanKind::Internal)
-                .with_start_time(agent_start_time)
-                .with_end_time(agent_end_time)
-                .with_attribute(http::METHOD, "POST")
-                .with_attribute(http::TARGET, full_path)
-                .with_attribute("agent.name", agent_name.clone())
-                .with_attribute(
-                    "agent.sequence",
-                    format!("{}/{}", agent_index + 1, agent_count),
-                )
-                .with_attribute(
-                    "duration_ms",
-                    format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0),
-                ),
-            &custom_attrs,
+        let agent_span = info_span!(
+            "agent",
+            agent_id = %agent_name,
+            message_count = chat_history.len(),
         );
 
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id.clone());
-        }
-        if let Some(parent_id) = parent_span_id.clone() {
-            span_builder = span_builder.with_parent_span_id(parent_id);
-        }
+        let llm_response = async {
+            set_service_name(operation_component::AGENT);
+            get_active_span(|span| {
+                span.update_name(format!("{} /v1/chat/completions", agent_name));
+                for (key, value) in &custom_attrs {
+                    span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
+                }
+            });
 
-        let span = span_builder.build();
-        trace_collector.record_span(operation_component::AGENT, span);
+            pipeline_processor
+                .invoke_agent(
+                    &chat_history,
+                    client_request.clone(),
+                    agent,
+                    &request_headers,
+                )
+                .await
+        }
+        .instrument(agent_span.clone())
+        .await?;
 
         // If this is the last agent, return the streaming response
         if is_last_agent {
             info!(
-                "Completed agent chain, returning response from last agent: {}",
-                agent_name
+                agent = %agent_name,
+                "completed agent chain, returning response"
             );
-            return response_handler
-                .create_streaming_response(llm_response)
-                .await
-                .map_err(AgentFilterChainError::from);
+            // Capture the orchestrator span (parent of the agent span) so it
+            // stays open for the full streaming duration alongside the agent span.
+            let orchestrator_span = tracing::Span::current();
+            return async {
+                response_handler
+                    .create_streaming_response(
+                        llm_response,
+                        tracing::Span::current(), // agent span (inner)
+                        orchestrator_span,        // orchestrator span (outer)
+                    )
+                    .await
+                    .map_err(AgentFilterChainError::from)
+            }
+            .instrument(agent_span)
+            .await;
         }
 
         // For intermediate agents, collect the full response and pass to next agent
-        debug!(
-            "Collecting response from intermediate agent: {}",
-            agent_name
-        );
-        let response_text = response_handler.collect_full_response(llm_response).await?;
+        debug!(agent = %agent_name, "collecting response from intermediate agent");
+        let response_text = async { response_handler.collect_full_response(llm_response).await }
+            .instrument(agent_span)
+            .await?;
 
         info!(
-            "Agent {} completed, passing {} character response to next agent",
-            agent_name,
-            response_text.len()
+            agent = %agent_name,
+            response_len = response_text.len(),
+            "agent completed, passing response to next agent"
         );
 
         // remove last message and add new one at the end
diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs
index 6d5b236a..6e7ac226 100644
--- a/crates/brightstaff/src/handlers/llm.rs
+++ b/crates/brightstaff/src/handlers/llm.rs
@@ -4,7 +4,6 @@ use common::consts::{
     ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
 use common::llm_providers::LlmProviders;
-use common::traces::TraceCollector;
 use hermesllm::apis::openai_responses::InputParam;
 use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
 use hermesllm::{ProviderRequest, ProviderRequestType};
@@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Full};
 use hyper::header::{self};
 use hyper::{Request, Response, StatusCode};
+use opentelemetry::global;
+use opentelemetry::trace::get_active_span;
+use opentelemetry_http::HeaderInjector;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;
-use tracing::{debug, info, warn};
+use tracing::{debug, info, info_span, warn, Instrument};
 
 use crate::handlers::router_chat::router_chat_get_upstream_model;
 use crate::handlers::utils::{
@@ -26,7 +28,9 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
 use crate::state::{
     extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
 };
-use crate::tracing::{collect_custom_trace_attributes, operation_component};
+use crate::tracing::{
+    collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name,
+};
 
 fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
     Full::new(chunk.into())
@@ -34,38 +38,78 @@ fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
         .boxed()
 }
 
-#[allow(clippy::too_many_arguments)]
 pub async fn llm_chat(
     request: Request<hyper::body::Incoming>,
     router_service: Arc<RouterService>,
     full_qualified_llm_provider_url: String,
     model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
     llm_providers: Arc<RwLock<LlmProviders>>,
-    trace_collector: Arc<TraceCollector>,
     span_attributes: Arc<Option<SpanAttributes>>,
     state_storage: Option<Arc<dyn StateStorage>>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
     let request_path = request.uri().path().to_string();
     let request_headers = request.headers().clone();
-    let custom_attrs = collect_custom_trace_attributes(
-        &request_headers,
-        span_attributes.as_ref().as_ref(),
-    );
     let request_id: String = match request_headers
         .get(REQUEST_ID_HEADER)
         .and_then(|h| h.to_str().ok())
         .map(|s| s.to_string())
     {
         Some(id) => id,
-        None => {
-            let generated_id = uuid::Uuid::new_v4().to_string();
-            warn!(
-                "[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID",
-                generated_id
-            );
-            generated_id
-        }
+        None => uuid::Uuid::new_v4().to_string(),
     };
+    let custom_attrs =
+        collect_custom_trace_attributes(&request_headers, span_attributes.as_ref().as_ref());
+
+    // Create a request span whose request_id field is attached to every log line emitted inside it
+    let request_span = info_span!(
+        "llm",
+        component = "llm",
+        request_id = %request_id,
+        http.method = %request.method(),
+        http.path = %request_path,
+        llm.model = tracing::field::Empty,
+        llm.tools = tracing::field::Empty,
+        llm.user_message_preview = tracing::field::Empty,
+        llm.temperature = tracing::field::Empty,
+    );
+
+    // Execute the rest of the handler inside the span
+    llm_chat_inner(
+        request,
+        router_service,
+        full_qualified_llm_provider_url,
+        model_aliases,
+        llm_providers,
+        custom_attrs,
+        state_storage,
+        request_id,
+        request_path,
+        request_headers,
+    )
+    .instrument(request_span)
+    .await
+}
+
+#[allow(clippy::too_many_arguments)]
+async fn llm_chat_inner(
+    request: Request<hyper::body::Incoming>,
+    router_service: Arc<RouterService>,
+    full_qualified_llm_provider_url: String,
+    model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
+    llm_providers: Arc<RwLock<LlmProviders>>,
+    custom_attrs: HashMap<String, String>,
+    state_storage: Option<Arc<dyn StateStorage>>,
+    request_id: String,
+    request_path: String,
+    mut request_headers: hyper::HeaderMap,
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+    // Set service name for LLM operations
+    set_service_name(operation_component::LLM);
+    get_active_span(|span| {
+        for (key, value) in &custom_attrs {
+            span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
+        }
+    });
 
     // Extract or generate traceparent - this establishes the trace context for all spans
     let traceparent: String = match request_headers
@@ -79,20 +123,18 @@ pub async fn llm_chat(
             let trace_id = Uuid::new_v4().to_string().replace("-", "");
             let generated_tp = format!("00-{}-0000000000000000-01", trace_id);
             warn!(
-                "[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}",
-                request_id, generated_tp
+                generated_traceparent = %generated_tp,
+                "TRACE_PARENT header missing, generated new traceparent"
             );
             generated_tp
         }
     };
 
-    let mut request_headers = request_headers;
     let chat_request_bytes = request.collect().await?.to_bytes();
 
     debug!(
-        "[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}",
-        request_id,
-        String::from_utf8_lossy(&chat_request_bytes)
+        body = %String::from_utf8_lossy(&chat_request_bytes),
+        "request body received"
     );
 
     let mut client_request = match ProviderRequestType::try_from((
@@ -102,13 +144,10 @@ pub async fn llm_chat(
         Ok(request) => request,
         Err(err) => {
             warn!(
-                "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}",
-                request_id, err
-            );
-            let err_msg = format!(
-                "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}",
-                request_id, err
+                error = %err,
+                "failed to parse request as ProviderRequestType"
             );
+            let err_msg = format!("Failed to parse request: {}", err);
             let mut bad_request = Response::new(full(err_msg));
             *bad_request.status_mut() = StatusCode::BAD_REQUEST;
             return Ok(bad_request);
@@ -128,16 +167,21 @@ pub async fn llm_chat(
     let model_from_request = client_request.model().to_string();
     let temperature = client_request.get_temperature();
     let is_streaming_request = client_request.is_streaming();
-    let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
+    let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
 
     // Validate that the requested model exists in configuration
     // This matches the validation in llm_gateway routing.rs
-    if llm_providers.read().await.get(&resolved_model).is_none() {
+    if llm_providers
+        .read()
+        .await
+        .get(&alias_resolved_model)
+        .is_none()
+    {
         let err_msg = format!(
             "Model '{}' not found in configured providers",
-            resolved_model
+            alias_resolved_model
         );
-        warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg);
+        warn!(model = %alias_resolved_model, "model not found in configured providers");
         let mut bad_request = Response::new(full(err_msg));
         *bad_request.status_mut() = StatusCode::BAD_REQUEST;
         return Ok(bad_request);
@@ -145,10 +189,10 @@ pub async fn llm_chat(
 
     // Handle provider/model slug format (e.g., "openai/gpt-4")
     // Extract just the model name for upstream (providers don't understand the slug)
-    let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') {
+    let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') {
         model.to_string()
     } else {
-        resolved_model.clone()
+        alias_resolved_model.clone()
     };
 
     // Extract tool names and user message preview for span attributes
@@ -156,18 +200,30 @@ pub async fn llm_chat(
     let user_message_preview = client_request
         .get_recent_user_message()
         .map(|msg| truncate_message(&msg, 50));
+    let span = tracing::Span::current();
+    if let Some(temp) = temperature {
+        span.record(tracing_llm::TEMPERATURE, tracing::field::display(temp));
+    }
+    if let Some(tools) = &tool_names {
+        let formatted_tools = tools
+            .iter()
+            .map(|name| format!("{}(...)", name))
+            .collect::<Vec<_>>()
+            .join("\n");
+        span.record(tracing_llm::TOOLS, formatted_tools.as_str());
+    }
+    if let Some(preview) = &user_message_preview {
+        span.record(tracing_llm::USER_MESSAGE_PREVIEW, preview.as_str());
+    }
 
     // Extract messages for signal analysis (clone before moving client_request)
-    let messages_for_signals = client_request.get_messages();
+    let messages_for_signals = Some(client_request.get_messages());
 
     // Set the model to just the model name (without provider prefix)
     // This ensures upstream receives "gpt-4" not "openai/gpt-4"
     client_request.set_model(model_name_only.clone());
-    if client_request.remove_metadata_key("archgw_preference_config") {
-        debug!(
-            "[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata",
-            request_id
-        );
+    if client_request.remove_metadata_key("plano_preference_config") {
+        debug!("removed plano_preference_config from metadata");
     }
 
     // === v1/responses state management: Determine upstream API and combine input if needed ===
@@ -186,9 +242,9 @@ pub async fn llm_chat(
             // Get the upstream path and check if it's ResponsesAPI
             let upstream_path = get_upstream_path(
                 &llm_providers,
-                &resolved_model,
+                &alias_resolved_model,
                 &request_path,
-                &resolved_model,
+                &alias_resolved_model,
                 is_streaming_request,
             )
             .await;
@@ -215,14 +271,17 @@ pub async fn llm_chat(
                             // Update both the request and original_input_items
                             responses_req.input = InputParam::Items(combined_input.clone());
                             original_input_items = combined_input;
-                            info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len());
+                            info!(
+                                items = original_input_items.len(),
+                                "updated request with conversation history"
+                            );
                         }
                         Err(StateStorageError::NotFound(_)) => {
                             // Return 409 Conflict when previous_response_id not found
-                            warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id);
+                            warn!(previous_response_id = %prev_resp_id, "previous response_id not found");
                             let err_msg = format!(
-                                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}",
-                                request_id, prev_resp_id
+                                "Conversation state not found for previous_response_id: {}",
+                                prev_resp_id
                             );
                             let mut conflict_response = Response::new(full(err_msg));
                             *conflict_response.status_mut() = StatusCode::CONFLICT;
@@ -231,8 +290,9 @@ pub async fn llm_chat(
                         Err(e) => {
                             // Log warning but continue on other storage errors
                             warn!(
-                                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}",
-                                request_id, prev_resp_id, e
+                                previous_response_id = %prev_resp_id,
+                                error = %e,
+                                "failed to retrieve conversation state"
                             );
                             // Restore original_input_items since we passed ownership
                             original_input_items = extract_input_items(&responses_req.input);
@@ -240,10 +300,7 @@ pub async fn llm_chat(
                     }
                 }
             } else {
-                debug!(
-                    "[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.",
-                    request_id
-                );
+                debug!("upstream supports ResponsesAPI natively");
             }
         }
     }
@@ -252,15 +309,29 @@ pub async fn llm_chat(
     let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();
 
     // Determine routing using the dedicated router_chat module
-    let routing_result = match router_chat_get_upstream_model(
-        router_service,
-        client_request, // Pass the original request - router_chat will convert it
-        trace_collector.clone(),
-        &traceparent,
-        &request_path,
-        &request_id,
-        &custom_attrs,
-    )
+    // This gets its own span for latency and error tracking
+    let routing_span = info_span!(
+        "routing",
+        component = "routing",
+        http.method = "POST",
+        http.target = %request_path,
+        model.requested = %model_from_request,
+        model.alias_resolved = %alias_resolved_model,
+        route.selected_model = tracing::field::Empty,
+        routing.determination_ms = tracing::field::Empty,
+    );
+    let routing_result = match async {
+        set_service_name(operation_component::ROUTING);
+        router_chat_get_upstream_model(
+            router_service,
+            client_request, // Pass the original request - router_chat will convert it
+            &traceparent,
+            &request_path,
+            &request_id,
+        )
+        .await
+    }
+    .instrument(routing_span)
     .await
     {
         Ok(result) => result,
@@ -274,22 +345,37 @@ pub async fn llm_chat(
     // Determine final model to use
     // Router returns "none" as a sentinel value when it doesn't select a specific model
     let router_selected_model = routing_result.model_name;
-    let model_name = if router_selected_model != "none" {
+    let resolved_model = if router_selected_model != "none" {
         // Router selected a specific model via routing preferences
         router_selected_model
     } else {
         // Router returned "none" sentinel, use validated resolved_model from request
-        resolved_model.clone()
+        alias_resolved_model.clone()
     };
+    tracing::Span::current().record(tracing_llm::MODEL_NAME, resolved_model.as_str());
+
+    let span_name = if model_from_request == resolved_model {
+        format!("POST {} {}", request_path, resolved_model)
+    } else {
+        format!(
+            "POST {} {} -> {}",
+            request_path, model_from_request, resolved_model
+        )
+    };
+    get_active_span(|span| {
+        span.update_name(span_name.clone());
+    });
 
     debug!(
-        "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}",
-        request_id, full_qualified_llm_provider_url, model_name, model_name_only
+        url = %full_qualified_llm_provider_url,
+        provider_hint = %resolved_model,
+        upstream_model = %model_name_only,
+        "Routing to upstream"
     );
 
     request_headers.insert(
         ARCH_PROVIDER_HINT_HEADER,
-        header::HeaderValue::from_str(&model_name).unwrap(),
+        header::HeaderValue::from_str(&resolved_model).unwrap(),
     );
 
     request_headers.insert(
@@ -299,12 +385,18 @@ pub async fn llm_chat(
     // remove content-length header if it exists
     request_headers.remove(header::CONTENT_LENGTH);
 
+    // Inject current LLM span's trace context so upstream spans are children of plano(llm)
+    global::get_text_map_propagator(|propagator| {
+        let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+        propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
+    });
+
     // Capture start time right before sending request to upstream
     let request_start_time = std::time::Instant::now();
-    let request_start_system_time = std::time::SystemTime::now();
+    let _request_start_system_time = std::time::SystemTime::now();
 
     let llm_response = match reqwest::Client::new()
-        .post(full_qualified_llm_provider_url)
+        .post(&full_qualified_llm_provider_url)
         .headers(request_headers)
         .body(client_request_bytes_for_upstream)
         .send()
@@ -331,30 +423,12 @@ pub async fn llm_chat(
     // Build LLM span with actual status code using constants
     let byte_stream = llm_response.bytes_stream();
 
-    // Build the LLM span (will be finalized after streaming completes)
-    let llm_span = build_llm_span(
-        &traceparent,
-        &request_path,
-        &resolved_model,
-        &model_name,
-        upstream_status.as_u16(),
-        is_streaming_request,
-        request_start_system_time,
-        tool_names,
-        user_message_preview,
-        temperature,
-        &llm_providers,
-        &custom_attrs,
-    )
-    .await;
-
     // Create base processor for metrics and tracing
     let base_processor = ObservableStreamProcessor::new(
-        trace_collector,
         operation_component::LLM,
-        llm_span,
+        span_name,
         request_start_time,
-        Some(messages_for_signals),
+        messages_for_signals,
     );
 
     // === v1/responses state management: Wrap with ResponsesStateProcessor ===
@@ -375,8 +449,8 @@ pub async fn llm_chat(
             base_processor,
             state_store,
             original_input_items,
+            alias_resolved_model.clone(),
             resolved_model.clone(),
-            model_name.clone(),
             is_streaming_request,
             false, // Not OpenAI upstream since should_manage_state is true
             content_encoding,
@@ -417,93 +491,6 @@ fn resolve_model_alias(
     model_from_request.to_string()
 }
 
-/// Builds the LLM span with all required and optional attributes.
-#[allow(clippy::too_many_arguments)]
-async fn build_llm_span(
-    traceparent: &str,
-    request_path: &str,
-    resolved_model: &str,
-    model_name: &str,
-    status_code: u16,
-    is_streaming: bool,
-    start_time: std::time::SystemTime,
-    tool_names: Option<Vec<String>>,
-    user_message_preview: Option<String>,
-    temperature: Option<f32>,
-    llm_providers: &Arc<RwLock<LlmProviders>>,
-    custom_attrs: &HashMap<String, String>,
-) -> common::traces::Span {
-    use crate::tracing::{http, llm, OperationNameBuilder};
-    use common::traces::{parse_traceparent, SpanBuilder, SpanKind};
-
-    // Calculate the upstream path based on provider configuration
-    let upstream_path = get_upstream_path(
-        llm_providers,
-        model_name,
-        request_path,
-        resolved_model,
-        is_streaming,
-    )
-    .await;
-
-    // Build operation name showing path transformation if different
-    let operation_name = if request_path != upstream_path {
-        OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(format!("{} >> {}", request_path, upstream_path))
-            .with_target(resolved_model)
-            .build()
-    } else {
-        OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(request_path)
-            .with_target(resolved_model)
-            .build()
-    };
-
-    let (trace_id, parent_span_id) = parse_traceparent(traceparent);
-
-    let mut span_builder = SpanBuilder::new(&operation_name)
-        .with_trace_id(&trace_id)
-        .with_kind(SpanKind::Client)
-        .with_start_time(start_time)
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::STATUS_CODE, status_code.to_string())
-        .with_attribute(http::TARGET, request_path.to_string())
-        .with_attribute(http::UPSTREAM_TARGET, upstream_path)
-        .with_attribute(llm::MODEL_NAME, resolved_model.to_string())
-        .with_attribute(llm::IS_STREAMING, is_streaming.to_string());
-
-    // Only set parent span ID if it exists (not a root span)
-    if let Some(parent) = parent_span_id {
-        span_builder = span_builder.with_parent_span_id(&parent);
-    }
-
-    // Add optional attributes
-    if let Some(temp) = temperature {
-        span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string());
-    }
-
-    if let Some(tools) = tool_names {
-        let formatted_tools = tools
-            .iter()
-            .map(|name| format!("{}(...)", name))
-            .collect::<Vec<_>>()
-            .join("\n");
-        span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools);
-    }
-
-    if let Some(preview) = user_message_preview {
-        span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview);
-    }
-
-    for (key, value) in custom_attrs {
-        span_builder = span_builder.with_attribute(key, value);
-    }
-
-    span_builder.build()
-}
-
 /// Calculates the upstream path for the provider based on the model name.
 /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
 /// then uses target_endpoint_for_provider to calculate the correct upstream path.
diff --git a/crates/brightstaff/src/handlers/router_chat.rs b/crates/brightstaff/src/handlers/router_chat.rs
index 210e6bf6..d71734fa 100644
--- a/crates/brightstaff/src/handlers/router_chat.rs
+++ b/crates/brightstaff/src/handlers/router_chat.rs
@@ -1,14 +1,12 @@
 use common::configuration::ModelUsagePreference;
-use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector};
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use hyper::StatusCode;
-use std::collections::HashMap;
 use std::sync::Arc;
 use tracing::{debug, info, warn};
 
 use crate::router::llm_router::RouterService;
-use crate::tracing::{http, operation_component, routing, OperationNameBuilder};
+use crate::tracing::routing;
 
 pub struct RoutingResult {
     pub model_name: String,
@@ -36,11 +34,9 @@ impl RoutingError {
 pub async fn router_chat_get_upstream_model(
     router_service: Arc<RouterService>,
     client_request: ProviderRequestType,
-    trace_collector: Arc<TraceCollector>,
     traceparent: &str,
     request_path: &str,
     request_id: &str,
-    custom_attrs: &HashMap<String, String>,
 ) -> Result<RoutingResult, RoutingError> {
     // Clone metadata for routing before converting (which consumes client_request)
     let routing_metadata = client_request.metadata().clone();
@@ -57,14 +53,14 @@ pub async fn router_chat_get_upstream_model(
             | ProviderRequestType::BedrockConverseStream(_)
             | ProviderRequestType::ResponsesAPIRequest(_),
         ) => {
-            warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
+            warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
             return Err(RoutingError::internal_error(
                 "Request conversion failed".to_string(),
             ));
         }
         Err(err) => {
             warn!(
-                "Failed to convert request to ChatCompletionsRequest: {}",
+                "failed to convert request to ChatCompletionsRequest: {}",
                 err
             );
             return Err(RoutingError::internal_error(format!(
@@ -75,9 +71,8 @@ pub async fn router_chat_get_upstream_model(
     };
 
     debug!(
-        "[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}",
-        request_id,
-        &serde_json::to_string(&chat_request).unwrap()
+        request = %serde_json::to_string(&chat_request).unwrap(),
+        "router request"
     );
 
     // Extract usage preferences from metadata
@@ -113,16 +108,14 @@ pub async fn router_chat_get_upstream_model(
     };
 
     info!(
-        "[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
-        request_id,
-        usage_preferences.is_some(),
-        request_path,
-        latest_message_for_log
+        has_usage_preferences = usage_preferences.is_some(),
+        path = %request_path,
+        latest_message = %latest_message_for_log,
+        "processing router request"
     );
 
     // Capture start time for routing span
     let routing_start_time = std::time::Instant::now();
-    let routing_start_system_time = std::time::SystemTime::now();
 
     // Attempt to determine route using the router service
     let routing_result = router_service
@@ -134,47 +127,21 @@ pub async fn router_chat_get_upstream_model(
         )
         .await;
 
+    let determination_ms = routing_start_time.elapsed().as_millis() as i64;
+    let current_span = tracing::Span::current();
+    current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
+
     match routing_result {
         Ok(route) => match route {
             Some((_, model_name)) => {
-                // Record successful routing span
-                let mut attrs: HashMap<String, String> = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), model_name.clone());
-                for (key, value) in custom_attrs {
-                    attrs.entry(key.clone()).or_insert_with(|| value.clone());
-                }
-                record_routing_span(
-                    trace_collector,
-                    traceparent,
-                    routing_start_time,
-                    routing_start_system_time,
-                    attrs,
-                )
-                .await;
-
+                current_span.record("route.selected_model", model_name.as_str());
                 Ok(RoutingResult { model_name })
             }
             None => {
                 // No route determined, return sentinel value "none"
                 // This signals to llm.rs to use the original validated request model
-                info!(
-                    "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'",
-                    request_id
-                );
-
-                let mut attrs = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), "none".to_string());
-                for (key, value) in custom_attrs {
-                    attrs.entry(key.clone()).or_insert_with(|| value.clone());
-                }
-                record_routing_span(
-                    trace_collector,
-                    traceparent,
-                    routing_start_time,
-                    routing_start_system_time,
-                    attrs,
-                )
-                .await;
+                current_span.record("route.selected_model", "none");
+                info!("no route determined, using default model");
 
                 Ok(RoutingResult {
                     model_name: "none".to_string(),
@@ -182,22 +149,7 @@ pub async fn router_chat_get_upstream_model(
             }
         },
         Err(err) => {
-            // Record failed routing span
-            let mut attrs = HashMap::new();
-            attrs.insert("route.selected_model".to_string(), "unknown".to_string());
-            attrs.insert("error.message".to_string(), err.to_string());
-            for (key, value) in custom_attrs {
-                attrs.entry(key.clone()).or_insert_with(|| value.clone());
-            }
-            record_routing_span(
-                trace_collector,
-                traceparent,
-                routing_start_time,
-                routing_start_system_time,
-                attrs,
-            )
-            .await;
-
+            current_span.record("route.selected_model", "unknown");
             Err(RoutingError::internal_error(format!(
                 "Failed to determine route: {}",
                 err
@@ -205,53 +157,3 @@ pub async fn router_chat_get_upstream_model(
         }
     }
 }
-
-/// Helper function to record a routing span with the given attributes.
-/// Reduces code duplication across different routing outcomes.
-async fn record_routing_span(
-    trace_collector: Arc<TraceCollector>,
-    traceparent: &str,
-    start_time: std::time::Instant,
-    start_system_time: std::time::SystemTime,
-    attrs: HashMap<String, String>,
-) {
-    // The routing always uses OpenAI Chat Completions format internally,
-    // so we log that as the actual API being used for routing
-    let routing_api_path = "/v1/chat/completions";
-
-    let routing_operation_name = OperationNameBuilder::new()
-        .with_method("POST")
-        .with_path(routing_api_path)
-        .with_target("Arch-Router-1.5B")
-        .build();
-
-    let (trace_id, parent_span_id) = parse_traceparent(traceparent);
-
-    // Build the routing span directly using constants
-    let mut span_builder = SpanBuilder::new(&routing_operation_name)
-        .with_trace_id(&trace_id)
-        .with_kind(SpanKind::Client)
-        .with_start_time(start_system_time)
-        .with_end_time(std::time::SystemTime::now())
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::TARGET, routing_api_path.to_string())
-        .with_attribute(
-            routing::ROUTE_DETERMINATION_MS,
-            start_time.elapsed().as_millis().to_string(),
-        );
-
-    // Only set parent span ID if it exists (not a root span)
-    if let Some(parent) = parent_span_id {
-        span_builder = span_builder.with_parent_span_id(&parent);
-    }
-
-    // Add all custom attributes
-    for (key, value) in attrs {
-        span_builder = span_builder.with_attribute(key, value);
-    }
-
-    let span = span_builder.build();
-
-    // Record the span directly to the collector
-    trace_collector.record_span(operation_component::ROUTING, span);
-}
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index 9d577c7f..bfcadb63 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -14,7 +14,6 @@ use common::consts::{
     CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
 };
 use common::llm_providers::LlmProviders;
-use common::traces::TraceCollector;
 use http_body_util::{combinators::BoxBody, BodyExt, Empty};
 use hyper::body::Incoming;
 use hyper::server::conn::http1;
@@ -125,17 +124,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
     // Initialize trace collector and start background flusher
     // Tracing is enabled if the tracing config is present in plano_config.yaml
     // Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED
-    let tracing_enabled = if plano_config.tracing.is_some() {
-        info!("Tracing configuration found in plano_config.yaml");
-        Some(true)
-    } else {
-        info!(
-            "No tracing configuration in plano_config.yaml, will check OTEL_TRACING_ENABLED env var"
-        );
-        None
-    };
-    let trace_collector = Arc::new(TraceCollector::new(tracing_enabled));
-    let _flusher_handle = trace_collector.clone().start_background_flusher();
+    // OpenTelemetry automatic instrumentation is configured in utils/tracing.rs
 
     // Initialize conversation state storage for v1/responses
     // Configurable via plano_config.yaml state_storage section
@@ -145,7 +134,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         if let Some(storage_config) = &plano_config.state_storage {
             let storage: Arc<dyn StateStorage> = match storage_config.storage_type {
                 common::configuration::StateStorageType::Memory => {
-                    info!("Initialized conversation state storage: Memory");
+                    info!(
+                        storage_type = "memory",
+                        "initialized conversation state storage"
+                    );
                     Arc::new(MemoryConversationalStorage::new())
                 }
                 common::configuration::StateStorageType::Postgres => {
@@ -154,8 +146,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                         .as_ref()
                         .expect("connection_string is required for postgres state_storage");
 
-                    debug!("Postgres connection string (full): {}", connection_string);
-                    info!("Initializing conversation state storage: Postgres");
+                    debug!(connection_string = %connection_string, "postgres connection");
+                    info!(
+                        storage_type = "postgres",
+                        "initializing conversation state storage"
+                    );
                     Arc::new(
                         PostgreSQLConversationStorage::new(connection_string.clone())
                             .await
@@ -165,7 +160,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
             };
             Some(storage)
         } else {
-            info!("No state_storage configured - conversation state management disabled");
+            info!("no state_storage configured, conversation state management disabled");
             None
         };
 
@@ -184,7 +179,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         let llm_providers = llm_providers.clone();
         let agents_list = combined_agents_filters_list.clone();
         let listeners = listeners.clone();
-        let trace_collector = trace_collector.clone();
         let span_attributes = span_attributes.clone();
         let state_storage = state_storage.clone();
         let service = service_fn(move |req| {
@@ -196,7 +190,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
             let model_aliases = Arc::clone(&model_aliases);
             let agents_list = agents_list.clone();
             let listeners = listeners.clone();
-            let trace_collector = trace_collector.clone();
             let span_attributes = span_attributes.clone();
             let state_storage = state_storage.clone();
 
@@ -217,7 +210,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             fully_qualified_url,
                             agents_list,
                             listeners,
-                            trace_collector,
                             span_attributes,
                         )
                         .with_context(parent_cx)
@@ -236,7 +228,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             fully_qualified_url,
                             model_aliases,
                             llm_providers,
-                            trace_collector,
                             span_attributes,
                             state_storage,
                         )
@@ -278,7 +269,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                         Ok(response)
                     }
                     _ => {
-                        debug!("No route for {} {}", req.method(), req.uri().path());
+                        debug!(method = %req.method(), path = %req.uri().path(), "no route found");
                         let mut not_found = Response::new(empty());
                         *not_found.status_mut() = StatusCode::NOT_FOUND;
                         Ok(not_found)
@@ -288,13 +279,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         });
 
         tokio::task::spawn(async move {
-            debug!("Accepted connection from {:?}", peer_addr);
+            debug!(peer = ?peer_addr, "accepted connection");
             if let Err(err) = http1::Builder::new()
                 // .serve_connection(io, service_fn(chat_completion))
                 .serve_connection(io, service)
                 .await
             {
-                warn!("Error serving connection: {:?}", err);
+                warn!(error = ?err, "error serving connection");
             }
         });
     }
diff --git a/crates/brightstaff/src/tracing/mod.rs b/crates/brightstaff/src/tracing/mod.rs
index f0d18ea4..1db01064 100644
--- a/crates/brightstaff/src/tracing/mod.rs
+++ b/crates/brightstaff/src/tracing/mod.rs
@@ -1,5 +1,6 @@
 mod constants;
 mod custom_attributes;
+mod service_name_exporter;
 
 pub use constants::{
     error, http, llm, operation_component, routing, signals, OperationNameBuilder,
@@ -7,3 +8,32 @@ pub use constants::{
 pub use custom_attributes::{
     append_span_attributes, collect_custom_trace_attributes, extract_custom_trace_attributes,
 };
+pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};
+
+use opentelemetry::trace::get_active_span;
+use opentelemetry::KeyValue;
+
+/// Tags the currently active OpenTelemetry span with a service-name override.
+///
+/// The attribute is keyed by [`SERVICE_NAME_OVERRIDE_KEY`] and carries an owned
+/// copy of `service_name`. It is attached through `get_active_span`, so it lands
+/// on whichever span the OpenTelemetry context considers active at call time.
+/// NOTE(review): presumably read back by `ServiceNameOverrideExporter` to group
+/// spans under a logical service such as `plano(llm)`; confirm in that module.
+///
+/// # Arguments
+/// * `service_name` - logical service label, e.g. `operation_component::LLM`
+///
+/// # Example
+/// ```rust,ignore
+/// use brightstaff::tracing::{operation_component, set_service_name};
+/// set_service_name(operation_component::LLM);
+/// ```
+pub fn set_service_name(service_name: &str) {
+    get_active_span(|active_span| {
+        // Copy the borrowed name into an owned String for the attribute value.
+        let override_value = service_name.to_string();
+        let override_attr = KeyValue::new(SERVICE_NAME_OVERRIDE_KEY, override_value);
+        active_span.set_attribute(override_attr);
+    });
+}
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 55045450..f4e2b7b4 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -90,6 +90,8 @@ pub struct Overrides {
 pub struct Tracing {
     pub sampling_rate: Option<f64>,
     pub trace_arch_internal: Option<bool>,
+    pub random_sampling: Option<u32>,
+    pub opentracing_grpc_endpoint: Option<String>,
     pub span_attributes: Option<SpanAttributes>,
 }
 
diff --git a/demos/agent_orchestration/travel_agents/config.yaml b/demos/agent_orchestration/travel_agents/config.yaml
index ce9b8f44..911baf89 100644
--- a/demos/agent_orchestration/travel_agents/config.yaml
+++ b/demos/agent_orchestration/travel_agents/config.yaml
@@ -57,4 +57,4 @@ tracing:
   random_sampling: 100
   span_attributes:
     header_prefixes:
-      - x-katanemo-
+      - x-acme-
diff --git a/demos/agent_orchestration/travel_agents/test.rest b/demos/agent_orchestration/travel_agents/test.rest
index 7d7c5759..b6348f28 100644
--- a/demos/agent_orchestration/travel_agents/test.rest
+++ b/demos/agent_orchestration/travel_agents/test.rest
@@ -3,12 +3,13 @@
 ### Travel Agent Chat Completion Request
 POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
 Content-Type: application/json
-X-Katanemo-Workspace-Id: ws_7e2c5d91b4224f59b0e6a4e0125c21b3
-X-Katanemo-Tenant-Id: ten_4102a8c7fa6542b084b395d2df184a9a
-X-Katanemo-User-Id: usr_19df7e6751b846f9ba026776e3c12abe
-X-Katanemo-Admin-Level: 3
-X-Katanemo-Is-Internal: true
-X-Katanemo-Budget: 42.5
+X-Acme-Workspace-Id: ws_7e2c5d91b4224f59b0e6a4e0125c21b3
+X-Acme-Tenant-Id: ten_4102a8c7fa6542b084b395d2df184a9a
+X-Acme-User-Id: usr_19df7e6751b846f9ba026776e3c12abe
+X-Acme-Admin-Level: 3
+X-Acme-Environment: production
+X-Acme-Is-Internal: false
+X-Acme-Cost-Center: HD100
 
 {
   "model": "gpt-5.2",