use standard tracing and logging in brightstaff (#721)

2026-05-10 16:22:42 +02:00 · 2026-02-09 13:33:27 -08:00 · 2026-02-09 13:33:27 -08:00 · 46de89590b
commit 46de89590b
parent 4d9ed74b68
55 changed files with 1494 additions and 2432 deletions
--- a/crates/brightstaff/src/handlers/agent_chat_completions.rs
+++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs
@ -1,9 +1,7 @@
 use std::sync::Arc;
-use std::time::{Instant, SystemTime};
+use std::time::Instant;

 use bytes::Bytes;
-use common::consts::TRACE_PARENT_HEADER;
-use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind};
 use hermesllm::apis::OpenAIMessage;
 use hermesllm::clients::SupportedAPIsFromClient;
 use hermesllm::providers::request::ProviderRequest;
@ -11,14 +9,15 @@ use hermesllm::ProviderRequestType;
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use hyper::{Request, Response};
+use opentelemetry::trace::get_active_span;
 use serde::ser::Error as SerError;
-use tracing::{debug, info, warn};
+use tracing::{debug, info, info_span, warn, Instrument};

 use super::agent_selector::{AgentSelectionError, AgentSelector};
 use super::pipeline_processor::{PipelineError, PipelineProcessor};
 use super::response_handler::ResponseHandler;
 use crate::router::plano_orchestrator::OrchestratorService;
-use crate::tracing::{http, operation_component, OperationNameBuilder};
+use crate::tracing::{operation_component, set_service_name};

 /// Main errors for agent chat completions
 #[derive(Debug, thiserror::Error)]
@ -41,92 +40,122 @@ pub async fn agent_chat(
    _: String,
    agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
    listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
-    trace_collector: Arc<common::traces::TraceCollector>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
-    match handle_agent_chat(
-        request,
-        orchestrator_service,
-        agents_list,
-        listeners,
-        trace_collector,
-    )
-    .await
+    // Extract request_id from headers or generate a new one
+    let request_id: String = match request
+        .headers()
+        .get(common::consts::REQUEST_ID_HEADER)
+        .and_then(|h| h.to_str().ok())
+        .map(|s| s.to_string())
    {
-        Ok(response) => Ok(response),
-        Err(err) => {
-            // Check if this is a client error from the pipeline that should be cascaded
-            if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
-                agent,
-                status,
-                body,
-            }) = &err
-            {
-                warn!(
-                    "Client error from agent '{}' (HTTP {}): {}",
-                    agent, status, body
-                );
+        Some(id) => id,
+        None => uuid::Uuid::new_v4().to_string(),
+    };

-                // Create error response with the original status code and body
+    // Create a span with request_id that will be included in all log lines
+    let request_span = info_span!(
+        "(orchestrator)",
+        component = "orchestrator",
+        request_id = %request_id,
+        http.method = %request.method(),
+        http.path = %request.uri().path()
+    );
+
+    // Execute the handler inside the span
+    async {
+        // Set service name for orchestrator operations
+        set_service_name(operation_component::ORCHESTRATOR);
+
+        match handle_agent_chat_inner(
+            request,
+            orchestrator_service,
+            agents_list,
+            listeners,
+            request_id,
+        )
+        .await
+        {
+            Ok(response) => Ok(response),
+            Err(err) => {
+                // Check if this is a client error from the pipeline that should be cascaded
+                if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
+                    agent,
+                    status,
+                    body,
+                }) = &err
+                {
+                    warn!(
+                        agent = %agent,
+                        status = %status,
+                        body = %body,
+                        "client error from agent"
+                    );
+
+                    // Create error response with the original status code and body
+                    let error_json = serde_json::json!({
+                        "error": "ClientError",
+                        "agent": agent,
+                        "status": status,
+                        "agent_response": body
+                    });
+
+                    let json_string = error_json.to_string();
+                    let mut response =
+                        Response::new(ResponseHandler::create_full_body(json_string));
+                    *response.status_mut() = hyper::StatusCode::from_u16(*status)
+                        .unwrap_or(hyper::StatusCode::BAD_REQUEST);
+                    response.headers_mut().insert(
+                        hyper::header::CONTENT_TYPE,
+                        "application/json".parse().unwrap(),
+                    );
+                    return Ok(response);
+                }
+
+                // Print detailed error information with full error chain for other errors
+                let mut error_chain = Vec::new();
+                let mut current_error: &dyn std::error::Error = &err;
+
+                // Collect the full error chain
+                loop {
+                    error_chain.push(current_error.to_string());
+                    match current_error.source() {
+                        Some(source) => current_error = source,
+                        None => break,
+                    }
+                }
+
+                // Log the complete error chain
+                warn!(error_chain = ?error_chain, "agent chat error chain");
+                warn!(root_error = ?err, "root error");
+
+                // Create structured error response as JSON
                let error_json = serde_json::json!({
-                    "error": "ClientError",
-                    "agent": agent,
-                    "status": status,
-                    "agent_response": body
+                    "error": {
+                        "type": "AgentFilterChainError",
+                        "message": err.to_string(),
+                        "error_chain": error_chain,
+                        "debug_info": format!("{:?}", err)
+                    }
                });

-                let json_string = error_json.to_string();
-                let mut response = Response::new(ResponseHandler::create_full_body(json_string));
-                *response.status_mut() =
-                    hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST);
-                response.headers_mut().insert(
-                    hyper::header::CONTENT_TYPE,
-                    "application/json".parse().unwrap(),
-                );
-                return Ok(response);
+                // Log the error for debugging
+                info!(error = %error_json, "structured error info");
+
+                // Return JSON error response
+                Ok(ResponseHandler::create_json_error_response(&error_json))
            }
-
-            // Print detailed error information with full error chain for other errors
-            let mut error_chain = Vec::new();
-            let mut current_error: &dyn std::error::Error = &err;
-
-            // Collect the full error chain
-            loop {
-                error_chain.push(current_error.to_string());
-                match current_error.source() {
-                    Some(source) => current_error = source,
-                    None => break,
-                }
-            }
-
-            // Log the complete error chain
-            warn!("Agent chat error chain: {:#?}", error_chain);
-            warn!("Root error: {:?}", err);
-
-            // Create structured error response as JSON
-            let error_json = serde_json::json!({
-                "error": {
-                    "type": "AgentFilterChainError",
-                    "message": err.to_string(),
-                    "error_chain": error_chain,
-                    "debug_info": format!("{:?}", err)
-                }
-            });
-
-            // Log the error for debugging
-            info!("Structured error info: {}", error_json);
-
-            // Return JSON error response
-            Ok(ResponseHandler::create_json_error_response(&error_json))
        }
    }
+    .instrument(request_span)
+    .await
 }

-async fn handle_agent_chat(
+async fn handle_agent_chat_inner(
    request: Request<hyper::body::Incoming>,
    orchestrator_service: Arc<OrchestratorService>,
    agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
    listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
-    trace_collector: Arc<common::traces::TraceCollector>,
+    request_id: String,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
    // Initialize services
    let agent_selector = AgentSelector::new(orchestrator_service);
@ -140,14 +169,18 @@ async fn handle_agent_chat(
        .and_then(|name| name.to_str().ok());

    // Find the appropriate listener
-    let listener = {
+    let listener: common::configuration::Listener = {
        let listeners = listeners.read().await;
        agent_selector
            .find_listener(listener_name, &listeners)
            .await?
    };

-    info!("Handling request for listener: {}", listener.name);
+    get_active_span(|span| {
+        span.update_name(listener.name.to_string());
+    });
+
+    info!(listener = %listener.name, "handling request");

    // Parse request body
    let request_path = request
@ -162,12 +195,8 @@ async fn handle_agent_chat(
        let mut headers = request.headers().clone();
        headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER);

+        // Set the request_id in headers if not already present
        if !headers.contains_key(common::consts::REQUEST_ID_HEADER) {
-            let request_id = uuid::Uuid::new_v4().to_string();
-            info!(
-                "Request id not found in headers, generated new request id: {}",
-                request_id
-            );
            headers.insert(
                common::consts::REQUEST_ID_HEADER,
                hyper::header::HeaderValue::from_str(&request_id).unwrap(),
@ -180,8 +209,8 @@ async fn handle_agent_chat(
    let chat_request_bytes = request.collect().await?.to_bytes();

    debug!(
-        "Received request body (raw utf8): {}",
-        String::from_utf8_lossy(&chat_request_bytes)
+        body = %String::from_utf8_lossy(&chat_request_bytes),
+        "received request body"
    );

    // Determine the API type from the endpoint
@ -195,7 +224,7 @@ async fn handle_agent_chat(
    let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
        Ok(request) => request,
        Err(err) => {
-            warn!("Failed to parse request as ProviderRequestType: {}", err);
+            warn!("failed to parse request as ProviderRequestType: {}", err);
            let err_msg = format!("Failed to parse request: {}", err);
            return Err(AgentFilterChainError::RequestParsing(
                serde_json::Error::custom(err_msg),
@ -205,12 +234,6 @@ async fn handle_agent_chat(

    let message: Vec<OpenAIMessage> = client_request.get_messages();

-    // Extract trace parent for routing
-    let traceparent = request_headers
-        .iter()
-        .find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
-        .map(|(_, value)| value.to_str().unwrap_or_default().to_string());
-
    let request_id = request_headers
        .get(common::consts::REQUEST_ID_HEADER)
        .and_then(|val| val.to_str().ok())
@ -223,87 +246,58 @@ async fn handle_agent_chat(
        agent_selector.create_agent_map(agents)
    };

-    // Parse trace parent to get trace_id and parent_span_id
-    let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent {
-        parse_traceparent(tp)
-    } else {
-        (String::new(), None)
-    };
-
    // Select appropriate agents using arch orchestrator llm model
-    let selection_span_id = generate_random_span_id();
-    let selection_start_time = SystemTime::now();
-    let selection_start_instant = Instant::now();
-
+    let selection_start = Instant::now();
    let selected_agents = agent_selector
-        .select_agents(&message, &listener, traceparent.clone(), request_id.clone())
+        .select_agents(&message, &listener, request_id.clone())
        .await?;

-    // Record agent selection span
-    let selection_end_time = SystemTime::now();
-    let selection_elapsed = selection_start_instant.elapsed();
-    let selection_operation_name = OperationNameBuilder::new()
-        .with_method("POST")
-        .with_path("/agents/select")
-        .with_target(&listener.name)
-        .build();
-
-    let mut selection_span_builder = SpanBuilder::new(&selection_operation_name)
-        .with_span_id(selection_span_id)
-        .with_kind(SpanKind::Internal)
-        .with_start_time(selection_start_time)
-        .with_end_time(selection_end_time)
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::TARGET, "/agents/select")
-        .with_attribute("selection.listener", listener.name.clone())
-        .with_attribute("selection.agent_count", selected_agents.len().to_string())
-        .with_attribute(
+    // Record selection attributes on the current orchestrator span
+    let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0;
+    get_active_span(|span| {
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.listener",
+            listener.name.clone(),
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.agent_count",
+            selected_agents.len() as i64,
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
            "selection.agents",
            selected_agents
                .iter()
                .map(|a| a.id.as_str())
                .collect::<Vec<_>>()
                .join(","),
-        )
-        .with_attribute(
-            "duration_ms",
-            format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0),
-        );
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.determination_ms",
+            format!("{:.2}", selection_elapsed_ms),
+        ));
+    });

-    if !trace_id.is_empty() {
-        selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone());
-    }
-    if let Some(parent_id) = parent_span_id.clone() {
-        selection_span_builder = selection_span_builder.with_parent_span_id(parent_id);
-    }
-
-    let selection_span = selection_span_builder.build();
-    trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span);
-
-    info!("Selected {} agent(s) for execution", selected_agents.len());
+    info!(
+        count = selected_agents.len(),
+        "selected agents for execution"
+    );

    // Execute agents sequentially, passing output from one to the next
    let mut current_messages = message.clone();
    let agent_count = selected_agents.len();

    for (agent_index, selected_agent) in selected_agents.iter().enumerate() {
+        // Get agent name
+        let agent_name = selected_agent.id.clone();
        let is_last_agent = agent_index == agent_count - 1;

        debug!(
-            "Processing agent {}/{}: {}",
-            agent_index + 1,
-            agent_count,
-            selected_agent.id
+            agent_index = agent_index + 1,
+            total = agent_count,
+            agent = %agent_name,
+            "processing agent"
        );

-        // Record the start time for agent span
-        let agent_start_time = SystemTime::now();
-        let agent_start_instant = Instant::now();
-        let span_id = generate_random_span_id();
-
-        // Get agent name
-        let agent_name = selected_agent.id.clone();
-
        // Process the filter chain
        let chat_history = pipeline_processor
            .process_filter_chain(
@ -311,88 +305,71 @@ async fn handle_agent_chat(
                selected_agent,
                &agent_map,
                &request_headers,
-                Some(&trace_collector),
-                trace_id.clone(),
-                span_id.clone(),
            )
            .await?;

        // Get agent details and invoke
        let agent = agent_map.get(&agent_name).unwrap();

-        debug!("Invoking agent: {}", agent_name);
+        debug!(agent = %agent_name, "invoking agent");

-        let llm_response = pipeline_processor
-            .invoke_agent(
-                &chat_history,
-                client_request.clone(),
-                agent,
-                &request_headers,
-                trace_id.clone(),
-                span_id.clone(),
-            )
-            .await?;
+        let agent_span = info_span!(
+            "agent",
+            agent_id = %agent_name,
+            message_count = chat_history.len(),
+        );

-        // Record agent span
-        let agent_end_time = SystemTime::now();
-        let agent_elapsed = agent_start_instant.elapsed();
-        let full_path = format!("/agents{}", request_path);
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(&full_path)
-            .with_target(&agent_name)
-            .build();
+        let llm_response = async {
+            set_service_name(operation_component::AGENT);
+            get_active_span(|span| {
+                span.update_name(format!("{} /v1/chat/completions", agent_name));
+            });

-        let mut span_builder = SpanBuilder::new(&operation_name)
-            .with_span_id(span_id)
-            .with_kind(SpanKind::Internal)
-            .with_start_time(agent_start_time)
-            .with_end_time(agent_end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, full_path)
-            .with_attribute("agent.name", agent_name.clone())
-            .with_attribute(
-                "agent.sequence",
-                format!("{}/{}", agent_index + 1, agent_count),
-            )
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0),
-            );
-
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id.clone());
+            pipeline_processor
+                .invoke_agent(
+                    &chat_history,
+                    client_request.clone(),
+                    agent,
+                    &request_headers,
+                )
+                .await
        }
-        if let Some(parent_id) = parent_span_id.clone() {
-            span_builder = span_builder.with_parent_span_id(parent_id);
-        }
-
-        let span = span_builder.build();
-        trace_collector.record_span(operation_component::AGENT, span);
+        .instrument(agent_span.clone())
+        .await?;

        // If this is the last agent, return the streaming response
        if is_last_agent {
            info!(
-                "Completed agent chain, returning response from last agent: {}",
-                agent_name
+                agent = %agent_name,
+                "completed agent chain, returning response"
            );
-            return response_handler
-                .create_streaming_response(llm_response)
-                .await
-                .map_err(AgentFilterChainError::from);
+            // Capture the orchestrator span (parent of the agent span) so it
+            // stays open for the full streaming duration alongside the agent span.
+            let orchestrator_span = tracing::Span::current();
+            return async {
+                response_handler
+                    .create_streaming_response(
+                        llm_response,
+                        tracing::Span::current(), // agent span (inner)
+                        orchestrator_span,        // orchestrator span (outer)
+                    )
+                    .await
+                    .map_err(AgentFilterChainError::from)
+            }
+            .instrument(agent_span)
+            .await;
        }

        // For intermediate agents, collect the full response and pass to next agent
-        debug!(
-            "Collecting response from intermediate agent: {}",
-            agent_name
-        );
-        let response_text = response_handler.collect_full_response(llm_response).await?;
+        debug!(agent = %agent_name, "collecting response from intermediate agent");
+        let response_text = async { response_handler.collect_full_response(llm_response).await }
+            .instrument(agent_span)
+            .await?;

        info!(
-            "Agent {} completed, passing {} character response to next agent",
-            agent_name,
-            response_text.len()
+            agent = %agent_name,
+            response_len = response_text.len(),
+            "agent completed, passing response to next agent"
        );

        // remove last message and add new one at the end
--- a/crates/brightstaff/src/handlers/agent_selector.rs
+++ b/crates/brightstaff/src/handlers/agent_selector.rs
@ -75,7 +75,7 @@ impl AgentSelector {
            .cloned()
            .or_else(|| {
                warn!(
-                    "No default agent found, routing request to first agent: {}",
+                    "no default agent found, routing request to first agent: {}",
                    agents[0].id
                );
                Some(agents[0].clone())
@ -108,7 +108,6 @@ impl AgentSelector {
        &self,
        messages: &[Message],
        listener: &Listener,
-        trace_parent: Option<String>,
        request_id: Option<String>,
    ) -> Result<Vec<AgentFilterChain>, AgentSelectionError> {
        let agents = listener
@ -118,7 +117,7 @@ impl AgentSelector {

        // If only one agent, skip orchestration
        if agents.len() == 1 {
-            debug!("Only one agent available, skipping orchestration");
+            debug!("only one agent available, skipping orchestration");
            return Ok(vec![agents[0].clone()]);
        }

@ -132,15 +131,15 @@ impl AgentSelector {

        match self
            .orchestrator_service
-            .determine_orchestration(messages, trace_parent, Some(usage_preferences), request_id)
+            .determine_orchestration(messages, Some(usage_preferences), request_id)
            .await
        {
            Ok(Some(routes)) => {
-                debug!("Determined {} agent(s) via orchestration", routes.len());
+                debug!(count = routes.len(), "determined agents via orchestration");
                let mut selected_agents = Vec::new();

                for (route_name, agent_name) in routes {
-                    debug!("Processing route: {}, agent: {}", route_name, agent_name);
+                    debug!(route = %route_name, agent = %agent_name, "processing route");
                    let selected_agent = agents
                        .iter()
                        .find(|a| a.id == agent_name)
@ -155,14 +154,14 @@ impl AgentSelector {
                }

                if selected_agents.is_empty() {
-                    debug!("No agents determined using orchestration, using default agent");
+                    debug!("no agents determined via orchestration, using default");
                    Ok(vec![self.get_default_agent(agents, &listener.name)?])
                } else {
                    Ok(selected_agents)
                }
            }
            Ok(None) => {
-                debug!("No agents determined using orchestration, using default agent");
+                debug!("no agents determined using orchestration, using default agent");
                Ok(vec![self.get_default_agent(agents, &listener.name)?])
            }
            Err(err) => Err(AgentSelectionError::OrchestrationError(err.to_string())),
--- a/crates/brightstaff/src/handlers/function_calling.rs
+++ b/crates/brightstaff/src/handlers/function_calling.rs
@ -944,7 +944,7 @@ impl ArchFunctionHandler {
    ) -> Result<ChatCompletionsResponse> {
        use tracing::{error, info};

-        info!("[Arch-Function] - ChatCompletion");
+        info!("processing chat completion request");

        let messages = self.process_messages(
            &request.messages,
@ -955,9 +955,9 @@ impl ArchFunctionHandler {
        )?;

        info!(
-            "[request to arch-fc]: model: {}, messages count: {}",
-            self.model_name,
-            messages.len()
+            model = %self.model_name,
+            message_count = messages.len(),
+            "sending request to arch-fc"
        );

        let use_agent_orchestrator = request
@ -991,7 +991,7 @@ impl ArchFunctionHandler {
                    }
                }
            }
-            info!("[Agent Orchestrator]: response received");
+            info!("agent orchestrator response received");
        } else if let Some(tools) = request.tools.as_ref() {
            let mut hallucination_state = HallucinationState::new(tools);
            let mut has_tool_calls = None;
@ -1040,7 +1040,10 @@ impl ArchFunctionHandler {
            }

            if has_tool_calls == Some(true) && has_hallucination {
-                info!("[Hallucination]: {}", hallucination_state.error_message);
+                info!(
+                    "detected hallucination: {}",
+                    hallucination_state.error_message
+                );

                let clarify_messages = self.prefill_message(messages.clone(), &self.clarify_prefix);
                let clarify_request = self.create_request_with_extra_body(clarify_messages, false);
@ -1075,8 +1078,8 @@ impl ArchFunctionHandler {
        let response_dict = self.parse_model_response(&model_response);

        info!(
-            "[arch-fc]: raw model response: {}",
-            response_dict.raw_response
+            raw_response = %response_dict.raw_response,
+            "arch-fc model response"
        );

        // General model response (no intent matched - should route to default target)
@ -1126,7 +1129,7 @@ impl ArchFunctionHandler {

                        if verification.is_valid {
                            info!(
-                                "[Tool calls]: {:?}",
+                                "tool calls extracted: {:?}",
                                response_dict
                                    .tool_calls
                                    .iter()
@ -1143,7 +1146,7 @@ impl ArchFunctionHandler {
                                tool_calls: Some(response_dict.tool_calls.clone()),
                            }
                        } else {
-                            error!("Invalid tool call - {}", verification.error_message);
+                            error!(error = %verification.error_message, "invalid tool call");
                            ResponseMessage {
                                role: Role::Assistant,
                                content: Some(String::new()),
@ -1155,7 +1158,7 @@ impl ArchFunctionHandler {
                            }
                        }
                    } else {
-                        error!("Tool calls present but no tools provided in request");
+                        error!("tool calls present but no tools provided in request");
                        ResponseMessage {
                            role: Role::Assistant,
                            content: Some(String::new()),
@ -1168,7 +1171,7 @@ impl ArchFunctionHandler {
                    }
                } else {
                    info!(
-                        "[Tool calls]: {:?}",
+                        "tool calls extracted: {:?}",
                        response_dict
                            .tool_calls
                            .iter()
@ -1187,8 +1190,8 @@ impl ArchFunctionHandler {
                }
            } else {
                error!(
-                    "Invalid tool calls in response: {}",
-                    response_dict.error_message
+                    error = %response_dict.error_message,
+                    "invalid tool calls in response"
                );
                ResponseMessage {
                    role: Role::Assistant,
@ -1201,7 +1204,7 @@ impl ArchFunctionHandler {
                }
            }
        } else {
-            error!("Invalid model response - {}", model_response);
+            error!(response = %model_response, "invalid model response");
            ResponseMessage {
                role: Role::Assistant,
                content: Some(String::new()),
@ -1244,7 +1247,7 @@ impl ArchFunctionHandler {
            metadata: Some(metadata),
        };

-        info!("[response arch-fc]: {:?}", chat_completion_response);
+        info!(response = ?chat_completion_response, "arch-fc response");

        Ok(chat_completion_response)
    }
@ -1331,7 +1334,7 @@ pub async fn function_calling_chat_handler(
    let mut body_json: Value = match serde_json::from_slice(&whole_body) {
        Ok(json) => json,
        Err(e) => {
-            error!("Failed to parse request body as JSON: {}", e);
+            error!(error = %e, "failed to parse request body as json");
            let mut response = Response::new(full(
                serde_json::json!({
                    "error": format!("Invalid request body: {}", e)
@ -1355,13 +1358,13 @@ pub async fn function_calling_chat_handler(
    let chat_request: ChatCompletionsRequest = match serde_json::from_value(body_json) {
        Ok(req) => {
            info!(
-                "[request body]: {}",
-                serde_json::to_string(&req).unwrap_or_default()
+                request_body = %serde_json::to_string(&req).unwrap_or_default(),
+                "received request"
            );
            req
        }
        Err(e) => {
-            error!("Failed to parse request body: {}", e);
+            error!(error = %e, "failed to parse request body");
            let mut response = Response::new(full(
                serde_json::json!({
                    "error": format!("Invalid request body: {}", e)
@ -1384,7 +1387,10 @@ pub async fn function_calling_chat_handler(
        .and_then(|v| v.as_bool())
        .unwrap_or(false);

-    info!("Use agent orchestrator: {}", use_agent_orchestrator);
+    info!(
+        use_agent_orchestrator = use_agent_orchestrator,
+        "handler mode"
+    );

    // Create the appropriate handler
    let handler_name = if use_agent_orchestrator {
@ -1415,7 +1421,7 @@ pub async fn function_calling_chat_handler(
    match final_response {
        Ok(response_data) => {
            let response_json = serde_json::to_string(&response_data).unwrap_or_else(|e| {
-                error!("Failed to serialize response: {}", e);
+                error!(error = %e, "failed to serialize response");
                serde_json::json!({"error": "Failed to serialize response"}).to_string()
            });

@ -1428,7 +1434,7 @@ pub async fn function_calling_chat_handler(
            Ok(response)
        }
        Err(e) => {
-            error!("[{}] - Error in function calling: {}", handler_name, e);
+            error!(handler = handler_name, error = %e, "error in function calling");

            let error_response = serde_json::json!({
                "error": format!("[{}] - Error in function calling: {}", handler_name, e)
--- a/crates/brightstaff/src/handlers/integration_tests.rs
+++ b/crates/brightstaff/src/handlers/integration_tests.rs
@ -112,15 +112,7 @@ mod tests {

        let headers = HeaderMap::new();
        let result = pipeline_processor
-            .process_filter_chain(
-                &request.messages,
-                &test_pipeline,
-                &agent_map,
-                &headers,
-                None,
-                String::new(),
-                String::new(),
-            )
+            .process_filter_chain(&request.messages, &test_pipeline, &agent_map, &headers)
            .await;

        println!("Pipeline processing result: {:?}", result);
--- a/crates/brightstaff/src/handlers/llm.rs
+++ b/crates/brightstaff/src/handlers/llm.rs
@ -4,7 +4,6 @@ use common::consts::{
    ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
 use common::llm_providers::LlmProviders;
-use common::traces::TraceCollector;
 use hermesllm::apis::openai_responses::InputParam;
 use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
 use hermesllm::{ProviderRequest, ProviderRequestType};
@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Full};
 use hyper::header::{self};
 use hyper::{Request, Response, StatusCode};
+use opentelemetry::global;
+use opentelemetry::trace::get_active_span;
+use opentelemetry_http::HeaderInjector;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;
-use tracing::{debug, info, warn};
+use tracing::{debug, info, info_span, warn, Instrument};

 use crate::handlers::router_chat::router_chat_get_upstream_model;
 use crate::handlers::utils::{
@ -26,7 +28,7 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
 use crate::state::{
    extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
 };
-use crate::tracing::operation_component;
+use crate::tracing::{operation_component, set_service_name};

 fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
    Full::new(chunk.into())
@ -40,7 +42,6 @@ pub async fn llm_chat(
    full_qualified_llm_provider_url: String,
    model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
    llm_providers: Arc<RwLock<LlmProviders>>,
-    trace_collector: Arc<TraceCollector>,
    state_storage: Option<Arc<dyn StateStorage>>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
    let request_path = request.uri().path().to_string();
@ -51,16 +52,49 @@ pub async fn llm_chat(
        .map(|s| s.to_string())
    {
        Some(id) => id,
-        None => {
-            let generated_id = uuid::Uuid::new_v4().to_string();
-            warn!(
-                "[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID",
-                generated_id
-            );
-            generated_id
-        }
+        None => uuid::Uuid::new_v4().to_string(),
    };

+    // Create a span with request_id that will be included in all log lines
+    let request_span = info_span!(
+        "llm",
+        component = "llm",
+        request_id = %request_id,
+        http.method = %request.method(),
+        http.path = %request_path,
+    );
+
+    // Execute the rest of the handler inside the span
+    llm_chat_inner(
+        request,
+        router_service,
+        full_qualified_llm_provider_url,
+        model_aliases,
+        llm_providers,
+        state_storage,
+        request_id,
+        request_path,
+        request_headers,
+    )
+    .instrument(request_span)
+    .await
+}
+
+#[allow(clippy::too_many_arguments)]
+async fn llm_chat_inner(
+    request: Request<hyper::body::Incoming>,
+    router_service: Arc<RouterService>,
+    full_qualified_llm_provider_url: String,
+    model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
+    llm_providers: Arc<RwLock<LlmProviders>>,
+    state_storage: Option<Arc<dyn StateStorage>>,
+    request_id: String,
+    request_path: String,
+    mut request_headers: hyper::HeaderMap,
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+    // Set service name for LLM operations
+    set_service_name(operation_component::LLM);
+
    // Extract or generate traceparent - this establishes the trace context for all spans
    let traceparent: String = match request_headers
        .get(TRACE_PARENT_HEADER)
@ -73,20 +107,18 @@ pub async fn llm_chat(
            let trace_id = Uuid::new_v4().to_string().replace("-", "");
            let generated_tp = format!("00-{}-0000000000000000-01", trace_id);
            warn!(
-                "[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}",
-                request_id, generated_tp
+                generated_traceparent = %generated_tp,
+                "TRACE_PARENT header missing, generated new traceparent"
            );
            generated_tp
        }
    };

-    let mut request_headers = request_headers;
    let chat_request_bytes = request.collect().await?.to_bytes();

    debug!(
-        "[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}",
-        request_id,
-        String::from_utf8_lossy(&chat_request_bytes)
+        body = %String::from_utf8_lossy(&chat_request_bytes),
+        "request body received"
    );

    let mut client_request = match ProviderRequestType::try_from((
@ -96,13 +128,10 @@ pub async fn llm_chat(
        Ok(request) => request,
        Err(err) => {
            warn!(
-                "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}",
-                request_id, err
-            );
-            let err_msg = format!(
-                "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}",
-                request_id, err
+                error = %err,
+                "failed to parse request as ProviderRequestType"
            );
+            let err_msg = format!("Failed to parse request: {}", err);
            let mut bad_request = Response::new(full(err_msg));
            *bad_request.status_mut() = StatusCode::BAD_REQUEST;
            return Ok(bad_request);
@ -120,18 +149,23 @@ pub async fn llm_chat(
    // Model alias resolution: update model field in client_request immediately
    // This ensures all downstream objects use the resolved model
    let model_from_request = client_request.model().to_string();
-    let temperature = client_request.get_temperature();
+    let _temperature = client_request.get_temperature();
    let is_streaming_request = client_request.is_streaming();
-    let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
+    let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases);

    // Validate that the requested model exists in configuration
    // This matches the validation in llm_gateway routing.rs
-    if llm_providers.read().await.get(&resolved_model).is_none() {
+    if llm_providers
+        .read()
+        .await
+        .get(&alias_resolved_model)
+        .is_none()
+    {
        let err_msg = format!(
            "Model '{}' not found in configured providers",
-            resolved_model
+            alias_resolved_model
        );
-        warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg);
+        warn!(model = %alias_resolved_model, "model not found in configured providers");
        let mut bad_request = Response::new(full(err_msg));
        *bad_request.status_mut() = StatusCode::BAD_REQUEST;
        return Ok(bad_request);
@ -139,29 +173,26 @@ pub async fn llm_chat(

    // Handle provider/model slug format (e.g., "openai/gpt-4")
    // Extract just the model name for upstream (providers don't understand the slug)
-    let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') {
+    let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') {
        model.to_string()
    } else {
-        resolved_model.clone()
+        alias_resolved_model.clone()
    };

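    // Extract tool names and user message preview for span attributes.
    // NOTE(review): bound to `_`-prefixed names since build_llm_span was removed; confirm whether
    // these should be recorded on the tracing span or dropped.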
-    let tool_names = client_request.get_tool_names();
-    let user_message_preview = client_request
+    let _tool_names = client_request.get_tool_names();
+    let _user_message_preview = client_request
        .get_recent_user_message()
        .map(|msg| truncate_message(&msg, 50));

    // Extract messages for signal analysis (clone before moving client_request)
-    let messages_for_signals = client_request.get_messages();
+    let messages_for_signals = Some(client_request.get_messages());

    // Set the model to just the model name (without provider prefix)
    // This ensures upstream receives "gpt-4" not "openai/gpt-4"
    client_request.set_model(model_name_only.clone());
    if client_request.remove_metadata_key("archgw_preference_config") {
-        debug!(
-            "[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata",
-            request_id
-        );
+        debug!("removed archgw_preference_config from metadata");
    }

    // === v1/responses state management: Determine upstream API and combine input if needed ===
@ -180,9 +211,9 @@ pub async fn llm_chat(
            // Get the upstream path and check if it's ResponsesAPI
            let upstream_path = get_upstream_path(
                &llm_providers,
-                &resolved_model,
+                &alias_resolved_model,
                &request_path,
-                &resolved_model,
+                &alias_resolved_model,
                is_streaming_request,
            )
            .await;
@ -209,14 +240,17 @@ pub async fn llm_chat(
                            // Update both the request and original_input_items
                            responses_req.input = InputParam::Items(combined_input.clone());
                            original_input_items = combined_input;
-                            info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len());
+                            info!(
+                                items = original_input_items.len(),
+                                "updated request with conversation history"
+                            );
                        }
                        Err(StateStorageError::NotFound(_)) => {
                            // Return 409 Conflict when previous_response_id not found
-                            warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id);
+                            warn!(previous_response_id = %prev_resp_id, "previous response_id not found");
                            let err_msg = format!(
-                                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}",
-                                request_id, prev_resp_id
+                                "Conversation state not found for previous_response_id: {}",
+                                prev_resp_id
                            );
                            let mut conflict_response = Response::new(full(err_msg));
                            *conflict_response.status_mut() = StatusCode::CONFLICT;
@ -225,8 +259,9 @@ pub async fn llm_chat(
                        Err(e) => {
                            // Log warning but continue on other storage errors
                            warn!(
-                                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}",
-                                request_id, prev_resp_id, e
+                                previous_response_id = %prev_resp_id,
+                                error = %e,
+                                "failed to retrieve conversation state"
                            );
                            // Restore original_input_items since we passed ownership
                            original_input_items = extract_input_items(&responses_req.input);
@ -234,10 +269,7 @@ pub async fn llm_chat(
                    }
                }
            } else {
-                debug!(
-                    "[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.",
-                    request_id
-                );
+                debug!("upstream supports ResponsesAPI natively");
            }
        }
    }
@ -246,14 +278,29 @@ pub async fn llm_chat(
    let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();

    // Determine routing using the dedicated router_chat module
-    let routing_result = match router_chat_get_upstream_model(
-        router_service,
-        client_request, // Pass the original request - router_chat will convert it
-        trace_collector.clone(),
-        &traceparent,
-        &request_path,
-        &request_id,
-    )
+    // This gets its own span for latency and error tracking
+    let routing_span = info_span!(
+        "routing",
+        component = "routing",
+        http.method = "POST",
+        http.target = %request_path,
+        model.requested = %model_from_request,
+        model.alias_resolved = %alias_resolved_model,
+        route.selected_model = tracing::field::Empty,
+        routing.determination_ms = tracing::field::Empty,
+    );
+    let routing_result = match async {
+        set_service_name(operation_component::ROUTING);
+        router_chat_get_upstream_model(
+            router_service,
+            client_request, // Pass the original request - router_chat will convert it
+            &traceparent,
+            &request_path,
+            &request_id,
+        )
+        .await
+    }
+    .instrument(routing_span)
    .await
    {
        Ok(result) => result,
@ -267,22 +314,36 @@ pub async fn llm_chat(
    // Determine final model to use
    // Router returns "none" as a sentinel value when it doesn't select a specific model
    let router_selected_model = routing_result.model_name;
-    let model_name = if router_selected_model != "none" {
+    let resolved_model = if router_selected_model != "none" {
        // Router selected a specific model via routing preferences
        router_selected_model
    } else {
        // Router returned "none" sentinel, fall back to the validated alias_resolved_model from the request
-        resolved_model.clone()
+        alias_resolved_model.clone()
    };

+    let span_name = if model_from_request == resolved_model {
+        format!("POST {} {}", request_path, resolved_model)
+    } else {
+        format!(
+            "POST {} {} -> {}",
+            request_path, model_from_request, resolved_model
+        )
+    };
+    get_active_span(|span| {
+        span.update_name(span_name.clone());
+    });
+
    debug!(
-        "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}",
-        request_id, full_qualified_llm_provider_url, model_name, model_name_only
+        url = %full_qualified_llm_provider_url,
+        provider_hint = %resolved_model,
+        upstream_model = %model_name_only,
+        "Routing to upstream"
    );

    request_headers.insert(
        ARCH_PROVIDER_HINT_HEADER,
-        header::HeaderValue::from_str(&model_name).unwrap(),
+        header::HeaderValue::from_str(&resolved_model).unwrap(),
    );

    request_headers.insert(
@ -292,12 +353,18 @@ pub async fn llm_chat(
    // remove content-length header if it exists
    request_headers.remove(header::CONTENT_LENGTH);

+    // Inject current LLM span's trace context so upstream spans are children of plano(llm)
+    global::get_text_map_propagator(|propagator| {
+        let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+        propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
+    });
+
    // Capture start time right before sending request to upstream
    let request_start_time = std::time::Instant::now();
-    let request_start_system_time = std::time::SystemTime::now();
+    let _request_start_system_time = std::time::SystemTime::now();

    let llm_response = match reqwest::Client::new()
-        .post(full_qualified_llm_provider_url)
+        .post(&full_qualified_llm_provider_url)
        .headers(request_headers)
        .body(client_request_bytes_for_upstream)
        .send()
@ -324,29 +391,12 @@ pub async fn llm_chat(
    // Obtain the upstream response body as a byte stream
    let byte_stream = llm_response.bytes_stream();

-    // Build the LLM span (will be finalized after streaming completes)
-    let llm_span = build_llm_span(
-        &traceparent,
-        &request_path,
-        &resolved_model,
-        &model_name,
-        upstream_status.as_u16(),
-        is_streaming_request,
-        request_start_system_time,
-        tool_names,
-        user_message_preview,
-        temperature,
-        &llm_providers,
-    )
-    .await;
-
    // Create base processor for metrics and tracing
    let base_processor = ObservableStreamProcessor::new(
-        trace_collector,
        operation_component::LLM,
-        llm_span,
+        span_name,
        request_start_time,
-        Some(messages_for_signals),
+        messages_for_signals,
    );

    // === v1/responses state management: Wrap with ResponsesStateProcessor ===
@ -367,8 +417,8 @@ pub async fn llm_chat(
            base_processor,
            state_store,
            original_input_items,
+            alias_resolved_model.clone(),
            resolved_model.clone(),
-            model_name.clone(),
            is_streaming_request,
            false, // Not OpenAI upstream since should_manage_state is true
            content_encoding,
@ -409,88 +459,6 @@ fn resolve_model_alias(
    model_from_request.to_string()
 }

-/// Builds the LLM span with all required and optional attributes.
-#[allow(clippy::too_many_arguments)]
-async fn build_llm_span(
-    traceparent: &str,
-    request_path: &str,
-    resolved_model: &str,
-    model_name: &str,
-    status_code: u16,
-    is_streaming: bool,
-    start_time: std::time::SystemTime,
-    tool_names: Option<Vec<String>>,
-    user_message_preview: Option<String>,
-    temperature: Option<f32>,
-    llm_providers: &Arc<RwLock<LlmProviders>>,
-) -> common::traces::Span {
-    use crate::tracing::{http, llm, OperationNameBuilder};
-    use common::traces::{parse_traceparent, SpanBuilder, SpanKind};
-
-    // Calculate the upstream path based on provider configuration
-    let upstream_path = get_upstream_path(
-        llm_providers,
-        model_name,
-        request_path,
-        resolved_model,
-        is_streaming,
-    )
-    .await;
-
-    // Build operation name showing path transformation if different
-    let operation_name = if request_path != upstream_path {
-        OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(format!("{} >> {}", request_path, upstream_path))
-            .with_target(resolved_model)
-            .build()
-    } else {
-        OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(request_path)
-            .with_target(resolved_model)
-            .build()
-    };
-
-    let (trace_id, parent_span_id) = parse_traceparent(traceparent);
-
-    let mut span_builder = SpanBuilder::new(&operation_name)
-        .with_trace_id(&trace_id)
-        .with_kind(SpanKind::Client)
-        .with_start_time(start_time)
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::STATUS_CODE, status_code.to_string())
-        .with_attribute(http::TARGET, request_path.to_string())
-        .with_attribute(http::UPSTREAM_TARGET, upstream_path)
-        .with_attribute(llm::MODEL_NAME, resolved_model.to_string())
-        .with_attribute(llm::IS_STREAMING, is_streaming.to_string());
-
-    // Only set parent span ID if it exists (not a root span)
-    if let Some(parent) = parent_span_id {
-        span_builder = span_builder.with_parent_span_id(&parent);
-    }
-
-    // Add optional attributes
-    if let Some(temp) = temperature {
-        span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string());
-    }
-
-    if let Some(tools) = tool_names {
-        let formatted_tools = tools
-            .iter()
-            .map(|name| format!("{}(...)", name))
-            .collect::<Vec<_>>()
-            .join("\n");
-        span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools);
-    }
-
-    if let Some(preview) = user_message_preview {
-        span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview);
-    }
-
-    span_builder.build()
-}
-
 /// Calculates the upstream path for the provider based on the model name.
 /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
 /// then uses target_endpoint_for_provider to calculate the correct upstream path.
--- a/crates/brightstaff/src/handlers/pipeline_processor.rs
+++ b/crates/brightstaff/src/handlers/pipeline_processor.rs
@ -4,20 +4,18 @@ use common::configuration::{Agent, AgentFilterChain};
 use common::consts::{
    ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER,
 };
-use common::traces::{generate_random_span_id, SpanBuilder, SpanKind};
 use hermesllm::apis::openai::Message;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use hyper::header::HeaderMap;
-use std::time::{Instant, SystemTime};
-use tracing::{debug, info, warn};
-
-use crate::tracing::operation_component::{self};
-use crate::tracing::{http, OperationNameBuilder};
+use opentelemetry::global;
+use opentelemetry_http::HeaderInjector;
+use tracing::{debug, info, instrument, warn};

 use crate::handlers::jsonrpc::{
    JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION,
    MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD,
 };
+use crate::tracing::{operation_component, set_service_name};
 use uuid::Uuid;

 /// Errors that can occur during pipeline processing
@ -81,115 +79,14 @@ impl PipelineProcessor {
        }
    }

-    /// Record a span for filter execution
-    #[allow(clippy::too_many_arguments)]
-    fn record_filter_span(
-        &self,
-        collector: &std::sync::Arc<common::traces::TraceCollector>,
-        agent_name: &str,
-        tool_name: &str,
-        start_time: SystemTime,
-        end_time: SystemTime,
-        elapsed: std::time::Duration,
-        trace_id: String,
-        parent_span_id: String,
-        span_id: String,
-    ) -> String {
-        // let (trace_id, parent_span_id) = self.extract_trace_context();
-
-        // Build operation name: POST /agents/* {filter_name}
-        // Using generic path since we don't have access to specific endpoint here
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path("/agents/*")
-            .with_target(agent_name)
-            .build();
-
-        let mut span_builder = SpanBuilder::new(&operation_name)
-            .with_span_id(span_id.clone())
-            .with_kind(SpanKind::Client)
-            .with_start_time(start_time)
-            .with_end_time(end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, "/agents/*")
-            .with_attribute("filter.name", agent_name.to_string())
-            .with_attribute("filter.tool_name", tool_name.to_string())
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
-            );
-
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id);
-        }
-        if !parent_span_id.is_empty() {
-            span_builder = span_builder.with_parent_span_id(parent_span_id);
-        }
-
-        let span = span_builder.build();
-        // Use plano(filter) as service name for filter execution spans
-        collector.record_span(operation_component::AGENT_FILTER, span);
-        span_id.clone()
-    }
-
-    /// Record a span for MCP protocol interactions
-    #[allow(clippy::too_many_arguments)]
-    fn record_agent_filter_span(
-        &self,
-        collector: &std::sync::Arc<common::traces::TraceCollector>,
-        operation: &str,
-        agent_id: &str,
-        start_time: SystemTime,
-        end_time: SystemTime,
-        elapsed: std::time::Duration,
-        additional_attrs: Option<HashMap<&str, String>>,
-        trace_id: String,
-        parent_span_id: String,
-        span_id: Option<String>,
-    ) {
-        // let (trace_id, parent_span_id) = self.extract_trace_context();
-
-        // Build operation name: POST /mcp {agent_id}
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path("/mcp")
-            .with_operation(operation)
-            .with_target(agent_id)
-            .build();
-
-        let mut span_builder = SpanBuilder::new(&operation_name)
-            .with_span_id(span_id.unwrap_or_else(generate_random_span_id))
-            .with_kind(SpanKind::Client)
-            .with_start_time(start_time)
-            .with_end_time(end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, format!("/mcp ({})", operation))
-            .with_attribute("mcp.operation", operation.to_string())
-            .with_attribute("mcp.agent_id", agent_id.to_string())
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
-            );
-
-        if let Some(attrs) = additional_attrs {
-            for (key, value) in attrs {
-                span_builder = span_builder.with_attribute(key, value);
-            }
-        }
-
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id);
-        }
-        if !parent_span_id.is_empty() {
-            span_builder = span_builder.with_parent_span_id(parent_span_id);
-        }
-
-        let span = span_builder.build();
-        // MCP spans also use plano(filter) service name as they are part of filter operations
-        collector.record_span(operation_component::AGENT_FILTER, span);
-    }
-
-    /// Process the filter chain of agents (all except the terminal agent)
+    /// Process the filter chain of agents (all except the terminal agent)
+    // #[instrument(
+    //     skip(self, chat_history, agent_filter_chain, agent_map, request_headers),
+    //     fields(
+    //         filter_count = agent_filter_chain.filter_chain.as_ref().map(|fc| fc.len()).unwrap_or(0),
+    //         message_count = chat_history.len()
+    //     )
+    // )]
    #[allow(clippy::too_many_arguments)]
    pub async fn process_filter_chain(
        &mut self,
@ -197,9 +94,6 @@ impl PipelineProcessor {
        agent_filter_chain: &AgentFilterChain,
        agent_map: &HashMap<String, Agent>,
        request_headers: &HeaderMap,
-        trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
-        trace_id: String,
-        parent_span_id: String,
    ) -> Result<Vec<Message>, PipelineError> {
        let mut chat_history_updated = chat_history.to_vec();

@ -210,7 +104,7 @@ impl PipelineProcessor {
        };

        for agent_name in filter_chain {
-            debug!("Processing filter agent: {}", agent_name);
+            debug!(agent = %agent_name, "processing filter agent");

            let agent = agent_map
                .get(agent_name)
@ -219,68 +113,29 @@ impl PipelineProcessor {
            let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);

            info!(
-                "executing filter: {}/{}, url: {}, type: {}, conversation length: {}",
-                agent_name,
-                tool_name,
-                agent.url,
-                agent.agent_type.as_deref().unwrap_or("mcp"),
-                chat_history.len()
+                agent = %agent_name,
+                tool = %tool_name,
+                url = %agent.url,
+                agent_type = %agent.agent_type.as_deref().unwrap_or("mcp"),
+                conversation_len = chat_history.len(),
+                "executing filter"
            );

-            let start_time = SystemTime::now();
-            let start_instant = Instant::now();
-
-            // Generate filter span ID before execution so MCP spans can use it as parent
-            let filter_span_id = generate_random_span_id();
-
            if agent.agent_type.as_deref().unwrap_or("mcp") == "mcp" {
                chat_history_updated = self
-                    .execute_mcp_filter(
-                        &chat_history_updated,
-                        agent,
-                        request_headers,
-                        trace_collector,
-                        trace_id.clone(),
-                        filter_span_id.clone(),
-                    )
+                    .execute_mcp_filter(&chat_history_updated, agent, request_headers)
                    .await?;
            } else {
                chat_history_updated = self
-                    .execute_http_filter(
-                        &chat_history_updated,
-                        agent,
-                        request_headers,
-                        trace_collector,
-                        trace_id.clone(),
-                        filter_span_id.clone(),
-                    )
+                    .execute_http_filter(&chat_history_updated, agent, request_headers)
                    .await?;
            }

-            let end_time = SystemTime::now();
-            let elapsed = start_instant.elapsed();
-
            info!(
-                "Filter '{}' completed in {:.2}ms, updated conversation length: {}",
-                agent_name,
-                elapsed.as_secs_f64() * 1000.0,
-                chat_history_updated.len()
+                agent = %agent_name,
+                updated_len = chat_history_updated.len(),
+                "filter completed"
            );
-
-            // Record span for this filter execution
-            if let Some(collector) = trace_collector {
-                self.record_filter_span(
-                    collector,
-                    agent_name,
-                    tool_name,
-                    start_time,
-                    end_time,
-                    elapsed,
-                    trace_id.clone(),
-                    parent_span_id.clone(),
-                    filter_span_id,
-                );
-            }
        }

        Ok(chat_history_updated)
@ -292,18 +147,17 @@ impl PipelineProcessor {
        request_headers: &HeaderMap,
        agent_id: &str,
        session_id: Option<&str>,
-        trace_id: String,
-        parent_span_id: String,
    ) -> Result<HeaderMap, PipelineError> {
-        let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id);
        let mut headers = request_headers.clone();
        headers.remove(hyper::header::CONTENT_LENGTH);

+        // Drop any inbound traceparent, then inject the current span's OpenTelemetry context
        headers.remove(TRACE_PARENT_HEADER);
-        headers.insert(
-            TRACE_PARENT_HEADER,
-            hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
-        );
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut headers));
+        });

        headers.insert(
            ARCH_UPSTREAM_HOST_HEADER,
@ -348,9 +202,9 @@ impl PipelineProcessor {
        // Validate SSE format: first line should be "event: message"
        if lines.is_empty() || lines[0] != "event: message" {
            warn!(
-                "Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}",
-                agent_id,
-                lines.first()
+                agent = %agent_id,
+                first_line = ?lines.first(),
+                "invalid SSE response format"
            );
            return Err(PipelineError::NoContentInResponse(format!(
                "Invalid SSE response format from agent {}: expected 'event: message' as first line",
@ -367,9 +221,9 @@ impl PipelineProcessor {

        if data_lines.len() != 1 {
            warn!(
-                "Expected exactly one 'data:' line from agent {}, found {}",
-                agent_id,
-                data_lines.len()
+                agent = %agent_id,
+                found = data_lines.len(),
+                "expected exactly one 'data:' line"
            );
            return Err(PipelineError::NoContentInResponse(format!(
                "Expected exactly one 'data:' line from agent {}, found {}",
@ -429,27 +283,34 @@ impl PipelineProcessor {
    }

    /// Send request to a specific agent and return the response content
+    #[instrument(
+        skip(self, messages, agent, request_headers),
+        fields(
+            agent_id = %agent.id,
+            filter_name = %agent.id,
+            message_count = messages.len()
+        )
+    )]
    async fn execute_mcp_filter(
        &mut self,
        messages: &[Message],
        agent: &Agent,
        request_headers: &HeaderMap,
-        trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
-        trace_id: String,
-        filter_span_id: String,
    ) -> Result<Vec<Message>, PipelineError> {
+        // Set service name for this filter span
+        set_service_name(operation_component::AGENT_FILTER);
+
+        // Update current span name to include filter name
+        use opentelemetry::trace::get_active_span;
+        get_active_span(|span| {
+            span.update_name(format!("execute_mcp_filter ({})", agent.id));
+        });
+
        // Get or create MCP session
        let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
            session_id.clone()
        } else {
-            let session_id = self
-                .get_new_session_id(
-                    &agent.id,
-                    trace_id.clone(),
-                    filter_span_id.clone(),
-                    request_headers,
-                )
-                .await;
+            let session_id = self.get_new_session_id(&agent.id, request_headers).await;
            self.agent_id_session_map
                .insert(agent.id.clone(), session_id.clone());
            session_id
@ -464,21 +325,9 @@ impl PipelineProcessor {
        let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
        let json_rpc_request = self.build_tool_call_request(tool_name, messages)?;

-        // Generate span ID for this MCP tool call (child of filter span)
-        let mcp_span_id = generate_random_span_id();
-
        // Build headers
-        let agent_headers = self.build_mcp_headers(
-            request_headers,
-            &agent.id,
-            Some(&mcp_session_id),
-            trace_id.clone(),
-            mcp_span_id.clone(),
-        )?;
-
-        // Send request with tracing
-        let start_time = SystemTime::now();
-        let start_instant = Instant::now();
+        let agent_headers =
+            self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id))?;

        let response = self
            .send_mcp_request(&json_rpc_request, &agent_headers, &agent.id)
@ -486,31 +335,6 @@ impl PipelineProcessor {
        let http_status = response.status();
        let response_bytes = response.bytes().await?;

-        let end_time = SystemTime::now();
-        let elapsed = start_instant.elapsed();
-
-        // Record MCP tool call span
-        if let Some(collector) = trace_collector {
-            let mut attrs = HashMap::new();
-            attrs.insert("mcp.method", "tools/call".to_string());
-            attrs.insert("mcp.tool_name", tool_name.to_string());
-            attrs.insert("mcp.session_id", mcp_session_id.clone());
-            attrs.insert("http.status_code", http_status.as_u16().to_string());
-
-            self.record_agent_filter_span(
-                collector,
-                "tool_call",
-                &agent.id,
-                start_time,
-                end_time,
-                elapsed,
-                Some(attrs),
-                trace_id.clone(),
-                filter_span_id.clone(),
-                Some(mcp_span_id),
-            );
-        }
-
        // Handle HTTP errors
        if !http_status.is_success() {
            let error_body = String::from_utf8_lossy(&response_bytes).to_string();
@ -611,8 +435,6 @@ impl PipelineProcessor {
        &self,
        agent_id: &str,
        session_id: &str,
-        trace_id: String,
-        parent_span_id: String,
        request_headers: &HeaderMap,
    ) -> Result<(), PipelineError> {
        let initialized_notification = JsonRpcNotification {
@ -622,15 +444,9 @@ impl PipelineProcessor {
        };

        let notification_body = serde_json::to_string(&initialized_notification)?;
-        debug!("Sending initialized notification for agent {}", agent_id);
+        debug!("sending initialized notification for agent {}", agent_id);

-        let headers = self.build_mcp_headers(
-            request_headers,
-            agent_id,
-            Some(session_id),
-            trace_id.clone(),
-            parent_span_id.clone(),
-        )?;
+        let headers = self.build_mcp_headers(request_headers, agent_id, Some(session_id))?;

        let response = self
            .client
@ -641,31 +457,19 @@ impl PipelineProcessor {
            .await?;

        info!(
-            "Initialized notification response status: {}",
+            "initialized notification response status: {}",
            response.status()
        );

        Ok(())
    }

-    async fn get_new_session_id(
-        &self,
-        agent_id: &str,
-        trace_id: String,
-        parent_span_id: String,
-        request_headers: &HeaderMap,
-    ) -> String {
-        info!("Initializing MCP session for agent {}", agent_id);
+    async fn get_new_session_id(&self, agent_id: &str, request_headers: &HeaderMap) -> String {
+        info!("initializing MCP session for agent {}", agent_id);

        let initialize_request = self.build_initialize_request();
        let headers = self
-            .build_mcp_headers(
-                request_headers,
-                agent_id,
-                None,
-                trace_id.clone(),
-                parent_span_id.clone(),
-            )
+            .build_mcp_headers(request_headers, agent_id, None)
            .expect("Failed to build headers for initialization");

        let response = self
@ -673,7 +477,7 @@ impl PipelineProcessor {
            .await
            .expect("Failed to initialize MCP session");

-        info!("Initialize response status: {}", response.status());
+        info!("initialize response status: {}", response.status());

        let session_id = response
            .headers()
@ -683,49 +487,54 @@ impl PipelineProcessor {
            .to_string();

        info!(
-            "Created new MCP session for agent {}: {}",
+            "created new MCP session for agent {}: {}",
            agent_id, session_id
        );

        // Send initialized notification
-        self.send_initialized_notification(
-            agent_id,
-            &session_id,
-            trace_id.clone(),
-            parent_span_id.clone(),
-            &headers,
-        )
-        .await
-        .expect("Failed to send initialized notification");
+        self.send_initialized_notification(agent_id, &session_id, &headers)
+            .await
+            .expect("Failed to send initialized notification");

        session_id
    }

    /// Execute a HTTP-based filter agent
+    #[instrument(
+        skip(self, messages, agent, request_headers),
+        fields(
+            agent_id = %agent.id,
+            agent_url = %agent.url,
+            filter_name = %agent.id,
+            message_count = messages.len()
+        )
+    )]
    async fn execute_http_filter(
        &mut self,
        messages: &[Message],
        agent: &Agent,
        request_headers: &HeaderMap,
-        trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
-        trace_id: String,
-        filter_span_id: String,
    ) -> Result<Vec<Message>, PipelineError> {
-        let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
+        // Set service name for this filter span
+        set_service_name(operation_component::AGENT_FILTER);

-        // Generate span ID for this HTTP call (child of filter span)
-        let http_span_id = generate_random_span_id();
+        // Update current span name to include filter name
+        use opentelemetry::trace::get_active_span;
+        get_active_span(|span| {
+            span.update_name(format!("execute_http_filter ({})", agent.id));
+        });

        // Build headers
-        let trace_parent = format!("00-{}-{}-01", trace_id, http_span_id);
        let mut agent_headers = request_headers.clone();
        agent_headers.remove(hyper::header::CONTENT_LENGTH);

+        // Drop any inbound traceparent, then inject the current span's OpenTelemetry context
        agent_headers.remove(TRACE_PARENT_HEADER);
-        agent_headers.insert(
-            TRACE_PARENT_HEADER,
-            hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
-        );
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
+        });

        agent_headers.insert(
            ARCH_UPSTREAM_HOST_HEADER,
@ -748,10 +557,6 @@ impl PipelineProcessor {
            hyper::header::HeaderValue::from_static("application/json"),
        );

-        // Send request with tracing
-        let start_time = SystemTime::now();
-        let start_instant = Instant::now();
-
        debug!(
            "Sending HTTP request to agent {} at URL: {}",
            agent.id, agent.url
@ -769,30 +574,6 @@ impl PipelineProcessor {
        let http_status = response.status();
        let response_bytes = response.bytes().await?;

-        let end_time = SystemTime::now();
-        let elapsed = start_instant.elapsed();
-
-        // Record HTTP call span
-        if let Some(collector) = trace_collector {
-            let mut attrs = HashMap::new();
-            attrs.insert("http.tool_name", tool_name.to_string());
-            attrs.insert("http.url", agent.url.clone());
-            attrs.insert("http.status_code", http_status.as_u16().to_string());
-
-            self.record_agent_filter_span(
-                collector,
-                "http_call",
-                &agent.id,
-                start_time,
-                end_time,
-                elapsed,
-                Some(attrs),
-                trace_id.clone(),
-                filter_span_id.clone(),
-                Some(http_span_id),
-            );
-        }
-
        // Handle HTTP errors
        if !http_status.is_success() {
            let error_body = String::from_utf8_lossy(&response_bytes).to_string();
@ -825,34 +606,34 @@ impl PipelineProcessor {
    }

    /// Send request to terminal agent and return the raw response for streaming
+    /// Note: The caller is responsible for creating the plano(agent) span that wraps
+    /// both this call and the subsequent response consumption.
    pub async fn invoke_agent(
        &self,
        messages: &[Message],
        mut original_request: ProviderRequestType,
        terminal_agent: &Agent,
        request_headers: &HeaderMap,
-        trace_id: String,
-        agent_span_id: String,
    ) -> Result<reqwest::Response, PipelineError> {
        // let mut request = original_request.clone();
        original_request.set_messages(messages);

+        let request_url = "/v1/chat/completions";
+
        let request_body = ProviderRequestType::to_bytes(&original_request).unwrap();
        // let request_body = serde_json::to_string(&request)?;
-        debug!("Sending request to terminal agent {}", terminal_agent.id);
+        debug!("sending request to terminal agent {}", terminal_agent.id);

        let mut agent_headers = request_headers.clone();
        agent_headers.remove(hyper::header::CONTENT_LENGTH);

-        // Set traceparent header to make the egress span a child of the agent span
-        if !trace_id.is_empty() && !agent_span_id.is_empty() {
-            let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id);
-            agent_headers.remove(TRACE_PARENT_HEADER);
-            agent_headers.insert(
-                TRACE_PARENT_HEADER,
-                hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
-            );
-        }
+        // Drop any inbound traceparent, then inject the current span's OpenTelemetry context
+        agent_headers.remove(TRACE_PARENT_HEADER);
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
+        });

        agent_headers.insert(
            ARCH_UPSTREAM_HOST_HEADER,
@ -867,7 +648,7 @@ impl PipelineProcessor {

        let response = self
            .client
-            .post(format!("{}/v1/chat/completions", self.url))
+            .post(format!("{}{}", self.url, request_url))
            .headers(agent_headers)
            .body(request_body)
            .send()
@ -914,15 +695,7 @@ mod tests {
        let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]);

        let result = processor
-            .process_filter_chain(
-                &messages,
-                &pipeline,
-                &agent_map,
-                &request_headers,
-                None,
-                String::new(),
-                String::new(),
-            )
+            .process_filter_chain(&messages, &pipeline, &agent_map, &request_headers)
            .await;

        assert!(result.is_err());
@ -956,14 +729,7 @@ mod tests {
        let request_headers = HeaderMap::new();

        let result = processor
-            .execute_mcp_filter(
-                &messages,
-                &agent,
-                &request_headers,
-                None,
-                "trace-123".to_string(),
-                "span-123".to_string(),
-            )
+            .execute_mcp_filter(&messages, &agent, &request_headers)
            .await;

        match result {
@ -1002,14 +768,7 @@ mod tests {
        let request_headers = HeaderMap::new();

        let result = processor
-            .execute_mcp_filter(
-                &messages,
-                &agent,
-                &request_headers,
-                None,
-                "trace-456".to_string(),
-                "span-456".to_string(),
-            )
+            .execute_mcp_filter(&messages, &agent, &request_headers)
            .await;

        match result {
@ -1061,14 +820,7 @@ mod tests {
        let request_headers = HeaderMap::new();

        let result = processor
-            .execute_mcp_filter(
-                &messages,
-                &agent,
-                &request_headers,
-                None,
-                "trace-789".to_string(),
-                "span-789".to_string(),
-            )
+            .execute_mcp_filter(&messages, &agent, &request_headers)
            .await;

        match result {
--- a/crates/brightstaff/src/handlers/response_handler.rs
+++ b/crates/brightstaff/src/handlers/response_handler.rs
@ -9,7 +9,7 @@ use hyper::{Response, StatusCode};
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
 use tokio_stream::StreamExt;
-use tracing::{info, warn};
+use tracing::{info, warn, Instrument};

 /// Errors that can occur during response handling
 #[derive(Debug, thiserror::Error)]
@ -69,10 +69,14 @@ impl ResponseHandler {
        response
    }

-    /// Create a streaming response from a reqwest response
+    /// Create a streaming response from a reqwest response.
+    /// The spawned streaming task is instrumented with both `agent_span` and `orchestrator_span`
+    /// so their durations reflect the actual time spent streaming to the client.
    pub async fn create_streaming_response(
        &self,
        llm_response: reqwest::Response,
+        agent_span: tracing::Span,
+        orchestrator_span: tracing::Span,
    ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ResponseError> {
        // Copy headers from the original response
        let response_headers = llm_response.headers();
@ -89,25 +93,30 @@ impl ResponseHandler {
        // Create channel for async streaming
        let (tx, rx) = mpsc::channel::<Bytes>(16);

-        // Spawn task to stream data
-        tokio::spawn(async move {
-            let mut byte_stream = llm_response.bytes_stream();
+        // Spawn streaming task instrumented with both spans (nested) so both
+        // stay open, and are entered on every poll, until streaming finishes.
+        tokio::spawn(
+            async move {
+                let mut byte_stream = llm_response.bytes_stream();

-            while let Some(item) = byte_stream.next().await {
-                let chunk = match item {
-                    Ok(chunk) => chunk,
-                    Err(err) => {
-                        warn!("Error receiving chunk: {:?}", err);
+                while let Some(item) = byte_stream.next().await {
+                    let chunk = match item {
+                        Ok(chunk) => chunk,
+                        Err(err) => {
+                            warn!(error = ?err, "error receiving chunk");
+                            break;
+                        }
+                    };
+
+                    if tx.send(chunk).await.is_err() {
+                        warn!("receiver dropped");
                        break;
                    }
-                };
-
-                if tx.send(chunk).await.is_err() {
-                    warn!("Receiver dropped");
-                    break;
                }
            }
-        });
+            .instrument(agent_span)
+            .instrument(orchestrator_span),
+        );

        let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
        let stream_body = BoxBody::new(StreamBody::new(stream));
@ -164,11 +173,11 @@ impl ResponseHandler {
                        if let Some(content) = provider_response.content_delta() {
                            accumulated_text.push_str(content);
                        } else {
-                            info!("No content delta in provider response");
+                            info!("no content delta in provider response");
                        }
                    }
                    Err(e) => {
-                        warn!("Failed to parse provider response: {:?}", e);
+                        warn!(error = ?e, "failed to parse provider response");
                    }
                }
            }
@ -248,7 +257,13 @@ mod tests {
        let llm_response = client.get(&(server.url() + "/test")).send().await.unwrap();

        let handler = ResponseHandler::new();
-        let result = handler.create_streaming_response(llm_response).await;
+        let result = handler
+            .create_streaming_response(
+                llm_response,
+                tracing::Span::current(),
+                tracing::Span::current(),
+            )
+            .await;

        mock.assert_async().await;
        assert!(result.is_ok());
--- a/crates/brightstaff/src/handlers/router_chat.rs
+++ b/crates/brightstaff/src/handlers/router_chat.rs
@ -1,14 +1,12 @@
 use common::configuration::ModelUsagePreference;
-use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector};
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use hyper::StatusCode;
-use std::collections::HashMap;
 use std::sync::Arc;
 use tracing::{debug, info, warn};

 use crate::router::llm_router::RouterService;
-use crate::tracing::{http, operation_component, routing, OperationNameBuilder};
+use crate::tracing::routing;

 pub struct RoutingResult {
    pub model_name: String,
@ -36,7 +34,6 @@ impl RoutingError {
 pub async fn router_chat_get_upstream_model(
    router_service: Arc<RouterService>,
    client_request: ProviderRequestType,
-    trace_collector: Arc<TraceCollector>,
    traceparent: &str,
    request_path: &str,
    request_id: &str,
@ -56,14 +53,14 @@ pub async fn router_chat_get_upstream_model(
            | ProviderRequestType::BedrockConverseStream(_)
            | ProviderRequestType::ResponsesAPIRequest(_),
        ) => {
-            warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
+            warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
            return Err(RoutingError::internal_error(
                "Request conversion failed".to_string(),
            ));
        }
        Err(err) => {
            warn!(
-                "Failed to convert request to ChatCompletionsRequest: {}",
+                "failed to convert request to ChatCompletionsRequest: {}",
                err
            );
            return Err(RoutingError::internal_error(format!(
@ -74,9 +71,8 @@ pub async fn router_chat_get_upstream_model(
    };

    debug!(
-        "[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}",
-        request_id,
-        &serde_json::to_string(&chat_request).unwrap()
+        request = %serde_json::to_string(&chat_request).unwrap(),
+        "router request"
    );

    // Extract usage preferences from metadata
@ -112,16 +108,14 @@ pub async fn router_chat_get_upstream_model(
    };

    info!(
-        "[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
-        request_id,
-        usage_preferences.is_some(),
-        request_path,
-        latest_message_for_log
+        has_usage_preferences = usage_preferences.is_some(),
+        path = %request_path,
+        latest_message = %latest_message_for_log,
+        "processing router request"
    );

    // Capture start time to measure route determination latency
    let routing_start_time = std::time::Instant::now();
-    let routing_start_system_time = std::time::SystemTime::now();

    // Attempt to determine route using the router service
    let routing_result = router_service
@ -133,41 +127,21 @@ pub async fn router_chat_get_upstream_model(
        )
        .await;

+    let determination_ms = routing_start_time.elapsed().as_millis() as i64;
+    let current_span = tracing::Span::current();
+    current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
+
    match routing_result {
        Ok(route) => match route {
            Some((_, model_name)) => {
-                // Record successful routing span
-                let mut attrs: HashMap<String, String> = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), model_name.clone());
-                record_routing_span(
-                    trace_collector,
-                    traceparent,
-                    routing_start_time,
-                    routing_start_system_time,
-                    attrs,
-                )
-                .await;
-
+                current_span.record("route.selected_model", model_name.as_str());
                Ok(RoutingResult { model_name })
            }
            None => {
                // No route determined, return sentinel value "none"
                // This signals to llm.rs to use the original validated request model
-                info!(
-                    "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'",
-                    request_id
-                );
-
-                let mut attrs = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), "none".to_string());
-                record_routing_span(
-                    trace_collector,
-                    traceparent,
-                    routing_start_time,
-                    routing_start_system_time,
-                    attrs,
-                )
-                .await;
+                current_span.record("route.selected_model", "none");
+                info!("no route determined, using default model");

                Ok(RoutingResult {
                    model_name: "none".to_string(),
@ -175,19 +149,7 @@ pub async fn router_chat_get_upstream_model(
            }
        },
        Err(err) => {
-            // Record failed routing span
-            let mut attrs = HashMap::new();
-            attrs.insert("route.selected_model".to_string(), "unknown".to_string());
-            attrs.insert("error.message".to_string(), err.to_string());
-            record_routing_span(
-                trace_collector,
-                traceparent,
-                routing_start_time,
-                routing_start_system_time,
-                attrs,
-            )
-            .await;
-
+            current_span.record("route.selected_model", "unknown");
            Err(RoutingError::internal_error(format!(
                "Failed to determine route: {}",
                err
@ -195,53 +157,3 @@ pub async fn router_chat_get_upstream_model(
        }
    }
 }
-
-/// Helper function to record a routing span with the given attributes.
-/// Reduces code duplication across different routing outcomes.
-async fn record_routing_span(
-    trace_collector: Arc<TraceCollector>,
-    traceparent: &str,
-    start_time: std::time::Instant,
-    start_system_time: std::time::SystemTime,
-    attrs: HashMap<String, String>,
-) {
-    // The routing always uses OpenAI Chat Completions format internally,
-    // so we log that as the actual API being used for routing
-    let routing_api_path = "/v1/chat/completions";
-
-    let routing_operation_name = OperationNameBuilder::new()
-        .with_method("POST")
-        .with_path(routing_api_path)
-        .with_target("Arch-Router-1.5B")
-        .build();
-
-    let (trace_id, parent_span_id) = parse_traceparent(traceparent);
-
-    // Build the routing span directly using constants
-    let mut span_builder = SpanBuilder::new(&routing_operation_name)
-        .with_trace_id(&trace_id)
-        .with_kind(SpanKind::Client)
-        .with_start_time(start_system_time)
-        .with_end_time(std::time::SystemTime::now())
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::TARGET, routing_api_path.to_string())
-        .with_attribute(
-            routing::ROUTE_DETERMINATION_MS,
-            start_time.elapsed().as_millis().to_string(),
-        );
-
-    // Only set parent span ID if it exists (not a root span)
-    if let Some(parent) = parent_span_id {
-        span_builder = span_builder.with_parent_span_id(&parent);
-    }
-
-    // Add all custom attributes
-    for (key, value) in attrs {
-        span_builder = span_builder.with_attribute(key, value);
-    }
-
-    let span = span_builder.build();
-
-    // Record the span directly to the collector
-    trace_collector.record_span(operation_component::ROUTING, span);
-}
--- a/crates/brightstaff/src/handlers/utils.rs
+++ b/crates/brightstaff/src/handlers/utils.rs
@ -1,18 +1,18 @@
 use bytes::Bytes;
-use common::traces::{Attribute, AttributeValue, Event, Span, TraceCollector};
 use http_body_util::combinators::BoxBody;
 use http_body_util::StreamBody;
 use hyper::body::Frame;
-use std::sync::Arc;
-use std::time::{Instant, SystemTime};
+use opentelemetry::trace::TraceContextExt;
+use opentelemetry::KeyValue;
+use std::time::Instant;
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
 use tokio_stream::StreamExt;
-use tracing::warn;
+use tracing::{info, warn, Instrument};
+use tracing_opentelemetry::OpenTelemetrySpanExt;

-// Import tracing constants and signals
 use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
-use crate::tracing::{error, llm, signals as signal_constants};
+use crate::tracing::{llm, set_service_name, signals as signal_constants};
 use hermesllm::apis::openai::Message;

 /// Trait for processing streaming chunks
@ -31,11 +31,10 @@ pub trait StreamProcessor: Send + 'static {
    fn on_error(&mut self, _error: &str) {}
 }

-/// A processor that tracks streaming metrics and finalizes the span
+/// A processor that tracks streaming metrics
 pub struct ObservableStreamProcessor {
-    collector: Arc<TraceCollector>,
    service_name: String,
-    span: Span,
+    operation_name: String,
    total_bytes: usize,
    chunk_count: usize,
    start_time: Instant,
@ -47,22 +46,28 @@ impl ObservableStreamProcessor {
    /// Create a new passthrough processor
    ///
    /// # Arguments
-    /// * `collector` - The trace collector to record the span to
-    /// * `service_name` - The service name for this span (e.g., "archgw(llm)")
-    /// * `span` - The span to finalize after streaming completes
+    /// * `service_name` - The service name for this span (e.g., "plano(llm)")
+    ///   This will be set as the `service.name.override` attribute on the current span,
+    ///   allowing the ServiceNameOverrideExporter to route spans to different services.
+    /// * `operation_name` - The current span operation name (e.g., "POST /v1/chat/completions gpt-4")
+    ///   Used to append the flag marker when concerning signals are detected.
    /// * `start_time` - When the request started (for duration calculation)
    /// * `messages` - Optional conversation messages for signal analysis
    pub fn new(
-        collector: Arc<TraceCollector>,
        service_name: impl Into<String>,
-        span: Span,
+        operation_name: impl Into<String>,
        start_time: Instant,
        messages: Option<Vec<Message>>,
    ) -> Self {
+        let service_name = service_name.into();
+
+        // Set the service name override on the current span for OpenTelemetry export
+        // This allows the ServiceNameOverrideExporter to route this span to the correct service
+        set_service_name(&service_name);
+
        Self {
-            collector,
-            service_name: service_name.into(),
-            span,
+            service_name,
+            operation_name: operation_name.into(),
            total_bytes: 0,
            chunk_count: 0,
            start_time,
@@ -87,89 +92,81 @@ impl StreamProcessor for ObservableStreamProcessor {
    }

    fn on_complete(&mut self) {
-        // Update span with streaming metrics and end time
-        let end_time_nanos = SystemTime::now()
-            .duration_since(SystemTime::UNIX_EPOCH)
-            .unwrap_or_default()
-            .as_nanos();
-
-        self.span.end_time_unix_nano = format!("{}", end_time_nanos);
-
-        // Add streaming metrics as attributes using constants
-        self.span.attributes.push(Attribute {
-            key: llm::RESPONSE_BYTES.to_string(),
-            value: AttributeValue {
-                string_value: Some(self.total_bytes.to_string()),
-            },
-        });
-
-        self.span.attributes.push(Attribute {
-            key: llm::DURATION_MS.to_string(),
-            value: AttributeValue {
-                string_value: Some(self.start_time.elapsed().as_millis().to_string()),
-            },
-        });
-
-        // Add time to first token if available (streaming only)
+        // Record time-to-first-token as an OTel span attribute + event (streaming only)
        if let Some(ttft) = self.time_to_first_token {
-            self.span.attributes.push(Attribute {
-                key: llm::TIME_TO_FIRST_TOKEN_MS.to_string(),
-                value: AttributeValue {
-                    string_value: Some(ttft.to_string()),
-                },
-            });
-
-            // Add time to first token as a span event
-            // Calculate the timestamp by adding ttft duration to span start time
-            if let Ok(start_time_nanos) = self.span.start_time_unix_nano.parse::<u128>() {
-                // Convert ttft from milliseconds to nanoseconds and add to start time
-                let event_timestamp = start_time_nanos + (ttft * 1_000_000);
-                let mut event =
-                    Event::new(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), event_timestamp);
-                event.add_attribute(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), ttft.to_string());
-
-                // Initialize events vector if needed
-                if self.span.events.is_none() {
-                    self.span.events = Some(Vec::new());
-                }
-
-                if let Some(ref mut events) = self.span.events {
-                    events.push(event);
-                }
-            }
+            let span = tracing::Span::current();
+            let otel_context = span.context();
+            let otel_span = otel_context.span();
+            otel_span.set_attribute(KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64));
+            otel_span.add_event(
+                llm::TIME_TO_FIRST_TOKEN_MS,
+                vec![KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64)],
+            );
        }

-        // Analyze signals if messages are available and add to span attributes
+        // Analyze signals if messages are available and record as span attributes
        if let Some(ref messages) = self.messages {
            let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
            let report = analyzer.analyze(messages);

+            // Get the current OTel span to set signal attributes
+            let span = tracing::Span::current();
+            let otel_context = span.context();
+            let otel_span = otel_context.span();
+
            // Add overall quality
-            self.span.attributes.push(Attribute {
-                key: signal_constants::QUALITY.to_string(),
-                value: AttributeValue {
-                    string_value: Some(format!("{:?}", report.overall_quality)),
-                },
-            });
+            otel_span.set_attribute(KeyValue::new(
+                signal_constants::QUALITY,
+                format!("{:?}", report.overall_quality),
+            ));

            // Add repair/follow-up metrics if concerning
            if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::REPAIR_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.follow_up.repair_count.to_string()),
-                    },
-                });
-
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::REPAIR_RATIO.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(format!("{:.3}", report.follow_up.repair_ratio)),
-                    },
-                });
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::REPAIR_COUNT,
+                    report.follow_up.repair_count as i64,
+                ));
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::REPAIR_RATIO,
+                    format!("{:.3}", report.follow_up.repair_ratio),
+                ));
            }

-            // Add flag marker to operation name if any concerning signal is detected
+            // Add frustration metrics
+            if report.frustration.has_frustration {
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::FRUSTRATION_COUNT,
+                    report.frustration.frustration_count as i64,
+                ));
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::FRUSTRATION_SEVERITY,
+                    report.frustration.severity as i64,
+                ));
+            }
+
+            // Add repetition metrics
+            if report.repetition.has_looping {
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::REPETITION_COUNT,
+                    report.repetition.repetition_count as i64,
+                ));
+            }
+
+            // Add escalation metrics
+            if report.escalation.escalation_requested {
+                otel_span
+                    .set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
+            }
+
+            // Add positive feedback metrics
+            if report.positive_feedback.has_positive_feedback {
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::POSITIVE_FEEDBACK_COUNT,
+                    report.positive_feedback.positive_count as i64,
+                ));
+            }
+
+            // Flag the span name if any concerning signal is detected
            let should_flag = report.frustration.has_frustration
                || report.repetition.has_looping
                || report.escalation.escalation_requested
@@ -179,94 +176,27 @@ impl StreamProcessor for ObservableStreamProcessor {
                );

            if should_flag {
-                // Prepend flag marker to the operation name
-                self.span.name = format!("{} {}", self.span.name, FLAG_MARKER);
-            }
-
-            // Add key signal metrics
-            if report.frustration.has_frustration {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::FRUSTRATION_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.frustration.frustration_count.to_string()),
-                    },
-                });
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::FRUSTRATION_SEVERITY.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.frustration.severity.to_string()),
-                    },
-                });
-            }
-
-            if report.repetition.has_looping {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::REPETITION_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.repetition.repetition_count.to_string()),
-                    },
-                });
-            }
-
-            if report.escalation.escalation_requested {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::ESCALATION_REQUESTED.to_string(),
-                    value: AttributeValue {
-                        string_value: Some("true".to_string()),
-                    },
-                });
-            }
-
-            if report.positive_feedback.has_positive_feedback {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::POSITIVE_FEEDBACK_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.positive_feedback.positive_count.to_string()),
-                    },
-                });
+                otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
            }
        }

-        // Record the finalized span
-        self.collector
-            .record_span(&self.service_name, self.span.clone());
+        info!(
+            service = %self.service_name,
+            total_bytes = self.total_bytes,
+            chunk_count = self.chunk_count,
+            duration_ms = self.start_time.elapsed().as_millis(),
+            time_to_first_token_ms = ?self.time_to_first_token,
+            "streaming completed"
+        );
    }

    fn on_error(&mut self, error_msg: &str) {
-        warn!("Stream error in PassthroughProcessor: {}", error_msg);
-
-        // Update span with error info and end time
-        let end_time_nanos = SystemTime::now()
-            .duration_since(SystemTime::UNIX_EPOCH)
-            .unwrap_or_default()
-            .as_nanos();
-
-        self.span.end_time_unix_nano = format!("{}", end_time_nanos);
-
-        self.span.attributes.push(Attribute {
-            key: error::ERROR.to_string(),
-            value: AttributeValue {
-                string_value: Some("true".to_string()),
-            },
-        });
-
-        self.span.attributes.push(Attribute {
-            key: error::MESSAGE.to_string(),
-            value: AttributeValue {
-                string_value: Some(error_msg.to_string()),
-            },
-        });
-
-        self.span.attributes.push(Attribute {
-            key: llm::DURATION_MS.to_string(),
-            value: AttributeValue {
-                string_value: Some(self.start_time.elapsed().as_millis().to_string()),
-            },
-        });
-
-        // Record the error span
-        self.collector
-            .record_span(&self.service_name, self.span.clone());
+        warn!(
+            service = %self.service_name,
+            error = error_msg,
+            duration_ms = self.start_time.elapsed().as_millis(),
+            "stream error"
+        );
    }
 }

@@ -287,49 +217,55 @@ where
 {
    let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);

+    // Capture the current span so the spawned task inherits the request context
+    let current_span = tracing::Span::current();
+
    // Spawn a task to process and forward chunks
-    let processor_handle = tokio::spawn(async move {
-        let mut is_first_chunk = true;
+    let processor_handle = tokio::spawn(
+        async move {
+            let mut is_first_chunk = true;

-        while let Some(item) = byte_stream.next().await {
-            let chunk = match item {
-                Ok(chunk) => chunk,
-                Err(err) => {
-                    let err_msg = format!("Error receiving chunk: {:?}", err);
-                    warn!("{}", err_msg);
-                    processor.on_error(&err_msg);
-                    break;
+            while let Some(item) = byte_stream.next().await {
+                let chunk = match item {
+                    Ok(chunk) => chunk,
+                    Err(err) => {
+                        let err_msg = format!("Error receiving chunk: {:?}", err);
+                        warn!(error = %err_msg, "stream error");
+                        processor.on_error(&err_msg);
+                        break;
+                    }
+                };
+
+                // Call on_first_bytes for the first chunk
+                if is_first_chunk {
+                    processor.on_first_bytes();
+                    is_first_chunk = false;
                }
-            };

-            // Call on_first_bytes for the first chunk
-            if is_first_chunk {
-                processor.on_first_bytes();
-                is_first_chunk = false;
-            }
-
-            // Process the chunk
-            match processor.process_chunk(chunk) {
-                Ok(Some(processed_chunk)) => {
-                    if tx.send(processed_chunk).await.is_err() {
-                        warn!("Receiver dropped");
+                // Process the chunk
+                match processor.process_chunk(chunk) {
+                    Ok(Some(processed_chunk)) => {
+                        if tx.send(processed_chunk).await.is_err() {
+                            warn!("receiver dropped");
+                            break;
+                        }
+                    }
+                    Ok(None) => {
+                        // Skip this chunk
+                        continue;
+                    }
+                    Err(err) => {
+                        warn!("processor error: {}", err);
+                        processor.on_error(&err);
                        break;
                    }
                }
-                Ok(None) => {
-                    // Skip this chunk
-                    continue;
-                }
-                Err(err) => {
-                    warn!("Processor error: {}", err);
-                    processor.on_error(&err);
-                    break;
-                }
            }
-        }

-        processor.on_complete();
-    });
+            processor.on_complete();
+        }
+        .instrument(current_span),
+    );

    // Convert channel receiver to HTTP stream
    let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));