fix: restore span attributes, OTEL_SERVICE_NAME, config path, and error status

This commit is contained in:
Adil Hafeez 2026-03-06 14:48:32 +00:00
parent 9748cdd857
commit a0513fe191
7 changed files with 55 additions and 11 deletions

View file

@ -1,7 +1,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use common::configuration::{Agent, Listener, ModelAlias};
use common::configuration::{Agent, Listener, ModelAlias, SpanAttributes};
use common::llm_providers::LlmProviders;
use tokio::sync::RwLock;
@ -22,6 +22,7 @@ pub struct AppState {
pub listeners: Arc<RwLock<Vec<Listener>>>,
pub state_storage: Option<Arc<dyn StateStorage>>,
pub llm_provider_url: String,
pub span_attributes: Arc<Option<SpanAttributes>>,
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
pub http_client: reqwest::Client,
}

View file

@ -19,7 +19,7 @@ use crate::app_state::AppState;
use crate::handlers::errors::build_error_chain_response;
use crate::handlers::request::extract_request_id;
use crate::handlers::response::ResponseHandler;
use crate::tracing::{operation_component, set_service_name};
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
/// Main errors for agent chat completions
#[derive(Debug, thiserror::Error)]
@ -41,6 +41,8 @@ pub async fn agent_chat(
state: Arc<AppState>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let request_id = extract_request_id(&request);
let custom_attrs =
collect_custom_trace_attributes(request.headers(), state.span_attributes.as_ref().as_ref());
// Create a span with request_id that will be included in all log lines
let request_span = info_span!(
@ -56,7 +58,7 @@ pub async fn agent_chat(
// Set service name for orchestrator operations
set_service_name(operation_component::ORCHESTRATOR);
match handle_agent_chat_inner(request, state, request_id).await {
match handle_agent_chat_inner(request, state, request_id, custom_attrs).await {
Ok(response) => Ok(response),
Err(err) => {
// Check if this is a client error from the pipeline that should be cascaded
@ -84,7 +86,7 @@ pub async fn agent_chat(
let mut response =
Response::new(ResponseHandler::create_full_body(json_string));
*response.status_mut() = hyper::StatusCode::from_u16(*status)
.unwrap_or(hyper::StatusCode::BAD_REQUEST);
.unwrap_or(hyper::StatusCode::INTERNAL_SERVER_ERROR);
response.headers_mut().insert(
hyper::header::CONTENT_TYPE,
"application/json".parse().unwrap(),
@ -104,6 +106,7 @@ async fn handle_agent_chat_inner(
request: Request<hyper::body::Incoming>,
state: Arc<AppState>,
request_id: String,
custom_attrs: std::collections::HashMap<String, String>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
// Initialize services
let agent_selector = AgentSelector::new(Arc::clone(&state.orchestrator_service));
@ -126,6 +129,9 @@ async fn handle_agent_chat_inner(
get_active_span(|span| {
span.update_name(listener.name.to_string());
for (key, value) in &custom_attrs {
span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
}
});
info!(listener = %listener.name, "handling request");
@ -276,6 +282,12 @@ async fn handle_agent_chat_inner(
set_service_name(operation_component::AGENT);
get_active_span(|span| {
span.update_name(format!("{} /v1/chat/completions", agent_name));
for (key, value) in &custom_attrs {
span.set_attribute(opentelemetry::KeyValue::new(
key.clone(),
value.clone(),
));
}
});
pipeline_processor

View file

@ -29,7 +29,9 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
use crate::state::{
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
};
use crate::tracing::{llm as tracing_llm, operation_component, set_service_name};
use crate::tracing::{
collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name,
};
use router::router_chat_get_upstream_model;
fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
@ -45,6 +47,8 @@ pub async fn llm_chat(
let request_path = request.uri().path().to_string();
let request_headers = request.headers().clone();
let request_id = extract_request_id(&request);
let custom_attrs =
collect_custom_trace_attributes(&request_headers, state.span_attributes.as_ref().as_ref());
// Create a span with request_id that will be included in all log lines
let request_span = info_span!(
@ -60,20 +64,33 @@ pub async fn llm_chat(
);
// Execute the rest of the handler inside the span
llm_chat_inner(request, state, request_id, request_path, request_headers)
.instrument(request_span)
.await
llm_chat_inner(
request,
state,
custom_attrs,
request_id,
request_path,
request_headers,
)
.instrument(request_span)
.await
}
async fn llm_chat_inner(
request: Request<hyper::body::Incoming>,
state: Arc<AppState>,
custom_attrs: HashMap<String, String>,
request_id: String,
request_path: String,
mut request_headers: hyper::HeaderMap,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
// Set service name for LLM operations
set_service_name(operation_component::LLM);
get_active_span(|span| {
for (key, value) in &custom_attrs {
span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
}
});
let traceparent = extract_or_generate_traceparent(&request_headers);

View file

@ -72,11 +72,11 @@ fn cors_preflight() -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Err
/// Load and parse the YAML configuration file.
///
/// The path is read from `PLANO_CONFIG_PATH_RENDERED` (env) or falls back to
/// `./arch_config_rendered.yaml`.
/// `./plano_config_rendered.yaml`.
fn load_config() -> Result<Configuration, Box<dyn std::error::Error + Send + Sync>> {
let path = env::var("PLANO_CONFIG_PATH_RENDERED")
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
eprintln!("loading arch_config.yaml from {}", path);
.unwrap_or_else(|_| "./plano_config_rendered.yaml".to_string());
eprintln!("loading plano_config.yaml from {}", path);
let contents = fs::read_to_string(&path).map_err(|e| format!("failed to read {path}: {e}"))?;
@ -136,6 +136,13 @@ async fn init_app_state(
let state_storage = init_state_storage(config).await?;
let span_attributes = Arc::new(
config
.tracing
.as_ref()
.and_then(|tracing| tracing.span_attributes.clone()),
);
Ok(AppState {
router_service,
orchestrator_service,
@ -145,6 +152,7 @@ async fn init_app_state(
listeners: Arc::new(RwLock::new(config.listeners.clone())),
state_storage,
llm_provider_url,
span_attributes,
http_client: reqwest::Client::new(),
})
}

View file

@ -52,6 +52,7 @@ pub fn collect_custom_trace_attributes(
attributes
}
#[allow(dead_code)]
pub fn append_span_attributes(
mut span_builder: SpanBuilder,
attributes: &HashMap<String, String>,

View file

@ -99,6 +99,9 @@ pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvid
// Create OTLP exporter to send spans to collector.
// Use `if let` to destructure the endpoint, avoiding an unwrap.
if let Some(endpoint) = otel_endpoint.as_deref().filter(|_| tracing_enabled) {
if std::env::var("OTEL_SERVICE_NAME").is_err() {
std::env::set_var("OTEL_SERVICE_NAME", "plano");
}
// Create ServiceNameOverrideExporter to support per-span service names
// This allows spans to have different service names (e.g., plano(orchestrator),
// plano(filter), plano(llm)) by setting the "service.name.override" attribute

View file

@ -1,10 +1,12 @@
mod constants;
mod custom_attributes;
mod init;
mod service_name_exporter;
pub use constants::{
error, http, llm, operation_component, routing, signals, OperationNameBuilder,
};
pub use custom_attributes::collect_custom_trace_attributes;
pub use init::init_tracer;
pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};