add Custom Trace Attributes to extend observability (#708)

* add custom trace attributes

* refactor: prefix custom trace attributes and update schema handlers tests configs

* refactor: rename custom_attribute_prefixes to span_attribute_header_prefixes in configuration and related handlers

* docs: add section on custom span attributes

* refactor: update tracing configuration to use span attributes and adjust related handlers

* docs: custom span attributes section to include static attributes and clarify configuration

* add custom trace attributes

* refactor: prefix custom trace attributes and update schema handlers tests configs

* refactor: rename custom_attribute_prefixes to span_attribute_header_prefixes in configuration and related handlers

* docs: add section on custom span attributes

* refactor: update tracing configuration to use span attributes and adjust related handlers

* docs: custom span attributes section to include static attributes and clarify configuration

* refactor: remove TraceCollector usage and enhance logging with structured attributes

* refactor: custom trace attribute extraction to improve clarity

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Musa 2026-02-25 16:27:20 -08:00 committed by GitHub
parent 70ad56a258
commit 2bde21ff57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 353 additions and 9 deletions

View file

@ -2,6 +2,7 @@ use std::sync::Arc;
use std::time::Instant;
use bytes::Bytes;
use common::configuration::SpanAttributes;
use common::errors::BrightStaffError;
use common::llm_providers::LlmProviders;
use hermesllm::apis::OpenAIMessage;
@ -20,7 +21,7 @@ use super::agent_selector::{AgentSelectionError, AgentSelector};
use super::pipeline_processor::{PipelineError, PipelineProcessor};
use super::response_handler::ResponseHandler;
use crate::router::plano_orchestrator::OrchestratorService;
use crate::tracing::{operation_component, set_service_name};
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
/// Main errors for agent chat completions
#[derive(Debug, thiserror::Error)]
@ -43,8 +44,11 @@ pub async fn agent_chat(
_: String,
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
span_attributes: Arc<Option<SpanAttributes>>,
llm_providers: Arc<RwLock<LlmProviders>>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let custom_attrs =
collect_custom_trace_attributes(request.headers(), span_attributes.as_ref().as_ref());
// Extract request_id from headers or generate a new one
let request_id: String = match request
.headers()
@ -77,6 +81,7 @@ pub async fn agent_chat(
listeners,
llm_providers,
request_id,
custom_attrs,
)
.await
{
@ -164,6 +169,7 @@ async fn handle_agent_chat_inner(
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
request_id: String,
custom_attrs: std::collections::HashMap<String, String>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
// Initialize services
let agent_selector = AgentSelector::new(orchestrator_service);
@ -186,6 +192,9 @@ async fn handle_agent_chat_inner(
get_active_span(|span| {
span.update_name(listener.name.to_string());
for (key, value) in &custom_attrs {
span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
}
});
info!(listener = %listener.name, "handling request");
@ -348,6 +357,9 @@ async fn handle_agent_chat_inner(
set_service_name(operation_component::AGENT);
get_active_span(|span| {
span.update_name(format!("{} /v1/chat/completions", agent_name));
for (key, value) in &custom_attrs {
span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
}
});
pipeline_processor

View file

@ -1,5 +1,5 @@
use bytes::Bytes;
use common::configuration::ModelAlias;
use common::configuration::{ModelAlias, SpanAttributes};
use common::consts::{
ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
};
@ -28,7 +28,9 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
use crate::state::{
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
};
use crate::tracing::{llm as tracing_llm, operation_component, set_service_name};
use crate::tracing::{
collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name,
};
use common::errors::BrightStaffError;
@ -38,6 +40,7 @@ pub async fn llm_chat(
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
span_attributes: Arc<Option<SpanAttributes>>,
state_storage: Option<Arc<dyn StateStorage>>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let request_path = request.uri().path().to_string();
@ -50,6 +53,8 @@ pub async fn llm_chat(
Some(id) => id,
None => uuid::Uuid::new_v4().to_string(),
};
let custom_attrs =
collect_custom_trace_attributes(&request_headers, span_attributes.as_ref().as_ref());
// Create a span with request_id that will be included in all log lines
let request_span = info_span!(
@ -71,6 +76,7 @@ pub async fn llm_chat(
full_qualified_llm_provider_url,
model_aliases,
llm_providers,
custom_attrs,
state_storage,
request_id,
request_path,
@ -87,6 +93,7 @@ async fn llm_chat_inner(
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
custom_attrs: HashMap<String, String>,
state_storage: Option<Arc<dyn StateStorage>>,
request_id: String,
request_path: String,
@ -94,6 +101,11 @@ async fn llm_chat_inner(
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
// Set service name for LLM operations
set_service_name(operation_component::LLM);
get_active_span(|span| {
for (key, value) in &custom_attrs {
span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone()));
}
});
// Extract or generate traceparent - this establishes the trace context for all spans
let traceparent: String = match request_headers

View file

@ -114,6 +114,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
));
let model_aliases = Arc::new(plano_config.model_aliases.clone());
let span_attributes = Arc::new(
plano_config
.tracing
.as_ref()
.and_then(|tracing| tracing.span_attributes.clone()),
);
// Initialize trace collector and start background flusher
// Tracing is enabled if the tracing config is present in plano_config.yaml
@ -173,6 +179,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = llm_providers.clone();
let agents_list = combined_agents_filters_list.clone();
let listeners = listeners.clone();
let span_attributes = span_attributes.clone();
let state_storage = state_storage.clone();
let service = service_fn(move |req| {
let router_service = Arc::clone(&router_service);
@ -183,6 +190,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let model_aliases = Arc::clone(&model_aliases);
let agents_list = agents_list.clone();
let listeners = listeners.clone();
let span_attributes = span_attributes.clone();
let state_storage = state_storage.clone();
async move {
@ -202,6 +210,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
fully_qualified_url,
agents_list,
listeners,
span_attributes,
llm_providers,
)
.with_context(parent_cx)
@ -220,6 +229,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
fully_qualified_url,
model_aliases,
llm_providers,
span_attributes,
state_storage,
)
.with_context(parent_cx)

View file

@ -0,0 +1,156 @@
use std::collections::HashMap;
use common::configuration::SpanAttributes;
use common::traces::SpanBuilder;
use hyper::header::HeaderMap;
pub fn collect_custom_trace_attributes(
headers: &HeaderMap,
span_attributes: Option<&SpanAttributes>,
) -> HashMap<String, String> {
let mut attributes = HashMap::new();
let Some(span_attributes) = span_attributes else {
return attributes;
};
if let Some(static_attributes) = span_attributes.static_attributes.as_ref() {
for (key, value) in static_attributes {
attributes.insert(key.clone(), value.clone());
}
}
let Some(header_prefixes) = span_attributes.header_prefixes.as_deref() else {
return attributes;
};
if header_prefixes.is_empty() {
return attributes;
}
for (name, value) in headers.iter() {
let header_name = name.as_str();
let matched_prefix = header_prefixes
.iter()
.find(|prefix| header_name.starts_with(prefix.as_str()))
.map(String::as_str);
let Some(prefix) = matched_prefix else {
continue;
};
let Some(raw_value) = value.to_str().ok().map(str::trim) else {
continue;
};
let suffix = header_name.strip_prefix(prefix).unwrap_or("");
let suffix_key = suffix.trim_start_matches('-').replace('-', ".");
if suffix_key.is_empty() {
continue;
}
attributes.insert(suffix_key, raw_value.to_string());
}
attributes
}
pub fn append_span_attributes(
mut span_builder: SpanBuilder,
attributes: &HashMap<String, String>,
) -> SpanBuilder {
for (key, value) in attributes {
span_builder = span_builder.with_attribute(key, value);
}
span_builder
}
#[cfg(test)]
mod tests {
use super::collect_custom_trace_attributes;
use common::configuration::SpanAttributes;
use hyper::header::{HeaderMap, HeaderValue};
use std::collections::HashMap;
#[test]
fn extracts_headers_by_prefix() {
let mut headers = HeaderMap::new();
headers.insert("x-katanemo-tenant-id", HeaderValue::from_static("ten_456"));
headers.insert("x-katanemo-user-id", HeaderValue::from_static("usr_789"));
headers.insert("x-katanemo-admin-level", HeaderValue::from_static("3"));
headers.insert("x-other-id", HeaderValue::from_static("ignored"));
let attrs = collect_custom_trace_attributes(
&headers,
Some(&SpanAttributes {
header_prefixes: Some(vec!["x-katanemo-".to_string()]),
static_attributes: None,
}),
);
assert_eq!(attrs.get("tenant.id"), Some(&"ten_456".to_string()));
assert_eq!(attrs.get("user.id"), Some(&"usr_789".to_string()));
assert_eq!(attrs.get("admin.level"), Some(&"3".to_string()));
assert!(!attrs.contains_key("other.id"));
}
#[test]
fn returns_empty_when_prefixes_missing_or_empty() {
let mut headers = HeaderMap::new();
headers.insert("x-katanemo-tenant-id", HeaderValue::from_static("ten_456"));
let attrs_none = collect_custom_trace_attributes(
&headers,
Some(&SpanAttributes {
header_prefixes: None,
static_attributes: None,
}),
);
assert!(attrs_none.is_empty());
let attrs_empty = collect_custom_trace_attributes(
&headers,
Some(&SpanAttributes {
header_prefixes: Some(Vec::new()),
static_attributes: None,
}),
);
assert!(attrs_empty.is_empty());
}
#[test]
fn supports_multiple_prefixes() {
let mut headers = HeaderMap::new();
headers.insert("x-katanemo-tenant-id", HeaderValue::from_static("ten_456"));
headers.insert("x-tenant-user-id", HeaderValue::from_static("usr_789"));
let attrs = collect_custom_trace_attributes(
&headers,
Some(&SpanAttributes {
header_prefixes: Some(vec!["x-katanemo-".to_string(), "x-tenant-".to_string()]),
static_attributes: None,
}),
);
assert_eq!(attrs.get("tenant.id"), Some(&"ten_456".to_string()));
assert_eq!(attrs.get("user.id"), Some(&"usr_789".to_string()));
}
#[test]
fn header_attributes_override_static_attributes() {
let mut headers = HeaderMap::new();
headers.insert("x-katanemo-tenant-id", HeaderValue::from_static("ten_456"));
let mut static_attributes = HashMap::new();
static_attributes.insert("tenant.id".to_string(), "ten_static".to_string());
static_attributes.insert("environment".to_string(), "prod".to_string());
let attrs = collect_custom_trace_attributes(
&headers,
Some(&SpanAttributes {
header_prefixes: Some(vec!["x-katanemo-".to_string()]),
static_attributes: Some(static_attributes),
}),
);
assert_eq!(attrs.get("tenant.id"), Some(&"ten_456".to_string()));
assert_eq!(attrs.get("environment"), Some(&"prod".to_string()));
}
}

View file

@ -1,9 +1,11 @@
mod constants;
mod custom_attributes;
mod service_name_exporter;
pub use constants::{
error, http, llm, operation_component, routing, signals, OperationNameBuilder,
};
pub use custom_attributes::{append_span_attributes, collect_custom_trace_attributes};
pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};
use opentelemetry::trace::get_active_span;

View file

@ -92,6 +92,14 @@ pub struct Tracing {
pub trace_arch_internal: Option<bool>,
pub random_sampling: Option<u32>,
pub opentracing_grpc_endpoint: Option<String>,
pub span_attributes: Option<SpanAttributes>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SpanAttributes {
pub header_prefixes: Option<Vec<String>>,
#[serde(rename = "static")]
pub static_attributes: Option<HashMap<String, String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)]