Improve end to end tracing (#628)

* adding canonical tracing support via bright-staff

* improved formatting for tools in the traces

* removing anthropic from the currency exchange demo

* using Envoy to transport traces, not calling OTEL directly

* moving otel collector cluster outside tracing if/else

* minor fixes to not write to the OTEL collector if tracing is disabled

* fixed PR comments and added more trace attributes

* more fixes based on PR comments

* more clean up based on PR comments

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-12-11 15:21:57 -08:00 committed by GitHub
parent 8adb9795d8
commit a79f55f313
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 2556 additions and 403 deletions

View file

@ -2,26 +2,18 @@ use crate::metrics::Metrics;
use crate::stream_context::StreamContext;
use common::configuration::Configuration;
use common::configuration::Overrides;
use common::consts::OTEL_COLLECTOR_HTTP;
use common::consts::OTEL_POST_PATH;
use common::http::CallArgs;
use common::http::Client;
use common::llm_providers::LlmProviders;
use common::ratelimit;
use common::stats::Gauge;
use common::tracing::TraceData;
use log::trace;
use log::warn;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::cell::RefCell;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::rc::Rc;
use std::time::Duration;
use std::sync::{Arc, Mutex};
/// Per-callout marker stored in `FilterContext::callouts`, keyed by the
/// token id returned from `http_call`, so `on_http_call_response` can match
/// a collector response back to its originating request. Currently carries
/// no payload — presumably reserved for future per-request state; confirm.
#[derive(Debug)]
pub struct CallContext {}
@ -31,7 +23,6 @@ pub struct FilterContext {
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: RefCell<HashMap<u32, CallContext>>,
llm_providers: Option<Rc<LlmProviders>>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
}
@ -41,7 +32,6 @@ impl FilterContext {
callouts: RefCell::new(HashMap::new()),
metrics: Rc::new(Metrics::new()),
llm_providers: None,
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
overrides: Rc::new(None),
}
}
@ -95,7 +85,6 @@ impl RootContext for FilterContext {
.as_ref()
.expect("LLM Providers must exist when Streams are being created"),
),
Arc::clone(&self.traces_queue),
Rc::clone(&self.overrides),
)))
}
@ -108,34 +97,6 @@ impl RootContext for FilterContext {
self.set_tick_period(Duration::from_secs(1));
true
}
/// Periodic tick handler: drains the shared trace queue and forwards each
/// queued trace to the OTEL collector as a JSON POST.
///
/// Uses `try_lock` so the tick never blocks when a stream context holds the
/// queue lock — undelivered traces simply wait for the next tick. Failures
/// (serialization or callout scheduling) are logged and skipped so one bad
/// trace cannot wedge the queue or panic the filter's event loop.
fn on_tick(&mut self) {
    let _ = self.traces_queue.try_lock().map(|mut traces_queue| {
        while let Some(trace) = traces_queue.pop_front() {
            // Never panic inside the tick loop: on a serialization error,
            // log and drop this trace instead of `unwrap()`.
            let trace_str = match serde_json::to_string(&trace) {
                Ok(json) => json,
                Err(err) => {
                    warn!("failed to serialize trace, dropping it: {}", err);
                    continue;
                }
            };
            trace!("trace details: {}", trace_str);
            let call_args = CallArgs::new(
                OTEL_COLLECTOR_HTTP,
                OTEL_POST_PATH,
                vec![
                    (":method", http::Method::POST.as_str()),
                    (":path", OTEL_POST_PATH),
                    (":authority", OTEL_COLLECTOR_HTTP),
                    ("content-type", "application/json"),
                ],
                Some(trace_str.as_bytes()),
                vec![],
                Duration::from_secs(60),
            );
            // Scheduling failure is non-fatal: log it and keep draining the
            // rest of the queue.
            if let Err(error) = self.http_call(call_args, CallContext {}) {
                warn!(
                    "failed to schedule http call to otel-collector: {:?}",
                    error
                );
            }
        }
    });
}
}
impl Context for FilterContext {

View file

@ -4,10 +4,8 @@ use log::{debug, info, warn};
use proxy_wasm::hostcalls::get_current_time;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::collections::VecDeque;
use std::num::NonZero;
use std::rc::Rc;
use std::sync::{Arc, Mutex};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use crate::metrics::Metrics;
@ -20,7 +18,6 @@ use common::errors::ServerError;
use common::llm_providers::LlmProviders;
use common::ratelimit::Header;
use common::stats::{IncrementingMetric, RecordingMetric};
use common::tracing::{Event, Span, TraceData, Traceparent};
use common::{ratelimit, routing, tokenizer};
use hermesllm::apis::streaming_shapes::amazon_bedrock_binary_frame::BedrockBinaryFrameDecoder;
use hermesllm::apis::streaming_shapes::sse::{
@ -51,7 +48,6 @@ pub struct StreamContext {
ttft_time: Option<u128>,
traceparent: Option<String>,
request_body_sent_time: Option<u128>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
user_message: Option<String>,
upstream_status_code: Option<StatusCode>,
@ -65,7 +61,6 @@ impl StreamContext {
pub fn new(
metrics: Rc<Metrics>,
llm_providers: Rc<LlmProviders>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
) -> Self {
StreamContext {
@ -83,7 +78,6 @@ impl StreamContext {
ttft_duration: None,
traceparent: None,
ttft_time: None,
traces_queue,
request_body_sent_time: None,
user_message: None,
upstream_status_code: None,
@ -333,68 +327,6 @@ impl StreamContext {
self.metrics
.output_sequence_length
.record(self.response_tokens as u64);
if let Some(traceparent) = self.traceparent.as_ref() {
let current_time_ns = current_time_ns();
match Traceparent::try_from(traceparent.to_string()) {
Err(e) => {
warn!("traceparent header is invalid: {}", e);
}
Ok(traceparent) => {
let service_name = match &self.resolved_api {
Some(api) => {
let api_display = api.to_string();
format!("archgw.{}", api_display)
}
None => "archgw".to_string(),
};
let mut trace_data =
common::tracing::TraceData::new_with_service_name(service_name);
let mut llm_span = Span::new(
self.llm_provider().name.to_string(),
Some(traceparent.trace_id),
Some(traceparent.parent_id),
self.request_body_sent_time.unwrap(),
current_time_ns,
);
llm_span
.add_attribute("model".to_string(), self.llm_provider().name.to_string());
if let Some(user_message) = &self.user_message {
llm_span.add_attribute("message".to_string(), user_message.clone());
}
// Add HTTP attributes
if let Some(method) = &self.http_method {
llm_span.add_attribute("http.method".to_string(), method.clone());
}
if let Some(protocol) = &self.http_protocol {
llm_span.add_attribute("http.protocol".to_string(), protocol.clone());
}
if let Some(status_code) = &self.upstream_status_code {
llm_span.add_attribute(
"http.status_code".to_string(),
status_code.as_u16().to_string(),
);
}
// Add request ID attribute
llm_span
.add_attribute("http.request_id".to_string(), self.request_identifier());
if self.ttft_time.is_some() {
llm_span.add_event(Event::new(
"time_to_first_token".to_string(),
self.ttft_time.unwrap(),
));
}
trace_data.add_span(llm_span);
self.traces_queue.lock().unwrap().push_back(trace_data);
}
};
}
}
fn read_raw_response_body(&mut self, body_size: usize) -> Result<Vec<u8>, Action> {