mirror of
https://github.com/katanemo/plano.git
synced 2026-05-08 07:12:42 +02:00
Improve end to end tracing (#628)
* adding canonical tracing support via bright-staff * improved formatting for tools in the traces * removing anthropic from the currency exchange demo * using Envoy to transport traces, not calling OTEL directly * moving otel collector cluster outside tracing if/else * minor fixes to not write to the OTEL collector if tracing is disabled * fixed PR comments and added more trace attributes * more fixes based on PR comments * more clean up based on PR comments --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
8adb9795d8
commit
a79f55f313
34 changed files with 2556 additions and 403 deletions
|
|
@ -2,26 +2,18 @@ use crate::metrics::Metrics;
|
|||
use crate::stream_context::StreamContext;
|
||||
use common::configuration::Configuration;
|
||||
use common::configuration::Overrides;
|
||||
use common::consts::OTEL_COLLECTOR_HTTP;
|
||||
use common::consts::OTEL_POST_PATH;
|
||||
use common::http::CallArgs;
|
||||
use common::http::Client;
|
||||
use common::llm_providers::LlmProviders;
|
||||
use common::ratelimit;
|
||||
use common::stats::Gauge;
|
||||
use common::tracing::TraceData;
|
||||
use log::trace;
|
||||
use log::warn;
|
||||
use proxy_wasm::traits::*;
|
||||
use proxy_wasm::types::*;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::VecDeque;
|
||||
use std::rc::Rc;
|
||||
use std::time::Duration;
|
||||
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CallContext {}
|
||||
|
||||
|
|
@ -31,7 +23,6 @@ pub struct FilterContext {
|
|||
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
|
||||
callouts: RefCell<HashMap<u32, CallContext>>,
|
||||
llm_providers: Option<Rc<LlmProviders>>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
}
|
||||
|
||||
|
|
@ -41,7 +32,6 @@ impl FilterContext {
|
|||
callouts: RefCell::new(HashMap::new()),
|
||||
metrics: Rc::new(Metrics::new()),
|
||||
llm_providers: None,
|
||||
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
|
||||
overrides: Rc::new(None),
|
||||
}
|
||||
}
|
||||
|
|
@ -95,7 +85,6 @@ impl RootContext for FilterContext {
|
|||
.as_ref()
|
||||
.expect("LLM Providers must exist when Streams are being created"),
|
||||
),
|
||||
Arc::clone(&self.traces_queue),
|
||||
Rc::clone(&self.overrides),
|
||||
)))
|
||||
}
|
||||
|
|
@ -108,34 +97,6 @@ impl RootContext for FilterContext {
|
|||
self.set_tick_period(Duration::from_secs(1));
|
||||
true
|
||||
}
|
||||
|
||||
fn on_tick(&mut self) {
|
||||
let _ = self.traces_queue.try_lock().map(|mut traces_queue| {
|
||||
while let Some(trace) = traces_queue.pop_front() {
|
||||
let trace_str = serde_json::to_string(&trace).unwrap();
|
||||
trace!("trace details: {}", trace_str);
|
||||
let call_args = CallArgs::new(
|
||||
OTEL_COLLECTOR_HTTP,
|
||||
OTEL_POST_PATH,
|
||||
vec![
|
||||
(":method", http::Method::POST.as_str()),
|
||||
(":path", OTEL_POST_PATH),
|
||||
(":authority", OTEL_COLLECTOR_HTTP),
|
||||
("content-type", "application/json"),
|
||||
],
|
||||
Some(trace_str.as_bytes()),
|
||||
vec![],
|
||||
Duration::from_secs(60),
|
||||
);
|
||||
if let Err(error) = self.http_call(call_args, CallContext {}) {
|
||||
warn!(
|
||||
"failed to schedule http call to otel-collector: {:?}",
|
||||
error
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl Context for FilterContext {
|
||||
|
|
|
|||
|
|
@ -4,10 +4,8 @@ use log::{debug, info, warn};
|
|||
use proxy_wasm::hostcalls::get_current_time;
|
||||
use proxy_wasm::traits::*;
|
||||
use proxy_wasm::types::*;
|
||||
use std::collections::VecDeque;
|
||||
use std::num::NonZero;
|
||||
use std::rc::Rc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
|
@ -20,7 +18,6 @@ use common::errors::ServerError;
|
|||
use common::llm_providers::LlmProviders;
|
||||
use common::ratelimit::Header;
|
||||
use common::stats::{IncrementingMetric, RecordingMetric};
|
||||
use common::tracing::{Event, Span, TraceData, Traceparent};
|
||||
use common::{ratelimit, routing, tokenizer};
|
||||
use hermesllm::apis::streaming_shapes::amazon_bedrock_binary_frame::BedrockBinaryFrameDecoder;
|
||||
use hermesllm::apis::streaming_shapes::sse::{
|
||||
|
|
@ -51,7 +48,6 @@ pub struct StreamContext {
|
|||
ttft_time: Option<u128>,
|
||||
traceparent: Option<String>,
|
||||
request_body_sent_time: Option<u128>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
user_message: Option<String>,
|
||||
upstream_status_code: Option<StatusCode>,
|
||||
|
|
@ -65,7 +61,6 @@ impl StreamContext {
|
|||
pub fn new(
|
||||
metrics: Rc<Metrics>,
|
||||
llm_providers: Rc<LlmProviders>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
) -> Self {
|
||||
StreamContext {
|
||||
|
|
@ -83,7 +78,6 @@ impl StreamContext {
|
|||
ttft_duration: None,
|
||||
traceparent: None,
|
||||
ttft_time: None,
|
||||
traces_queue,
|
||||
request_body_sent_time: None,
|
||||
user_message: None,
|
||||
upstream_status_code: None,
|
||||
|
|
@ -333,68 +327,6 @@ impl StreamContext {
|
|||
self.metrics
|
||||
.output_sequence_length
|
||||
.record(self.response_tokens as u64);
|
||||
|
||||
if let Some(traceparent) = self.traceparent.as_ref() {
|
||||
let current_time_ns = current_time_ns();
|
||||
|
||||
match Traceparent::try_from(traceparent.to_string()) {
|
||||
Err(e) => {
|
||||
warn!("traceparent header is invalid: {}", e);
|
||||
}
|
||||
Ok(traceparent) => {
|
||||
let service_name = match &self.resolved_api {
|
||||
Some(api) => {
|
||||
let api_display = api.to_string();
|
||||
format!("archgw.{}", api_display)
|
||||
}
|
||||
None => "archgw".to_string(),
|
||||
};
|
||||
|
||||
let mut trace_data =
|
||||
common::tracing::TraceData::new_with_service_name(service_name);
|
||||
let mut llm_span = Span::new(
|
||||
self.llm_provider().name.to_string(),
|
||||
Some(traceparent.trace_id),
|
||||
Some(traceparent.parent_id),
|
||||
self.request_body_sent_time.unwrap(),
|
||||
current_time_ns,
|
||||
);
|
||||
llm_span
|
||||
.add_attribute("model".to_string(), self.llm_provider().name.to_string());
|
||||
|
||||
if let Some(user_message) = &self.user_message {
|
||||
llm_span.add_attribute("message".to_string(), user_message.clone());
|
||||
}
|
||||
|
||||
// Add HTTP attributes
|
||||
if let Some(method) = &self.http_method {
|
||||
llm_span.add_attribute("http.method".to_string(), method.clone());
|
||||
}
|
||||
if let Some(protocol) = &self.http_protocol {
|
||||
llm_span.add_attribute("http.protocol".to_string(), protocol.clone());
|
||||
}
|
||||
if let Some(status_code) = &self.upstream_status_code {
|
||||
llm_span.add_attribute(
|
||||
"http.status_code".to_string(),
|
||||
status_code.as_u16().to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
// Add request ID attribute
|
||||
llm_span
|
||||
.add_attribute("http.request_id".to_string(), self.request_identifier());
|
||||
|
||||
if self.ttft_time.is_some() {
|
||||
llm_span.add_event(Event::new(
|
||||
"time_to_first_token".to_string(),
|
||||
self.ttft_time.unwrap(),
|
||||
));
|
||||
}
|
||||
trace_data.add_span(llm_span);
|
||||
self.traces_queue.lock().unwrap().push_back(trace_data);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn read_raw_response_body(&mut self, body_size: usize) -> Result<Vec<u8>, Action> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue