From c18c544c94467c8b8063a279586a5fa0dd2f44ff Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Mon, 11 Nov 2024 19:16:49 -0800 Subject: [PATCH] add tft --- crates/prompt_gateway/src/http_context.rs | 34 ++++++++++++++++----- crates/prompt_gateway/src/stream_context.rs | 13 ++++---- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index 8faaa062..ae51fdc9 100644 --- a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -253,15 +253,21 @@ impl HttpContext for StreamContext { return Action::Continue; } + if self.time_to_first_token.is_none() { + self.time_to_first_token = Some( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(), + ); + } + if end_of_stream { if let Some(traceparent) = self.traceparent.as_ref() { - let since_the_epoch_ns = match SystemTime::now().duration_since(UNIX_EPOCH) { - Ok(duration) => duration.as_nanos(), - Err(_) => { - eprintln!("System time went backwards"); - std::process::exit(1); - } - }; + let since_the_epoch_ns = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); let traceparent_tokens = traceparent.split("-").collect::>(); if traceparent_tokens.len() != 4 { @@ -275,12 +281,24 @@ impl HttpContext for StreamContext { trace_id: parent_trace_id.to_string(), parent_span_id: Some(parent_span_id.to_string()), span_id: format!("{}", get_random_span_id()), - name: "archgw".to_string(), + name: "total_time".to_string(), start_time_unix_nano: format!("{}", self.start_upstream_llm_request_time), end_time_unix_nano: format!("{}", since_the_epoch_ns), kind: 1, attributes: vec![], }); + + trace_data.add_span(Span { + trace_id: parent_trace_id.to_string(), + parent_span_id: Some(parent_span_id.to_string()), + span_id: format!("{}", get_random_span_id()), + name: "time_to_first_token".to_string(), + start_time_unix_nano: format!("{}", self.start_upstream_llm_request_time), + end_time_unix_nano: format!("{}", self.time_to_first_token.unwrap()), + kind: 1, + attributes: vec![], + }); + let trace_data_str = serde_json::to_string(&trace_data).unwrap(); debug!("upstream_llm trace details: {}", trace_data_str); // send trace_data to http tracing endpoint diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 6f32d716..79dd99a7 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -78,6 +78,7 @@ pub struct StreamContext { pub prompt_guards: Rc, pub request_id: Option, pub start_upstream_llm_request_time: u128, + pub time_to_first_token: Option, pub traceparent: Option, pub tracing: Rc>, } @@ -115,6 +116,7 @@ impl StreamContext { traceparent: None, tracing, start_upstream_llm_request_time: 0, + time_to_first_token: None, } } @@ -1005,13 +1007,10 @@ impl StreamContext { }; debug!("archgw => llm request: {}", llm_request_str); - self.start_upstream_llm_request_time = match SystemTime::now().duration_since(UNIX_EPOCH) { - Ok(duration) => duration.as_nanos(), - Err(_) => { - eprintln!("System time went backwards"); - std::process::exit(1); - } - }; + self.start_upstream_llm_request_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); self.set_http_request_body(0, self.request_body_size, &llm_request_str.into_bytes()); self.resume_http_request();