diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 7e35e7f2..c7e42c82 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -40,6 +40,7 @@ pub struct StreamContext {
     ttft_duration: Option<Duration>,
     ttft_time: Option<SystemTime>,
     pub traceparent: Option<String>,
+    request_body_sent_time: Option<SystemTime>,
     user_message: Option<String>,
 }
 
@@ -60,6 +61,7 @@ impl StreamContext {
             traceparent: None,
             ttft_time: None,
             user_message: None,
+            request_body_sent_time: None,
         }
     }
     fn llm_provider(&self) -> &LlmProvider {
@@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
     fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
         // Let the client send the gateway all the data before sending to the LLM_provider.
        // TODO: consider a streaming API.
+
+        if self.request_body_sent_time.is_none() {
+            self.request_body_sent_time = Some(get_current_time().unwrap());
+        }
+
         if !end_of_stream {
             return Action::Pause;
         }
@@ -351,7 +358,7 @@
             "upstream_llm_time".to_string(),
             parent_trace_id.to_string(),
             Some(parent_span_id.to_string()),
-            self.start_time
+            self.request_body_sent_time
                 .unwrap()
                 .duration_since(UNIX_EPOCH)
                 .unwrap()
diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs
index a40389aa..c39debd6 100644
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
         )
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
         .expect_log(Some(LogLevel::Trace), None)
         .expect_log(Some(LogLevel::Debug), None)
         .expect_metric_record("input_sequence_length", 21)
@@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
         )
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(incomplete_chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
         .expect_log(Some(LogLevel::Debug), None)
         .expect_send_local_response(
             Some(StatusCode::BAD_REQUEST.as_u16().into()),
@@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
         )
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
         // The actual call is not important in this test, we just need to grab the token_id
         .expect_log(Some(LogLevel::Trace), None)
         .expect_log(Some(LogLevel::Debug), None)
@@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
         )
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
         // The actual call is not important in this test, we just need to grab the token_id
         .expect_log(Some(LogLevel::Trace), None)
         .expect_log(Some(LogLevel::Debug), None)
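
For reviewers, a minimal self-contained sketch of the timing pattern this patch introduces (the `TimingState` type and `on_body_chunk` helper are illustrative names, not the gateway's actual API): the timestamp is latched once, on the first `on_http_request_body` callback, and reused as the start of the `upstream_llm_time` span, so the span is anchored to when the request body began arriving rather than to `start_time`.

```rust
use std::time::{Duration, SystemTime, UNIX_EPOCH};

// Hypothetical stand-in for the gateway's stream state; only the field
// this diff adds is modeled.
struct TimingState {
    request_body_sent_time: Option<SystemTime>,
}

impl TimingState {
    // Mirrors the new check in `on_http_request_body`: the timestamp is
    // set on the first body chunk and never overwritten, even when the
    // body arrives in several chunks before `end_of_stream`.
    fn on_body_chunk(&mut self, now: SystemTime) {
        if self.request_body_sent_time.is_none() {
            self.request_body_sent_time = Some(now);
        }
    }

    // Span start in nanoseconds since the epoch, matching the
    // `duration_since(UNIX_EPOCH)` expression the diff rewires from
    // `start_time` to `request_body_sent_time`.
    fn upstream_span_start_ns(&self) -> u128 {
        self.request_body_sent_time
            .expect("body callback seen before the span is recorded")
            .duration_since(UNIX_EPOCH)
            .expect("clock is after the epoch")
            .as_nanos()
    }
}

fn main() {
    let mut state = TimingState { request_body_sent_time: None };
    let first_chunk = SystemTime::now();
    state.on_body_chunk(first_chunk);
    // A later chunk must not move the timestamp.
    state.on_body_chunk(first_chunk + Duration::from_millis(5));
    assert_eq!(state.request_body_sent_time, Some(first_chunk));
    println!("upstream_llm_time starts at {} ns", state.upstream_span_start_ns());
}
```

The integration tests pin this down by mocking the host clock: `expect_get_current_time_nanos().returning(Some(0))` makes the latched timestamp deterministic, since `get_current_time()` is a proxy-wasm host call rather than a direct system clock read.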