fix start time of llm filter (#278)

* fix start time of llm filter
* fix integration tests
2026-06-08 14:55:14 +02:00 · 2024-11-17 17:01:19 -08:00 · 2024-11-17 17:01:19 -08:00 · 097513ee60
commit 097513ee60
parent df0cd50cbd
2 changed files with 16 additions and 1 deletion
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -40,6 +40,7 @@ pub struct StreamContext {
    ttft_duration: Option<Duration>,
    ttft_time: Option<SystemTime>,
    pub traceparent: Option<String>,
+    request_body_sent_time: Option<SystemTime>,
    user_message: Option<Message>,
 }

@@ -60,6 +61,7 @@ impl StreamContext {
            traceparent: None,
            ttft_time: None,
            user_message: None,
+            request_body_sent_time: None,
        }
    }
    fn llm_provider(&self) -> &LlmProvider {
@@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
    fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
        // Let the client send the gateway all the data before sending to the LLM_provider.
        // TODO: consider a streaming API.
+
+        if self.request_body_sent_time.is_none() {
+            self.request_body_sent_time = Some(get_current_time().unwrap());
+        }
+
        if !end_of_stream {
            return Action::Pause;
        }
@@ -351,7 +358,7 @@ impl HttpContext for StreamContext {
                    "upstream_llm_time".to_string(),
                    parent_trace_id.to_string(),
                    Some(parent_span_id.to_string()),
-                    self.start_time
+                    self.request_body_sent_time
                        .unwrap()
                        .duration_since(UNIX_EPOCH)
                        .unwrap()
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 21)
@@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(incomplete_chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_send_local_response(
            Some(StatusCode::BAD_REQUEST.as_u16().into()),
@@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
@@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)