Merge branch 'main' into adil/use_custom_tags

2026-06-17 15:25:17 +02:00 · 2024-11-18 10:18:16 -08:00 · 2024-11-18 10:18:16 -08:00 · f398ef047f
commit f398ef047f
parent 780c7cf7ad 9cee04ed31
17 changed files with 298 additions and 3967 deletions
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -43,9 +43,10 @@ pub struct StreamContext {
    ttft_time: Option<SystemTime>,
    trace_id: String,
    span_id: String,
-    traceparent: String,
    parent_span_id: Option<String>,
+    traceparent: String,
    traceparent_present_in_request: bool,
+    request_body_sent_time: Option<SystemTime>,
    user_message: Option<Message>,
    traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
 }
@ -79,6 +80,7 @@ impl StreamContext {
            user_message: None,
            traces_queue,
            traceparent_present_in_request: false,
+            request_body_sent_time: None,
        }
    }
    fn llm_provider(&self) -> &LlmProvider {
@ -227,6 +229,11 @@ impl HttpContext for StreamContext {
    fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
        // Let the client send the gateway all the data before sending to the LLM_provider.
        // TODO: consider a streaming API.
+
+        if self.request_body_sent_time.is_none() {
+            self.request_body_sent_time = Some(get_current_time().unwrap());
+        }
+
        if !end_of_stream {
            return Action::Pause;
        }
@ -412,7 +419,8 @@ impl HttpContext for StreamContext {
                self.trace_id.to_string(),
                self.span_id.to_string(),
                parent_span_id,
-                self.start_time
+                self.request_body_sent_time
+                    .unwrap()
                    .duration_since(UNIX_EPOCH)
                    .unwrap()
                    .as_nanos(),
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 21)
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(incomplete_chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_send_local_response(
            Some(StatusCode::BAD_REQUEST.as_u16().into()),
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)