fix start time of llm filter (#278)

* fix start time of llm filter

* fix integration tests
This commit is contained in:
Adil Hafeez 2024-11-17 17:01:19 -08:00 committed by GitHub
parent df0cd50cbd
commit 097513ee60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 16 additions and 1 deletion

View file

@ -40,6 +40,7 @@ pub struct StreamContext {
ttft_duration: Option<Duration>,
ttft_time: Option<SystemTime>,
pub traceparent: Option<String>,
request_body_sent_time: Option<SystemTime>,
user_message: Option<Message>,
}
@ -60,6 +61,7 @@ impl StreamContext {
traceparent: None,
ttft_time: None,
user_message: None,
request_body_sent_time: None,
}
}
fn llm_provider(&self) -> &LlmProvider {
@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
// Let the client send the gateway all the data before sending to the LLM_provider.
// TODO: consider a streaming API.
if self.request_body_sent_time.is_none() {
self.request_body_sent_time = Some(get_current_time().unwrap());
}
if !end_of_stream {
return Action::Pause;
}
@ -351,7 +358,7 @@ impl HttpContext for StreamContext {
"upstream_llm_time".to_string(),
parent_trace_id.to_string(),
Some(parent_span_id.to_string()),
self.start_time
self.request_body_sent_time
.unwrap()
.duration_since(UNIX_EPOCH)
.unwrap()

View file

@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_record("input_sequence_length", 21)
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(incomplete_chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
.expect_log(Some(LogLevel::Debug), None)
.expect_send_local_response(
Some(StatusCode::BAD_REQUEST.as_u16().into()),
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)