Merge branch 'main' into adil/use_custom_tags

This commit is contained in:
Adil Hafeez 2024-11-18 10:18:16 -08:00
commit f398ef047f
17 changed files with 298 additions and 3967 deletions

View file

@ -43,9 +43,10 @@ pub struct StreamContext {
ttft_time: Option<SystemTime>,
trace_id: String,
span_id: String,
traceparent: String,
parent_span_id: Option<String>,
traceparent: String,
traceparent_present_in_request: bool,
request_body_sent_time: Option<SystemTime>,
user_message: Option<Message>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
}
@ -79,6 +80,7 @@ impl StreamContext {
user_message: None,
traces_queue,
traceparent_present_in_request: false,
request_body_sent_time: None,
}
}
fn llm_provider(&self) -> &LlmProvider {
@ -227,6 +229,11 @@ impl HttpContext for StreamContext {
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
// Let the client send the gateway all the data before sending to the LLM_provider.
// TODO: consider a streaming API.
if self.request_body_sent_time.is_none() {
self.request_body_sent_time = Some(get_current_time().unwrap());
}
if !end_of_stream {
return Action::Pause;
}
@ -412,7 +419,8 @@ impl HttpContext for StreamContext {
self.trace_id.to_string(),
self.span_id.to_string(),
parent_span_id,
self.start_time
self.request_body_sent_time
.unwrap()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_nanos(),

View file

@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_record("input_sequence_length", 21)
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(incomplete_chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
.expect_log(Some(LogLevel::Debug), None)
.expect_send_local_response(
Some(StatusCode::BAD_REQUEST.as_u16().into()),
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)