mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
fix start time of llm filter (#278)
* fix start time of llm filter * fix int tests
This commit is contained in:
parent
df0cd50cbd
commit
097513ee60
2 changed files with 16 additions and 1 deletions
|
|
@ -40,6 +40,7 @@ pub struct StreamContext {
|
|||
ttft_duration: Option<Duration>,
|
||||
ttft_time: Option<SystemTime>,
|
||||
pub traceparent: Option<String>,
|
||||
request_body_sent_time: Option<SystemTime>,
|
||||
user_message: Option<Message>,
|
||||
}
|
||||
|
||||
|
|
@ -60,6 +61,7 @@ impl StreamContext {
|
|||
traceparent: None,
|
||||
ttft_time: None,
|
||||
user_message: None,
|
||||
request_body_sent_time: None,
|
||||
}
|
||||
}
|
||||
fn llm_provider(&self) -> &LlmProvider {
|
||||
|
|
@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
|
|||
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
|
||||
// Let the client send the gateway all the data before sending to the LLM_provider.
|
||||
// TODO: consider a streaming API.
|
||||
|
||||
if self.request_body_sent_time.is_none() {
|
||||
self.request_body_sent_time = Some(get_current_time().unwrap());
|
||||
}
|
||||
|
||||
if !end_of_stream {
|
||||
return Action::Pause;
|
||||
}
|
||||
|
|
@ -351,7 +358,7 @@ impl HttpContext for StreamContext {
|
|||
"upstream_llm_time".to_string(),
|
||||
parent_trace_id.to_string(),
|
||||
Some(parent_span_id.to_string()),
|
||||
self.start_time
|
||||
self.request_body_sent_time
|
||||
.unwrap()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
|
|
|
|||
|
|
@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_metric_record("input_sequence_length", 21)
|
||||
|
|
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(incomplete_chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_send_local_response(
|
||||
Some(StatusCode::BAD_REQUEST.as_u16().into()),
|
||||
|
|
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue