mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Merge branch 'main' into adil/use_custom_tags
This commit is contained in:
commit
f398ef047f
17 changed files with 298 additions and 3967 deletions
|
|
@ -43,9 +43,10 @@ pub struct StreamContext {
|
|||
ttft_time: Option<SystemTime>,
|
||||
trace_id: String,
|
||||
span_id: String,
|
||||
traceparent: String,
|
||||
parent_span_id: Option<String>,
|
||||
traceparent: String,
|
||||
traceparent_present_in_request: bool,
|
||||
request_body_sent_time: Option<SystemTime>,
|
||||
user_message: Option<Message>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
}
|
||||
|
|
@ -79,6 +80,7 @@ impl StreamContext {
|
|||
user_message: None,
|
||||
traces_queue,
|
||||
traceparent_present_in_request: false,
|
||||
request_body_sent_time: None,
|
||||
}
|
||||
}
|
||||
fn llm_provider(&self) -> &LlmProvider {
|
||||
|
|
@ -227,6 +229,11 @@ impl HttpContext for StreamContext {
|
|||
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
|
||||
// Let the client send the gateway all the data before sending to the LLM_provider.
|
||||
// TODO: consider a streaming API.
|
||||
|
||||
if self.request_body_sent_time.is_none() {
|
||||
self.request_body_sent_time = Some(get_current_time().unwrap());
|
||||
}
|
||||
|
||||
if !end_of_stream {
|
||||
return Action::Pause;
|
||||
}
|
||||
|
|
@ -412,7 +419,8 @@ impl HttpContext for StreamContext {
|
|||
self.trace_id.to_string(),
|
||||
self.span_id.to_string(),
|
||||
parent_span_id,
|
||||
self.start_time
|
||||
self.request_body_sent_time
|
||||
.unwrap()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_nanos(),
|
||||
|
|
|
|||
|
|
@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_metric_record("input_sequence_length", 21)
|
||||
|
|
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(incomplete_chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_send_local_response(
|
||||
Some(StatusCode::BAD_REQUEST.as_u16().into()),
|
||||
|
|
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue