From 840b6a0e3e04fe32c48aa7cd30b8ffc1634a980c Mon Sep 17 00:00:00 2001 From: aayushwhiz Date: Wed, 6 Nov 2024 18:37:48 -0800 Subject: [PATCH] fix bug with checking for token count of zero Changed the check to require that the token count is greater than 0, changed the debug message to say tokens, and divided the elapsed time by the number of tokens received during that time so the recorded value is actually per token. --- crates/llm_gateway/src/stream_context.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 701d7dd0..f5545928 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -395,7 +395,7 @@ impl HttpContext for StreamContext { warn!("Failed to get current time: {:?}", e); } } - } else if self.first_token_processed && token_count == 0 { + } else if self.first_token_processed && token_count > 0 { if let Some(last_token_time) = self.last_token_time { match get_current_time() { Ok(current_time) => { @@ -405,13 +405,13 @@ impl HttpContext for StreamContext { // Convert the duration to milliseconds let duration_ms = duration.as_millis(); debug!( - "Time for Current Output Token: {} milliseconds", + "Time for Current Output Tokens: {} milliseconds", duration_ms ); // Record TPOT metric for historgram self.metrics .time_per_output_token - .record(duration_ms as u64); + .record((duration_ms as u64) / (token_count as u64)); } Err(e) => { warn!("SystemTime error: {:?}", e);