mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix bug with checking for token count of zero
Changed the condition to check that the token count is greater than 0, changed the debug message to say "tokens", and divided the elapsed time by the number of tokens received during that time so that the recorded value is actually per token.
This commit is contained in:
parent
bf39fecd6d
commit
840b6a0e3e
1 changed file with 3 additions and 3 deletions
|
|
@@ -395,7 +395,7 @@ impl HttpContext for StreamContext {
|
|||
warn!("Failed to get current time: {:?}", e);
|
||||
}
|
||||
}
|
||||
} else if self.first_token_processed && token_count == 0 {
|
||||
} else if self.first_token_processed && token_count > 0 {
|
||||
if let Some(last_token_time) = self.last_token_time {
|
||||
match get_current_time() {
|
||||
Ok(current_time) => {
|
||||
|
|
@@ -405,13 +405,13 @@ impl HttpContext for StreamContext {
|
|||
// Convert the duration to milliseconds
|
||||
let duration_ms = duration.as_millis();
|
||||
debug!(
|
||||
"Time for Current Output Token: {} milliseconds",
|
||||
"Time for Current Output Tokens: {} milliseconds",
|
||||
duration_ms
|
||||
);
|
||||
// Record TPOT metric for histogram
|
||||
self.metrics
|
||||
.time_per_output_token
|
||||
.record(duration_ms as u64);
|
||||
.record((duration_ms as u64) / (token_count as u64));
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("SystemTime error: {:?}", e);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue