mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Include time to first token (TTFT) in the time-per-output-token (TPOT) calculation
This commit is contained in:
parent
312b21ef7d
commit
a01c1c61bd
1 changed file with 1 addition and 3 deletions
|
|
@@ -298,9 +298,7 @@ impl HttpContext for StreamContext {
|
|||
self.metrics.request_latency.record(duration_ms as u64);
|
||||
|
||||
// Compute the time per output token
|
||||
let tpot = (duration_ms as u64
|
||||
- self.ttft_duration.unwrap().as_millis() as u64)
|
||||
/ self.response_tokens as u64;
|
||||
let tpot = duration_ms as u64 / self.response_tokens as u64;
|
||||
|
||||
debug!("Time per output token: {} milliseconds", tpot);
|
||||
// Record the time per output token
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue