Include time to first token (TTFT) in the time-per-output-token (TPOT) calculation

This commit is contained in:
aayushwhiz 2024-11-14 14:23:28 -08:00
parent 312b21ef7d
commit a01c1c61bd

View file

@@ -298,9 +298,7 @@ impl HttpContext for StreamContext {
self.metrics.request_latency.record(duration_ms as u64);
// Compute the time per output token
-let tpot = (duration_ms as u64
-    - self.ttft_duration.unwrap().as_millis() as u64)
-    / self.response_tokens as u64;
+let tpot = duration_ms as u64 / self.response_tokens as u64;
debug!("Time per output token: {} milliseconds", tpot);
// Record the time per output token