From a01c1c61bd3e6ee6a807c04fff793b0060cc24be Mon Sep 17 00:00:00 2001 From: aayushwhiz Date: Thu, 14 Nov 2024 14:23:28 -0800 Subject: [PATCH] change to not ignore time to first token in tpot calculation --- crates/llm_gateway/src/stream_context.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 253d8cea..38266f72 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -298,9 +298,7 @@ impl HttpContext for StreamContext { self.metrics.request_latency.record(duration_ms as u64); // Compute the time per output token - let tpot = (duration_ms as u64 - - self.ttft_duration.unwrap().as_millis() as u64) - / self.response_tokens as u64; + let tpot = duration_ms as u64 / self.response_tokens as u64; debug!("Time per output token: {} milliseconds", tpot); // Record the time per output token