Add in tpot (#269)

* add in tpot (time per output token) and tokens per second

* add in debug logs for new stats and update integration tests

* update shared dashboard to include new stats
This commit is contained in:
Aayush 2024-11-14 15:03:08 -08:00 committed by GitHub
parent 9eeb790c7f
commit 1d229cba8f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 252 additions and 28 deletions

View file

@ -296,6 +296,17 @@ impl HttpContext for StreamContext {
debug!("Total latency: {} milliseconds", duration_ms);
// Record the latency to the latency histogram
self.metrics.request_latency.record(duration_ms as u64);
// Compute the time per output token
let tpot = duration_ms as u64 / self.response_tokens as u64;
debug!("Time per output token: {} milliseconds", tpot);
// Record the time per output token
self.metrics.time_per_output_token.record(tpot);
debug!("Tokens per second: {}", 1000 / tpot);
// Record the tokens per second
self.metrics.tokens_per_second.record(1000 / tpot);
}
Err(e) => {
warn!("SystemTime error: {:?}", e);