mirror of
https://github.com/katanemo/plano.git
synced 2026-04-29 02:46:28 +02:00
Add in tpot (#269)
* Add TPOT (time per output token) and tokens-per-second metrics * Add debug logs for the new stats and update the integration tests * Update the shared dashboard to include the new stats
This commit is contained in:
parent
9eeb790c7f
commit
1d229cba8f
4 changed files with 252 additions and 28 deletions
|
|
@ -296,6 +296,17 @@ impl HttpContext for StreamContext {
|
|||
debug!("Total latency: {} milliseconds", duration_ms);
|
||||
// Record the latency to the latency histogram
|
||||
self.metrics.request_latency.record(duration_ms as u64);
|
||||
|
||||
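// Compute the time per output token (integer milliseconds per token).
// NOTE(review): the division below panics if `self.response_tokens` is 0 (no output tokens counted) — guard before dividing.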
|
||||
let tpot = duration_ms as u64 / self.response_tokens as u64;
|
||||
|
||||
debug!("Time per output token: {} milliseconds", tpot);
|
||||
// Record the time per output token
|
||||
self.metrics.time_per_output_token.record(tpot);
|
||||
|
||||
debug!("Tokens per second: {}", 1000 / tpot);
|
||||
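// Record the tokens per second.
// NOTE(review): `tpot` truncates to 0 whenever duration_ms < response_tokens, so `1000 / tpot` panics — guard against `tpot == 0`.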
|
||||
self.metrics.tokens_per_second.record(1000 / tpot);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("SystemTime error: {:?}", e);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue