From 312b21ef7d8af5f38e86db212c1195ebe19ee15d Mon Sep 17 00:00:00 2001 From: aayushwhiz Date: Thu, 14 Nov 2024 13:56:56 -0800 Subject: [PATCH] add in debug logs for new stats and update integration tests --- crates/llm_gateway/src/stream_context.rs | 2 ++ crates/llm_gateway/tests/integration.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 4cf2e9c5..253d8cea 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -302,9 +302,11 @@ impl HttpContext for StreamContext { - self.ttft_duration.unwrap().as_millis() as u64) / self.response_tokens as u64; + debug!("Time per output token: {} milliseconds", tpot); // Record the time per output token self.metrics.time_per_output_token.record(tpot); + debug!("Tokens per second: {}", 1000 / tpot); // Record the tokens per second self.metrics.tokens_per_second.record(1000 / tpot); } diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index ea65bfa0..7107b4d2 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -75,6 +75,8 @@ fn setup_filter(module: &mut Tester, config: &str) -> i32 { .expect_metric_creation(MetricType::Gauge, "active_http_calls") .expect_metric_creation(MetricType::Counter, "ratelimited_rq") .expect_metric_creation(MetricType::Histogram, "time_to_first_token") + .expect_metric_creation(MetricType::Histogram, "time_per_output_token") + .expect_metric_creation(MetricType::Histogram, "tokens_per_second") .expect_metric_creation(MetricType::Histogram, "request_latency") .expect_metric_creation(MetricType::Histogram, "output_sequence_length") .expect_metric_creation(MetricType::Histogram, "input_sequence_length")