From 41359118039a61b209472f98ae1d2ab2f197269f Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Tue, 12 Nov 2024 14:24:08 -0800 Subject: [PATCH] fix dashboard --- crates/llm_gateway/src/stream_context.rs | 10 ++++++++-- .../grafana/dashboards/envoy_overview.json | 4 ++-- .../grafana/dashboards/envoy_overview.json | 20 +++++++++---------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index ab73798b..2c0c764c 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -250,9 +250,15 @@ impl HttpContext for StreamContext { }); } + // only use the tokens from the messages, excluding the metadata and json tags + let input_tokens_str = deserialized_body + .messages + .iter() + .fold(String::new(), |acc, m| { + acc + " " + m.content.as_ref().unwrap_or(&String::new()) + }); // enforce ratelimits on ingress - if let Err(e) = - self.enforce_ratelimits(&deserialized_body.model, &chat_completion_request_str) + if let Err(e) = self.enforce_ratelimits(&deserialized_body.model, input_tokens_str.as_str()) { self.send_server_error( ServerError::ExceededRatelimit(e), diff --git a/demos/weather_forecast/grafana/dashboards/envoy_overview.json b/demos/weather_forecast/grafana/dashboards/envoy_overview.json index 259a510f..5a77e075 100644 --- a/demos/weather_forecast/grafana/dashboards/envoy_overview.json +++ b/demos/weather_forecast/grafana/dashboards/envoy_overview.json @@ -202,7 +202,7 @@ "useBackend": false } ], - "title": "request token count (p50)", + "title": "input sequence length (p50)", "type": "timeseries" }, { @@ -317,7 +317,7 @@ "useBackend": false } ], - "title": "response token count (p50)", + "title": "output sequence length (p50)", "type": "timeseries" }, { diff --git a/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json b/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json index 259a510f..5d372209 100644 --- a/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json +++ b/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json @@ -192,8 +192,8 @@ "targets": [ { "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[5m])))", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -202,7 +202,7 @@ "useBackend": false } ], - "title": "request token count (p50)", + "title": "input sequence length (p50)", "type": "timeseries" }, { @@ -307,7 +307,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))", + "expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[5m])) by(le))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -317,7 +317,7 @@ "useBackend": false } ], - "title": "response token count (p50)", + "title": "output sequence length (p50)", "type": "timeseries" }, { @@ -416,8 +416,8 @@ "targets": [ { "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[5m])))", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -542,7 +542,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))", + "expr": "histogram_quantile(0.5, sum(rate(latency_bucket[5m])) by (le))", "hide": false, "instant": false, "legendFormat": "__auto", @@ -555,7 +555,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))", + "expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[5m])) by (le))", "hide": false, "instant": false, "legendFormat": "__auto", @@ -582,6 +582,6 @@ "timezone": "browser", "title": "Arch Gateway Dashboard", "uid": "adt6uhx5lk8aob", - "version": 1, + "version": 2, "weekStart": "" }