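Fix dashboard: rate-limit on message content only, rename token-count panels to sequence length, and use 5m rate windows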

This commit is contained in:
Adil Hafeez 2024-11-12 14:24:08 -08:00
parent ee2751be68
commit 4135911803
3 changed files with 20 additions and 14 deletions

View file

@ -250,9 +250,15 @@ impl HttpContext for StreamContext {
});
}
// Build the rate-limit input from the message contents only, excluding request metadata and JSON markup.
let input_tokens_str = deserialized_body
.messages
.iter()
.fold(String::new(), |acc, m| {
acc + " " + m.content.as_ref().unwrap_or(&String::new())
});
// enforce ratelimits on ingress
if let Err(e) =
self.enforce_ratelimits(&deserialized_body.model, &chat_completion_request_str)
if let Err(e) = self.enforce_ratelimits(&deserialized_body.model, input_tokens_str.as_str())
{
self.send_server_error(
ServerError::ExceededRatelimit(e),

View file

@ -202,7 +202,7 @@
"useBackend": false
}
],
"title": "request token count (p50)",
"title": "input sequence length (p50)",
"type": "timeseries"
},
{
@ -317,7 +317,7 @@
"useBackend": false
}
],
"title": "response token count (p50)",
"title": "output sequence length (p50)",
"type": "timeseries"
},
{

View file

@ -192,8 +192,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -202,7 +202,7 @@
"useBackend": false
}
],
"title": "request token count (p50)",
"title": "input sequence length (p50)",
"type": "timeseries"
},
{
@ -307,7 +307,7 @@
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[5m])) by(le))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -317,7 +317,7 @@
"useBackend": false
}
],
"title": "response token count (p50)",
"title": "output sequence length (p50)",
"type": "timeseries"
},
{
@ -416,8 +416,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -542,7 +542,7 @@
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))",
"expr": "histogram_quantile(0.5, sum(rate(latency_bucket[5m])) by (le))",
"hide": false,
"instant": false,
"legendFormat": "__auto",
@ -555,7 +555,7 @@
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))",
"expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[5m])) by (le))",
"hide": false,
"instant": false,
"legendFormat": "__auto",
@ -582,6 +582,6 @@
"timezone": "browser",
"title": "Arch Gateway Dashboard",
"uid": "adt6uhx5lk8aob",
"version": 1,
"version": 2,
"weekStart": ""
}