mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix dashboard
This commit is contained in:
parent
ee2751be68
commit
4135911803
3 changed files with 20 additions and 14 deletions
|
|
@ -250,9 +250,15 @@ impl HttpContext for StreamContext {
|
|||
});
|
||||
}
|
||||
|
||||
// only use the tokens from the messages, excluding the metadata and json tags
|
||||
let input_tokens_str = deserialized_body
|
||||
.messages
|
||||
.iter()
|
||||
.fold(String::new(), |acc, m| {
|
||||
acc + " " + m.content.as_ref().unwrap_or(&String::new())
|
||||
});
|
||||
// enforce ratelimits on ingress
|
||||
if let Err(e) =
|
||||
self.enforce_ratelimits(&deserialized_body.model, &chat_completion_request_str)
|
||||
if let Err(e) = self.enforce_ratelimits(&deserialized_body.model, input_tokens_str.as_str())
|
||||
{
|
||||
self.send_server_error(
|
||||
ServerError::ExceededRatelimit(e),
|
||||
|
|
|
|||
|
|
@ -202,7 +202,7 @@
|
|||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "request token count (p50)",
|
||||
"title": "input sequence length (p50)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
|
@ -317,7 +317,7 @@
|
|||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "response token count (p50)",
|
||||
"title": "output sequence length (p50)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -192,8 +192,8 @@
|
|||
"targets": [
|
||||
{
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "builder",
|
||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[5m])))",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": false,
|
||||
"legendFormat": "__auto",
|
||||
|
|
@ -202,7 +202,7 @@
|
|||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "request token count (p50)",
|
||||
"title": "input sequence length (p50)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
|
@ -307,7 +307,7 @@
|
|||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[5m])) by(le))",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": false,
|
||||
"instant": false,
|
||||
|
|
@ -317,7 +317,7 @@
|
|||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "response token count (p50)",
|
||||
"title": "output sequence length (p50)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
|
@ -416,8 +416,8 @@
|
|||
"targets": [
|
||||
{
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "builder",
|
||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[5m])))",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": false,
|
||||
"legendFormat": "__auto",
|
||||
|
|
@ -542,7 +542,7 @@
|
|||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(latency_bucket[5m])) by (le))",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
|
|
@ -555,7 +555,7 @@
|
|||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[5m])) by (le))",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
|
|
@ -582,6 +582,6 @@
|
|||
"timezone": "browser",
|
||||
"title": "Arch Gateway Dashboard",
|
||||
"uid": "adt6uhx5lk8aob",
|
||||
"version": 1,
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue