move custom tracer to llm filter (#267)

This commit is contained in:
Adil Hafeez 2024-11-15 10:44:01 -08:00 committed by GitHub
parent 1d229cba8f
commit d3c17c7abd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 335 additions and 133 deletions

View file

@ -190,8 +190,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by(le) (rate(input_sequence_length_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -200,7 +200,7 @@
"useBackend": false
}
],
"title": "input sequence length (p50)",
"title": "input sequence length (p90)",
"type": "timeseries"
},
{
@ -305,7 +305,7 @@
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
"expr": "histogram_quantile(0.9, sum(rate(output_sequence_length_bucket[5m])) by(le))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -315,7 +315,7 @@
"useBackend": false
}
],
"title": "output sequence length (p50)",
"title": "output sequence length (p90)",
"type": "timeseries"
},
{
@ -415,7 +415,11 @@
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
"expr": "histogram_quantile(0.9, sum by(le) (rate(time_to_first_token_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -424,7 +428,7 @@
"useBackend": false
}
],
"title": "time to first token (p50)",
"title": "time to first token (p90)",
"type": "timeseries"
},
{
@ -539,20 +543,29 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(request_latency_bucket[1h])))",
"fullMetaSearch": false,
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum(rate(request_latency_bucket[60m])) by (le))",
"hide": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
"refId": "B"
}
],
"title": "request latency (p50)",
"title": "request latency (p90)",
"type": "timeseries"
},
{