Change LLM latency dashboard to be rate & bump version (#92)

This commit is contained in:
cybermaggedon 2024-10-01 21:04:55 +01:00 committed by GitHub
parent 5985b8612b
commit 56a9ac3ba9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 5 deletions

View file

@@ -1,6 +1,6 @@
# VERSION=$(shell git describe | sed 's/^v//')
VERSION=0.11.14
VERSION=0.11.15
DOCKER=podman

View file

@@ -90,14 +90,18 @@
"type": "prometheus",
"uid": "f6b18033-5918-4e05-a1ca-4cb30343b129"
},
"editorMode": "code",
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "max by(le) (text_completion_duration_bucket)",
"expr": "sum by(le) (rate(text_completion_duration_bucket[$__rate_interval]))",
"format": "heatmap",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "99%",
"range": true,
"refId": "A"
"refId": "A",
"useBackend": false
}
],
"title": "LLM latency",
@@ -1136,7 +1140,7 @@
"list": []
},
"time": {
"from": "now-5m",
"from": "now-15m",
"to": "now"
},
"timepicker": {},