address pr feedback

This commit is contained in:
Adil Hafeez 2024-11-12 13:05:03 -08:00
parent 3b8a19efe9
commit ee2751be68
8 changed files with 836 additions and 383 deletions

View file

@ -18,8 +18,7 @@ pub struct WasmMetrics {
pub active_http_calls: Gauge,
pub ratelimited_rq: Counter,
pub time_to_first_token: Histogram,
pub time_per_output_token: Histogram,
pub latency: Histogram,
pub request_latency: Histogram,
pub output_sequence_length: Histogram,
pub input_sequence_length: Histogram,
}
@ -30,8 +29,7 @@ impl WasmMetrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
latency: Histogram::new(String::from("latency")),
request_latency: Histogram::new(String::from("request_latency")),
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
}

View file

@ -36,10 +36,7 @@ pub struct StreamContext {
llm_provider: Option<Rc<LlmProvider>>,
request_id: Option<String>,
start_time: Option<SystemTime>,
ttft_recorded: bool,
ttft_duration: Option<Duration>, // Store the duration directly
first_token_processed: bool,
last_token_time: Option<SystemTime>,
}
impl StreamContext {
@ -55,10 +52,7 @@ impl StreamContext {
llm_provider: None,
request_id: None,
start_time: None,
ttft_recorded: false,
ttft_duration: None,
first_token_processed: false,
last_token_time: None,
}
}
fn llm_provider(&self) -> &LlmProvider {
@ -144,24 +138,12 @@ impl StreamContext {
// Check if rate limiting needs to be applied.
if let Some(selector) = self.ratelimit_selector.take() {
log::debug!("Rate limiting applied for model: {}", model);
let result = ratelimit::ratelimits(None).read().unwrap().check_limit(
log::debug!("Applying ratelimit for model: {}", model);
ratelimit::ratelimits(None).read().unwrap().check_limit(
model.to_owned(),
selector,
NonZero::new(token_count as u32).unwrap(),
);
match result {
Ok(_) => log::debug!("Rate limit check passed for model: {}", model),
Err(e) => {
log::debug!(
"Rate limit check failed for model: {} with error: {:?}",
model,
e
);
return Err(e);
}
}
)?;
} else {
log::debug!("No rate limit applied for model: {}", model);
}
@ -196,17 +178,9 @@ impl HttpContext for StreamContext {
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
//start the timing for the request using get_current_time()
match get_current_time() {
Ok(current_time) => {
self.start_time = Some(current_time);
self.ttft_recorded = false;
self.ttft_duration = None;
}
Err(e) => {
warn!("Failed to get current time: {:?}", e);
self.start_time = None;
}
}
let current_time = get_current_time().unwrap();
self.start_time = Some(current_time);
self.ttft_duration = None;
Action::Continue
}
@ -309,21 +283,17 @@ impl HttpContext for StreamContext {
// All streaming responses end with bytes=0 and end_stream=true
// Record the latency for the request
if let Some(start_time) = self.start_time {
match get_current_time() {
Ok(current_time) => match current_time.duration_since(start_time) {
Ok(duration) => {
// Convert the duration to milliseconds
let duration_ms = duration.as_millis();
debug!("Total latency: {} milliseconds", duration_ms);
// Record the latency to the latency histogram
self.metrics.latency.record(duration_ms as u64);
}
Err(e) => {
warn!("SystemTime error: {:?}", e);
}
},
let current_time = get_current_time().unwrap();
match current_time.duration_since(start_time) {
Ok(duration) => {
// Convert the duration to milliseconds
let duration_ms = duration.as_millis();
debug!("Total latency: {} milliseconds", duration_ms);
// Record the latency to the latency histogram
self.metrics.request_latency.record(duration_ms as u64);
}
Err(e) => {
warn!("Failed to get current time: {:?}", e);
warn!("SystemTime error: {:?}", e);
}
}
}
@ -422,73 +392,24 @@ impl HttpContext for StreamContext {
self.response_tokens += token_count;
// Compute TTFT if not already recorded
if !self.ttft_recorded {
if self.ttft_duration.is_none() {
if let Some(start_time) = self.start_time {
match get_current_time() {
Ok(current_time) => match current_time.duration_since(start_time) {
Ok(duration) => {
let duration_ms = duration.as_millis();
debug!("Time to First Token (TTFT): {} milliseconds", duration_ms);
self.ttft_duration = Some(duration);
self.metrics.time_to_first_token.record(duration_ms as u64);
}
Err(e) => {
warn!("SystemTime error: {:?}", e);
}
},
let current_time = get_current_time().unwrap();
match current_time.duration_since(start_time) {
Ok(duration) => {
let duration_ms = duration.as_millis();
debug!("Time to First Token (TTFT): {} milliseconds", duration_ms);
self.ttft_duration = Some(duration);
self.metrics.time_to_first_token.record(duration_ms as u64);
}
Err(e) => {
warn!("Failed to get current time: {:?}", e);
warn!("SystemTime error: {:?}", e);
}
}
self.ttft_recorded = true;
} else {
warn!("Start time was not recorded");
}
}
// Check if first token was not processed yet, and if there are tokens in the response.
// If so, set the last_token_time to now and set first_token_processed to true
if !self.first_token_processed && token_count > 0 {
self.first_token_processed = true;
// Set last_token_time to now
match get_current_time() {
Ok(current_time) => {
self.last_token_time = Some(current_time);
}
Err(e) => {
warn!("Failed to get current time: {:?}", e);
}
}
} else if self.first_token_processed && token_count > 0 {
if let Some(last_token_time) = self.last_token_time {
match get_current_time() {
Ok(current_time) => {
// record the time for the current output token and calculate the time per output token
match current_time.duration_since(last_token_time) {
Ok(duration) => {
// Convert the duration to milliseconds
let duration_ms = duration.as_millis();
debug!(
"Time for Current Output Token: {} milliseconds",
duration_ms as u64 / token_count as u64
);
// Record TPOT metric for historgram
self.metrics
.time_per_output_token
.record((duration_ms as u64) / (token_count as u64));
}
Err(e) => {
warn!("SystemTime error: {:?}", e);
}
}
// Set last_token_time to now
self.last_token_time = Some(current_time);
}
Err(e) => {
warn!("Failed to get current time: {:?}", e);
}
}
}
}
} else {
debug!("non streaming response");
let chat_completions_response: ChatCompletionsResponse =
@ -507,31 +428,6 @@ impl HttpContext for StreamContext {
.unwrap()
.completion_tokens;
}
// // Compute TFT if not already recorded
// if !self.ttft_recorded {
// if let Some(start_time) = self.start_time {
// match get_current_time() {
// Ok(current_time) => match current_time.duration_since(start_time) {
// Ok(duration) => {
// let duration_ms = duration.as_millis();
// debug!("Time to First Token (TFT): {} milliseconds", duration_ms);
// self.ttft_duration = Some(duration);
// self.metrics.time_to_first_token.record(duration_ms as u64);
// }
// Err(e) => {
// warn!("SystemTime error: {:?}", e);
// }
// },
// Err(e) => {
// warn!("Failed to get current time: {:?}", e);
// }
// }
// self.ttft_recorded = true;
// } else {
// warn!("Start time was not recorded");
// }
// }
}
debug!(

View file

@ -50,9 +50,9 @@
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 6,
"h": 9,
"w": 5,
"x": 0,
"y": 0
},
"id": 4,
@ -63,7 +63,9 @@
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": ["lastNotNull"],
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
@ -71,7 +73,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.0",
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"datasource": {
@ -94,6 +96,115 @@
"title": "# of Completd Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Input Sequence Length"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 9,
"x": 5,
"y": 0
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request token count (p50)",
"type": "timeseries"
},
{
"datasource": {
"default": true,
@ -169,10 +280,10 @@
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
"h": 9,
"w": 10,
"x": 14,
"y": 0
},
"id": 3,
"options": {
@ -187,7 +298,7 @@
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"datasource": {
@ -195,7 +306,7 @@
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
"fullMetaSearch": false,
"includeNullMetadata": false,
@ -206,7 +317,7 @@
"useBackend": false
}
],
"title": "Output Sequence Length 50th Percentile over last hour (tokens)",
"title": "response token count (p50)",
"type": "timeseries"
},
{
@ -271,24 +382,24 @@
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))"
"options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Input Sequence Length"
"value": "Time to First Token"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
"h": 14,
"w": 11,
"x": 0,
"y": 9
},
"id": 7,
"id": 8,
"options": {
"legend": {
"calcs": [],
@ -301,12 +412,12 @@
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -315,7 +426,7 @@
"useBackend": false
}
],
"title": "Input Sequence Length 50th percentile over last hour (tokens)",
"title": "time to first token (p50)",
"type": "timeseries"
},
{
@ -405,10 +516,10 @@
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
"h": 14,
"w": 13,
"x": 11,
"y": 9
},
"id": 1,
"options": {
@ -423,7 +534,7 @@
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"datasource": {
@ -452,228 +563,7 @@
"refId": "A"
}
],
"title": "Latency 50th Percentile over last hour (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(time_per_output_token_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Time Per Output Token"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_per_output_token_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Time Per Output Token 50th Percentile over last hour (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Time to First Token"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 6,
"y": 24
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Time to First Token 50th percentile over last hour (ms)",
"title": "request latency (p50)",
"type": "timeseries"
}
],

View file

@ -24,3 +24,41 @@ services:
- "host.docker.internal:host-gateway"
volumes:
- ./arch_config.yaml:/app/arch_config.yaml
trace_streamer:
build:
context: ../shared/trace_streamer
environment:
- OTEL_TRACING_HTTP_ENDPOINT=http://otel-collector:4318/v1/traces
volumes:
- ~/archgw_logs:/var/log/
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- "--config.file=/etc/prometheus/prometheus.yaml"
ports:
- 9090:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
# profiles:
# - monitoring
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3000:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
# profiles:
# - monitoring

View file

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: "Dashboard provider"
orgId: 1
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true

View file

@ -0,0 +1,587 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 1,
"links": [],
"panels": [
{
"datasource": {
"default": true,
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 5,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "envoy_cluster_upstream_rq_completed{envoy_cluster_name=~\"openai|api_server\"}",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": true,
"legendFormat": "{{envoy_cluster_name}}",
"range": false,
"refId": "A",
"useBackend": false
}
],
"title": "# of Completd Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Input Sequence Length"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 9,
"x": 5,
"y": 0
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request token count (p50)",
"type": "timeseries"
},
{
"datasource": {
"default": true,
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))"
},
"properties": [
{
"id": "displayName",
"value": "Output Sequence Length"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 10,
"x": 14,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "response token count (p50)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Time to First Token"
}
]
}
]
},
"gridPos": {
"h": 14,
"w": 11,
"x": 0,
"y": 9
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "time to first token (p50)",
"type": "timeseries"
},
{
"datasource": {
"default": true,
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))"
},
"properties": [
{
"id": "displayName",
"value": "Total Request Latency"
}
]
},
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))"
},
"properties": [
{
"id": "displayName",
"value": "Time to First Token"
}
]
}
]
},
"gridPos": {
"h": 14,
"w": 13,
"x": 11,
"y": 9
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))",
"hide": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))",
"hide": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "request latency (p50)",
"type": "timeseries"
}
],
"preload": false,
"refresh": "",
"schemaVersion": 40,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Arch Gateway Dashboard",
"uid": "adt6uhx5lk8aob",
"version": 1,
"weekStart": ""
}

View file

@ -0,0 +1,9 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View file

@ -0,0 +1,23 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: envoy
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /stats
scheme: http
static_configs:
- targets:
- host.docker.internal:19901
params:
format: ["prometheus"]