diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs index 23f30899..2a4d47a9 100644 --- a/crates/llm_gateway/src/filter_context.rs +++ b/crates/llm_gateway/src/filter_context.rs @@ -18,8 +18,7 @@ pub struct WasmMetrics { pub active_http_calls: Gauge, pub ratelimited_rq: Counter, pub time_to_first_token: Histogram, - pub time_per_output_token: Histogram, - pub latency: Histogram, + pub request_latency: Histogram, pub output_sequence_length: Histogram, pub input_sequence_length: Histogram, } @@ -30,8 +29,7 @@ impl WasmMetrics { active_http_calls: Gauge::new(String::from("active_http_calls")), ratelimited_rq: Counter::new(String::from("ratelimited_rq")), time_to_first_token: Histogram::new(String::from("time_to_first_token")), - time_per_output_token: Histogram::new(String::from("time_per_output_token")), - latency: Histogram::new(String::from("latency")), + request_latency: Histogram::new(String::from("request_latency")), output_sequence_length: Histogram::new(String::from("output_sequence_length")), input_sequence_length: Histogram::new(String::from("input_sequence_length")), } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index b9c0f485..ab73798b 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -36,10 +36,7 @@ pub struct StreamContext { llm_provider: Option>, request_id: Option, start_time: Option, - ttft_recorded: bool, ttft_duration: Option, // Store the duration directly - first_token_processed: bool, - last_token_time: Option, } impl StreamContext { @@ -55,10 +52,7 @@ impl StreamContext { llm_provider: None, request_id: None, start_time: None, - ttft_recorded: false, ttft_duration: None, - first_token_processed: false, - last_token_time: None, } } fn llm_provider(&self) -> &LlmProvider { @@ -144,24 +138,12 @@ impl StreamContext { // Check if rate limiting needs to be applied. if let Some(selector) = self.ratelimit_selector.take() { - log::debug!("Rate limiting applied for model: {}", model); - let result = ratelimit::ratelimits(None).read().unwrap().check_limit( + log::debug!("Applying ratelimit for model: {}", model); + ratelimit::ratelimits(None).read().unwrap().check_limit( model.to_owned(), selector, NonZero::new(token_count as u32).unwrap(), - ); - - match result { - Ok(_) => log::debug!("Rate limit check passed for model: {}", model), - Err(e) => { - log::debug!( - "Rate limit check failed for model: {} with error: {:?}", - model, - e - ); - return Err(e); - } - } + )?; } else { log::debug!("No rate limit applied for model: {}", model); } @@ -196,17 +178,9 @@ impl HttpContext for StreamContext { self.request_id = self.get_http_request_header(REQUEST_ID_HEADER); //start the timing for the request using get_current_time() - match get_current_time() { - Ok(current_time) => { - self.start_time = Some(current_time); - self.ttft_recorded = false; - self.ttft_duration = None; - } - Err(e) => { - warn!("Failed to get current time: {:?}", e); - self.start_time = None; - } - } + let current_time = get_current_time().unwrap(); + self.start_time = Some(current_time); + self.ttft_duration = None; Action::Continue } @@ -309,21 +283,17 @@ impl HttpContext for StreamContext { // All streaming responses end with bytes=0 and end_stream=true // Record the latency for the request if let Some(start_time) = self.start_time { - match get_current_time() { - Ok(current_time) => match current_time.duration_since(start_time) { - Ok(duration) => { - // Convert the duration to milliseconds - let duration_ms = duration.as_millis(); - debug!("Total latency: {} milliseconds", duration_ms); - // Record the latency to the latency histogram - self.metrics.latency.record(duration_ms as u64); - } - Err(e) => { - warn!("SystemTime error: {:?}", e); - } - }, + let current_time = get_current_time().unwrap(); + match current_time.duration_since(start_time) { + Ok(duration) => { + // Convert the duration to milliseconds + let duration_ms = duration.as_millis(); + debug!("Total latency: {} milliseconds", duration_ms); + // Record the latency to the latency histogram + self.metrics.request_latency.record(duration_ms as u64); + } Err(e) => { - warn!("Failed to get current time: {:?}", e); + warn!("SystemTime error: {:?}", e); } } } @@ -422,73 +392,24 @@ impl HttpContext for StreamContext { self.response_tokens += token_count; // Compute TTFT if not already recorded - if !self.ttft_recorded { + if self.ttft_duration.is_none() { if let Some(start_time) = self.start_time { - match get_current_time() { - Ok(current_time) => match current_time.duration_since(start_time) { - Ok(duration) => { - let duration_ms = duration.as_millis(); - debug!("Time to First Token (TTFT): {} milliseconds", duration_ms); - self.ttft_duration = Some(duration); - self.metrics.time_to_first_token.record(duration_ms as u64); - } - Err(e) => { - warn!("SystemTime error: {:?}", e); - } - }, + let current_time = get_current_time().unwrap(); + match current_time.duration_since(start_time) { + Ok(duration) => { + let duration_ms = duration.as_millis(); + debug!("Time to First Token (TTFT): {} milliseconds", duration_ms); + self.ttft_duration = Some(duration); + self.metrics.time_to_first_token.record(duration_ms as u64); + } Err(e) => { - warn!("Failed to get current time: {:?}", e); + warn!("SystemTime error: {:?}", e); } } - self.ttft_recorded = true; } else { warn!("Start time was not recorded"); } } - // Check if first token was not processed yet, and if there are tokens in the response. - // If so, set the last_token_time to now and set first_token_processed to true - if !self.first_token_processed && token_count > 0 { - self.first_token_processed = true; - // Set last_token_time to now - match get_current_time() { - Ok(current_time) => { - self.last_token_time = Some(current_time); - } - Err(e) => { - warn!("Failed to get current time: {:?}", e); - } - } - } else if self.first_token_processed && token_count > 0 { - if let Some(last_token_time) = self.last_token_time { - match get_current_time() { - Ok(current_time) => { - // record the time for the current output token and calculate the time per output token - match current_time.duration_since(last_token_time) { - Ok(duration) => { - // Convert the duration to milliseconds - let duration_ms = duration.as_millis(); - debug!( - "Time for Current Output Token: {} milliseconds", - duration_ms as u64 / token_count as u64 - ); - // Record TPOT metric for historgram - self.metrics - .time_per_output_token - .record((duration_ms as u64) / (token_count as u64)); - } - Err(e) => { - warn!("SystemTime error: {:?}", e); - } - } - // Set last_token_time to now - self.last_token_time = Some(current_time); - } - Err(e) => { - warn!("Failed to get current time: {:?}", e); - } - } - } - } } else { debug!("non streaming response"); let chat_completions_response: ChatCompletionsResponse = @@ -507,31 +428,6 @@ impl HttpContext for StreamContext { .unwrap() .completion_tokens; } - - // // Compute TFT if not already recorded - // if !self.ttft_recorded { - // if let Some(start_time) = self.start_time { - // match get_current_time() { - // Ok(current_time) => match current_time.duration_since(start_time) { - // Ok(duration) => { - // let duration_ms = duration.as_millis(); - // debug!("Time to First Token (TFT): {} milliseconds", duration_ms); - // self.ttft_duration = Some(duration); - // self.metrics.time_to_first_token.record(duration_ms as u64); - // } - // Err(e) => { - // warn!("SystemTime error: {:?}", e); - // } - // }, - // Err(e) => { - // warn!("Failed to get current time: {:?}", e); - // } - // } - // self.ttft_recorded = true; - // } else { - // warn!("Start time was not recorded"); - // } - // } } debug!( diff --git a/demos/weather_forecast/grafana/dashboards/envoy_overview.json b/demos/weather_forecast/grafana/dashboards/envoy_overview.json index 44d1a04d..259a510f 100644 --- a/demos/weather_forecast/grafana/dashboards/envoy_overview.json +++ b/demos/weather_forecast/grafana/dashboards/envoy_overview.json @@ -50,9 +50,9 @@ "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 6, + "h": 9, + "w": 5, + "x": 0, "y": 0 }, "id": 4, @@ -63,7 +63,9 @@ "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -71,7 +73,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.3.0", + "pluginVersion": "11.3.0+security-01", "targets": [ { "datasource": { @@ -94,6 +96,115 @@ "title": "# of Completd Requests", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Input Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 5, + "y": 0 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "request token count (p50)", + "type": "timeseries" + }, { "datasource": { "default": true, @@ -169,10 +280,10 @@ ] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 + "h": 9, + "w": 10, + "x": 14, + "y": 0 }, "id": 3, "options": { @@ -187,7 +298,7 @@ "sort": "none" } }, - "pluginVersion": "11.3.0", + "pluginVersion": "11.3.0+security-01", "targets": [ { "datasource": { @@ -195,7 +306,7 @@ "uid": "PBFA97CFB590B2093" }, "disableTextWrap": false, - "editorMode": "builder", + "editorMode": "code", "expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))", "fullMetaSearch": false, "includeNullMetadata": false, @@ -206,7 +317,7 @@ "useBackend": false } ], - "title": "Output Sequence Length 50th Percentile over last hour (tokens)", + "title": "response token count (p50)", "type": "timeseries" }, { @@ -271,24 +382,24 @@ { "matcher": { "id": "byName", - "options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))" + "options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))" }, "properties": [ { "id": "displayName", - "value": "Input Sequence Length" + "value": "Time to First Token" } ] } ] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 + "h": 14, + "w": 11, + "x": 0, + "y": 9 }, - "id": 7, + "id": 8, "options": { "legend": { "calcs": [], @@ -301,12 +412,12 @@ "sort": "none" } }, - "pluginVersion": "11.3.0", + "pluginVersion": "11.3.0+security-01", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -315,7 +426,7 @@ "useBackend": false } ], - "title": "Input Sequence Length 50th percentile over last hour (tokens)", + "title": "time to first token (p50)", "type": "timeseries" }, { @@ -405,10 +516,10 @@ ] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 + "h": 14, + "w": 13, + "x": 11, + "y": 9 }, "id": 1, "options": { @@ -423,7 +534,7 @@ "sort": "none" } }, - "pluginVersion": "11.3.0", + "pluginVersion": "11.3.0+security-01", "targets": [ { "datasource": { @@ -452,228 +563,7 @@ "refId": "A" } ], - "title": "Latency 50th Percentile over last hour (ms)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "histogram_quantile(0.5, sum by(le) (rate(time_per_output_token_bucket[1h])))" - }, - "properties": [ - { - "id": "displayName", - "value": "Time Per Output Token" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.3.0", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(time_per_output_token_bucket[1h])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Time Per Output Token 50th Percentile over last hour (ms)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))" - }, - "properties": [ - { - "id": "displayName", - "value": "Time to First Token" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 6, - "y": 24 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.3.0", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Time to First Token 50th percentile over last hour (ms)", + "title": "request latency (p50)", "type": "timeseries" } ], diff --git a/demos/weather_forecast_signoz/docker-compose.yaml b/demos/weather_forecast_signoz/docker-compose.yaml index d557c0ad..e8bc1730 100644 --- a/demos/weather_forecast_signoz/docker-compose.yaml +++ b/demos/weather_forecast_signoz/docker-compose.yaml @@ -24,3 +24,41 @@ services: - "host.docker.internal:host-gateway" volumes: - ./arch_config.yaml:/app/arch_config.yaml + + trace_streamer: + build: + context: ../shared/trace_streamer + environment: + - OTEL_TRACING_HTTP_ENDPOINT=http://otel-collector:4318/v1/traces + volumes: + - ~/archgw_logs:/var/log/ + + prometheus: + image: prom/prometheus + container_name: prometheus + command: + - "--config.file=/etc/prometheus/prometheus.yaml" + ports: + - 9090:9090 + restart: unless-stopped + volumes: + - ./prometheus:/etc/prometheus + - ./prom_data:/prometheus + # profiles: + # - monitoring + + grafana: + image: grafana/grafana + container_name: grafana + ports: + - 3000:3000 + restart: unless-stopped + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=grafana + volumes: + - ./grafana:/etc/grafana/provisioning/datasources + - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml + - ./grafana/dashboards:/var/lib/grafana/dashboards + # profiles: + # - monitoring diff --git a/demos/weather_forecast_signoz/grafana/dashboard.yaml b/demos/weather_forecast_signoz/grafana/dashboard.yaml new file mode 100644 index 00000000..fd66a479 --- /dev/null +++ b/demos/weather_forecast_signoz/grafana/dashboard.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "Dashboard provider" + orgId: 1 + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git a/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json b/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json new file mode 100644 index 00000000..259a510f --- /dev/null +++ b/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json @@ -0,0 +1,587 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 1, + "links": [], + "panels": [ + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "envoy_cluster_upstream_rq_completed{envoy_cluster_name=~\"openai|api_server\"}", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": true, + "legendFormat": "{{envoy_cluster_name}}", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "title": "# of Completd Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Input Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 5, + "y": 0 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "request token count (p50)", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Output Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 10, + "x": 14, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "response token count (p50)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Time to First Token" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 11, + "x": 0, + "y": 9 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "time to first token (p50)", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Total Request Latency" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Time to First Token" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 13, + "x": 11, + "y": 9 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "request latency (p50)", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Arch Gateway Dashboard", + "uid": "adt6uhx5lk8aob", + "version": 1, + "weekStart": "" +} diff --git a/demos/weather_forecast_signoz/grafana/datasource.yaml b/demos/weather_forecast_signoz/grafana/datasource.yaml new file mode 100644 index 00000000..44999d46 --- /dev/null +++ b/demos/weather_forecast_signoz/grafana/datasource.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + url: http://prometheus:9090 + isDefault: true + access: proxy + editable: true diff --git a/demos/weather_forecast_signoz/prometheus/prometheus.yaml b/demos/weather_forecast_signoz/prometheus/prometheus.yaml new file mode 100644 index 00000000..59b47e8c --- /dev/null +++ b/demos/weather_forecast_signoz/prometheus/prometheus.yaml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s +alerting: + alertmanagers: + - static_configs: + - targets: [] + scheme: http + timeout: 10s + api_version: v1 +scrape_configs: + - job_name: envoy + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /stats + scheme: http + static_configs: + - targets: + - host.docker.internal:19901 + params: + format: ["prometheus"]