diff --git a/crates/common/src/stats.rs b/crates/common/src/stats.rs index 527713f3..9479fadf 100644 --- a/crates/common/src/stats.rs +++ b/crates/common/src/stats.rs @@ -80,7 +80,7 @@ impl RecordingMetric for Gauge {} /// For offset deltas impl IncrementingMetric for Gauge {} -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub struct Histogram { id: u32, } diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs index be80c390..2a4d47a9 100644 --- a/crates/llm_gateway/src/filter_context.rs +++ b/crates/llm_gateway/src/filter_context.rs @@ -5,6 +5,7 @@ use common::llm_providers::LlmProviders; use common::ratelimit; use common::stats::Counter; use common::stats::Gauge; +use common::stats::Histogram; use log::debug; use proxy_wasm::traits::*; use proxy_wasm::types::*; @@ -16,6 +17,10 @@ use std::rc::Rc; pub struct WasmMetrics { pub active_http_calls: Gauge, pub ratelimited_rq: Counter, + pub time_to_first_token: Histogram, + pub request_latency: Histogram, + pub output_sequence_length: Histogram, + pub input_sequence_length: Histogram, } impl WasmMetrics { @@ -23,6 +28,10 @@ impl WasmMetrics { WasmMetrics { active_http_calls: Gauge::new(String::from("active_http_calls")), ratelimited_rq: Counter::new(String::from("ratelimited_rq")), + time_to_first_token: Histogram::new(String::from("time_to_first_token")), + request_latency: Histogram::new(String::from("request_latency")), + output_sequence_length: Histogram::new(String::from("output_sequence_length")), + input_sequence_length: Histogram::new(String::from("input_sequence_length")), } } } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 9c3db01d..3b556a44 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -20,7 +20,10 @@ use proxy_wasm::types::*; use std::num::NonZero; use std::rc::Rc; -use common::stats::IncrementingMetric; +use common::stats::{IncrementingMetric, 
RecordingMetric}; + +use proxy_wasm::hostcalls::get_current_time; +use std::time::{Duration, SystemTime}; pub struct StreamContext { context_id: u32, @@ -32,6 +35,8 @@ pub struct StreamContext { llm_providers: Rc, llm_provider: Option>, request_id: Option, + start_time: Option, + ttft_duration: Option, // Store the duration directly } impl StreamContext { @@ -46,6 +51,8 @@ impl StreamContext { llm_providers, llm_provider: None, request_id: None, + start_time: None, + ttft_duration: None, } } fn llm_provider(&self) -> &LlmProvider { @@ -120,16 +127,27 @@ impl StreamContext { model: &str, json_string: &str, ) -> Result<(), ratelimit::Error> { + // Tokenize and record token count. + let token_count = tokenizer::token_count(model, json_string).unwrap_or(0); + + // Record the token count to metrics. + self.metrics + .input_sequence_length + .record(token_count as u64); + log::debug!("Recorded input token count: {}", token_count); + + // Check if rate limiting needs to be applied. if let Some(selector) = self.ratelimit_selector.take() { - // Tokenize and Ratelimit. 
- if let Ok(token_count) = tokenizer::token_count(model, json_string) { - ratelimit::ratelimits(None).read().unwrap().check_limit( - model.to_owned(), - selector, - NonZero::new(token_count as u32).unwrap(), - )?; - } + log::debug!("Applying ratelimit for model: {}", model); + ratelimit::ratelimits(None).read().unwrap().check_limit( + model.to_owned(), + selector, + NonZero::new(token_count as u32).unwrap(), + )?; + } else { + log::debug!("No rate limit applied for model: {}", model); } + Ok(()) } } @@ -158,6 +176,12 @@ impl HttpContext for StreamContext { ); self.request_id = self.get_http_request_header(REQUEST_ID_HEADER); + + //start the timing for the request using get_current_time() + let current_time = get_current_time().unwrap(); + self.start_time = Some(current_time); + self.ttft_duration = None; + Action::Continue } @@ -226,9 +250,15 @@ impl HttpContext for StreamContext { }); } + // only use the tokens from the messages, excluding the metadata and json tags + let input_tokens_str = deserialized_body + .messages + .iter() + .fold(String::new(), |acc, m| { + acc + " " + m.content.as_ref().unwrap_or(&String::new()) + }); // enforce ratelimits on ingress - if let Err(e) = - self.enforce_ratelimits(&deserialized_body.model, &chat_completion_request_str) + if let Err(e) = self.enforce_ratelimits(&deserialized_body.model, input_tokens_str.as_str()) { self.send_server_error( ServerError::ExceededRatelimit(e), @@ -254,10 +284,33 @@ impl HttpContext for StreamContext { return Action::Continue; } - let body = if self.streaming_response { - if end_of_stream && body_size == 0 { - return Action::Continue; + let current_time = get_current_time().unwrap(); + if end_of_stream && body_size == 0 { + // All streaming responses end with bytes=0 and end_stream=true + // Record the latency for the request + if let Some(start_time) = self.start_time { + match current_time.duration_since(start_time) { + Ok(duration) => { + // Convert the duration to milliseconds + let duration_ms 
= duration.as_millis(); + debug!("Total latency: {} milliseconds", duration_ms); + // Record the latency to the latency histogram + self.metrics.request_latency.record(duration_ms as u64); + } + Err(e) => { + warn!("SystemTime error: {:?}", e); + } + } } + // Record the output sequence length + self.metrics + .output_sequence_length + .record(self.response_tokens as u64); + + return Action::Continue; + } + + let body = if self.streaming_response { let chunk_start = 0; let chunk_size = body_size; debug!( @@ -344,6 +397,26 @@ impl HttpContext for StreamContext { } }; self.response_tokens += token_count; + + // Compute TTFT if not already recorded + if self.ttft_duration.is_none() { + if let Some(start_time) = self.start_time { + let current_time = get_current_time().unwrap(); + match current_time.duration_since(start_time) { + Ok(duration) => { + let duration_ms = duration.as_millis(); + debug!("Time to First Token (TTFT): {} milliseconds", duration_ms); + self.ttft_duration = Some(duration); + self.metrics.time_to_first_token.record(duration_ms as u64); + } + Err(e) => { + warn!("SystemTime error: {:?}", e); + } + } + } else { + warn!("Start time was not recorded"); + } + } } else { debug!("non streaming response"); let chat_completions_response: ChatCompletionsResponse = diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 2a92441a..ea65bfa0 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -51,6 +51,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { .expect_log(Some(LogLevel::Debug), None) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id")) .returning(None) + .expect_get_current_time_nanos() + .returning(Some(0)) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); } @@ -72,6 +74,10 @@ fn setup_filter(module: &mut Tester, config: &str) -> i32 { .call_proxy_on_context_create(filter_context, 
0) .expect_metric_creation(MetricType::Gauge, "active_http_calls") .expect_metric_creation(MetricType::Counter, "ratelimited_rq") + .expect_metric_creation(MetricType::Histogram, "time_to_first_token") + .expect_metric_creation(MetricType::Histogram, "request_latency") + .expect_metric_creation(MetricType::Histogram, "output_sequence_length") + .expect_metric_creation(MetricType::Histogram, "input_sequence_length") .execute_and_expect(ReturnType::None) .unwrap(); @@ -209,6 +215,9 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() { .returning(Some(chat_completions_request_body)) .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) + .expect_metric_record("input_sequence_length", 21) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) @@ -310,7 +319,7 @@ fn llm_gateway_request_ratelimited() { },\ {\ \"role\": \"user\",\ - \"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"\ + \"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. 
And also summarize it how a 4th graded would understand it.\"\ }\ ],\ \"model\": \"gpt-4\"\ @@ -327,9 +336,11 @@ fn llm_gateway_request_ratelimited() { // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) + .expect_metric_record("input_sequence_length", 107) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - // .expect_metric_increment("active_http_calls", 1) .expect_send_local_response( Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), None, @@ -391,6 +402,9 @@ fn llm_gateway_request_not_ratelimited() { // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) + .expect_metric_record("input_sequence_length", 29) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) diff --git a/demos/weather_forecast/docker-compose.yaml b/demos/weather_forecast/docker-compose.yaml index d347c29a..c032b1ed 100644 --- a/demos/weather_forecast/docker-compose.yaml +++ b/demos/weather_forecast/docker-compose.yaml @@ -37,3 +37,33 @@ services: - OTEL_TRACING_HTTP_ENDPOINT=http://jaeger:4318/v1/traces volumes: - ~/archgw_logs:/var/log/ + + prometheus: + image: prom/prometheus + container_name: prometheus + command: + - "--config.file=/etc/prometheus/prometheus.yaml" + ports: + - 9090:9090 + restart: unless-stopped + volumes: + - ./prometheus:/etc/prometheus + - ./prom_data:/prometheus + profiles: + - monitoring + + grafana: + image: grafana/grafana + container_name: grafana + ports: + - 3000:3000 + restart: unless-stopped + environment: + - 
GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=grafana + volumes: + - ./grafana:/etc/grafana/provisioning/datasources + - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml + - ./grafana/dashboards:/var/lib/grafana/dashboards + profiles: + - monitoring diff --git a/demos/weather_forecast/grafana/dashboard.yaml b/demos/weather_forecast/grafana/dashboard.yaml new file mode 100644 index 00000000..fd66a479 --- /dev/null +++ b/demos/weather_forecast/grafana/dashboard.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "Dashboard provider" + orgId: 1 + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git a/demos/weather_forecast/grafana/dashboards/envoy_overview.json b/demos/weather_forecast/grafana/dashboards/envoy_overview.json new file mode 100644 index 00000000..5a77e075 --- /dev/null +++ b/demos/weather_forecast/grafana/dashboards/envoy_overview.json @@ -0,0 +1,587 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 1, + "links": [], + "panels": [ + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "envoy_cluster_upstream_rq_completed{envoy_cluster_name=~\"openai|api_server\"}", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": true, + "legendFormat": "{{envoy_cluster_name}}", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "title": "# of Completed Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Input Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 9, 
+ "x": 5, + "y": 0 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "input sequence length (p50)", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Output Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 10, + "x": 14, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": 
"list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "output sequence length (p50)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Time to First Token" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 11, + "x": 0, + "y": 9 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "time to first token (p50)", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(request_latency_bucket[60m])) by (le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Total Request Latency" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Time to First Token" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 13, + "x": 
11, + "y": 9 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(request_latency_bucket[60m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "request latency (p50)", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Arch Gateway Dashboard", + "uid": "adt6uhx5lk8aob", + "version": 1, + "weekStart": "" +} diff --git a/demos/weather_forecast/grafana/datasource.yaml b/demos/weather_forecast/grafana/datasource.yaml new file mode 100644 index 00000000..44999d46 --- /dev/null +++ b/demos/weather_forecast/grafana/datasource.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + url: http://prometheus:9090 + isDefault: true + access: proxy + editable: true diff --git a/demos/weather_forecast/prometheus/prometheus.yaml b/demos/weather_forecast/prometheus/prometheus.yaml new file mode 100644 index 00000000..59b47e8c --- /dev/null +++ b/demos/weather_forecast/prometheus/prometheus.yaml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s +alerting: + alertmanagers: + - static_configs: + - 
targets: [] + scheme: http + timeout: 10s + api_version: v1 +scrape_configs: + - job_name: envoy + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /stats + scheme: http + static_configs: + - targets: + - host.docker.internal:19901 + params: + format: ["prometheus"] diff --git a/demos/weather_forecast_signoz/docker-compose.yaml b/demos/weather_forecast_signoz/docker-compose.yaml index d557c0ad..e8bc1730 100644 --- a/demos/weather_forecast_signoz/docker-compose.yaml +++ b/demos/weather_forecast_signoz/docker-compose.yaml @@ -24,3 +24,41 @@ services: - "host.docker.internal:host-gateway" volumes: - ./arch_config.yaml:/app/arch_config.yaml + + trace_streamer: + build: + context: ../shared/trace_streamer + environment: + - OTEL_TRACING_HTTP_ENDPOINT=http://otel-collector:4318/v1/traces + volumes: + - ~/archgw_logs:/var/log/ + + prometheus: + image: prom/prometheus + container_name: prometheus + command: + - "--config.file=/etc/prometheus/prometheus.yaml" + ports: + - 9090:9090 + restart: unless-stopped + volumes: + - ./prometheus:/etc/prometheus + - ./prom_data:/prometheus + # profiles: + # - monitoring + + grafana: + image: grafana/grafana + container_name: grafana + ports: + - 3000:3000 + restart: unless-stopped + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=grafana + volumes: + - ./grafana:/etc/grafana/provisioning/datasources + - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml + - ./grafana/dashboards:/var/lib/grafana/dashboards + # profiles: + # - monitoring diff --git a/demos/weather_forecast_signoz/grafana/dashboard.yaml b/demos/weather_forecast_signoz/grafana/dashboard.yaml new file mode 100644 index 00000000..fd66a479 --- /dev/null +++ b/demos/weather_forecast_signoz/grafana/dashboard.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "Dashboard provider" + orgId: 1 + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: false + 
options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git a/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json b/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json new file mode 100644 index 00000000..5d372209 --- /dev/null +++ b/demos/weather_forecast_signoz/grafana/dashboards/envoy_overview.json @@ -0,0 +1,587 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 1, + "links": [], + "panels": [ + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "envoy_cluster_upstream_rq_completed{envoy_cluster_name=~\"openai|api_server\"}", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": true, + "legendFormat": "{{envoy_cluster_name}}", + "range": false, + "refId": "A", + "useBackend": 
false + } + ], + "title": "# of Completed Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Input Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 5, + "y": 0 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[5m])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "input sequence length (p50)", + "type": "timeseries" + }, + { + "datasource": { + "default": true, 
"type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Output Sequence Length" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 10, + "x": 14, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[5m])) by(le))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "output sequence length (p50)", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))" + }, + "properties": [ + { + "id": "displayName", + "value": "Time to First Token" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 11, + "x": 0, + "y": 9 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[5m])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "time to first token (p50)", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(latency_bucket[60m])) by (le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Total Request Latency" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))" + }, + "properties": [ + { + "id": "displayName", + "value": "Time to First Token" + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 13, + "x": 11, + "y": 9 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(latency_bucket[5m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": 
"histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[5m])) by (le))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "request latency (p50)", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Arch Gateway Dashboard", + "uid": "adt6uhx5lk8aob", + "version": 2, + "weekStart": "" +} diff --git a/demos/weather_forecast_signoz/grafana/datasource.yaml b/demos/weather_forecast_signoz/grafana/datasource.yaml new file mode 100644 index 00000000..44999d46 --- /dev/null +++ b/demos/weather_forecast_signoz/grafana/datasource.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + url: http://prometheus:9090 + isDefault: true + access: proxy + editable: true diff --git a/demos/weather_forecast_signoz/prometheus/prometheus.yaml b/demos/weather_forecast_signoz/prometheus/prometheus.yaml new file mode 100644 index 00000000..59b47e8c --- /dev/null +++ b/demos/weather_forecast_signoz/prometheus/prometheus.yaml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s +alerting: + alertmanagers: + - static_configs: + - targets: [] + scheme: http + timeout: 10s + api_version: v1 +scrape_configs: + - job_name: envoy + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /stats + scheme: http + static_configs: + - targets: + - host.docker.internal:19901 + params: + format: ["prometheus"]