From 87f5efd790445327cdc4818df39cc74dee5dd5bc Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Fri, 18 Oct 2024 15:07:23 -0700 Subject: [PATCH] update dashboard --- arch/envoy.template.yaml | 740 +++++++++--------- .../grafana/dashboards/envoy_overview.json | 114 ++- 2 files changed, 487 insertions(+), 367 deletions(-) diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 235807d5..14e26e84 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -1,361 +1,379 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [], - "panels": [ - { - "datasource": { - "default": true, - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "bars", - "fillOpacity": 54, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum{envoy_cluster_name!=\"openai\",envoy_cluster_name!=\"arch_llm_listener\"}[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "request latency - internal (ms)", - "type": "timeseries" - }, - { - "datasource": { - "default": true, - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum{envoy_cluster_name!=\"hallucination\", envoy_cluster_name!=\"zeroshot\", envoy_cluster_name!=\"embeddings\", envoy_cluster_name!=\"arch_fc\", envoy_cluster_name!=\"api_server\"}[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "request latency - external (ms)", - "type": "timeseries" - }, - { - "datasource": { - "default": true, - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed{envoy_cluster_name !=\"opentelemetry_collector\"}[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_external_upstream_rq_completed{envoy_cluster_name !=\"opentelemetry_collector\"}[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Upstream request count", - "type": "timeseries" - } - ], - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Intelligent Gateway Overview", - "uid": "adt6uhx5lk8aob", - "version": 2, - "weekStart": "" -} +admin: + address: + socket_address: { address: 0.0.0.0, port_value: 9901 } +static_resources: + listeners: + - name: arch_listener_http + address: + socket_address: + address: 0.0.0.0 + port_value: 10000 + traffic_direction: INBOUND + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + generate_request_id: true + tracing: + provider: + name: envoy.tracers.opentelemetry + typed_config: + "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig + grpc_service: + envoy_grpc: + cluster_name: opentelemetry_collector + timeout: 0.250s + service_name: arch + random_sampling: + value: {{ arch_tracing.random_sampling }} + {% endif %} + stat_prefix: arch_listener_http + codec_type: AUTO + scheme_header_transformation: + scheme_to_overwrite: https + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: "/var/log/access_ingress.log" + route_config: + name: local_routes + virtual_hosts: + - name: local_service + domains: + - "*" + routes: + - match: + prefix: "/" + route: + auto_host_rewrite: true + cluster: arch_llm_listener + timeout: 60s + http_filters: + - name: envoy.filters.http.wasm + typed_config: + "@type": type.googleapis.com/udpa.type.v1.TypedStruct + type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm + value: + config: + name: "http_config" + root_id: prompt_gateway + configuration: + "@type": "type.googleapis.com/google.protobuf.StringValue" + value: | + {{ arch_config | indent(32) }} + vm_config: + runtime: "envoy.wasm.runtime.v8" + code: + local: + filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm" + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + + - name: arch_internal + address: + socket_address: + address: 0.0.0.0 + port_value: 11000 + traffic_direction: OUTBOUND + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + generate_request_id: true + tracing: + provider: + name: envoy.tracers.opentelemetry + typed_config: + "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig + grpc_service: + envoy_grpc: + cluster_name: opentelemetry_collector + timeout: 0.250s + service_name: arch + random_sampling: + value: {{ arch_tracing.random_sampling }} + {% endif %} + stat_prefix: arch_internal + codec_type: AUTO + scheme_header_transformation: + scheme_to_overwrite: https + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: "/var/log/access_internal.log" + route_config: + name: local_routes + virtual_hosts: + - name: local_service + domains: + - "*" + routes: + + {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %} + - match: + prefix: "/" + headers: + - name: "x-arch-upstream" + string_match: + exact: {{ internal_clustrer }} + route: + auto_host_rewrite: true + cluster: {{ internal_clustrer }} + timeout: 60s + {% endfor %} + + {% for _, cluster in arch_clusters.items() %} + - match: + prefix: "/" + headers: + - name: "x-arch-upstream" + string_match: + exact: {{ cluster.name }} + route: + auto_host_rewrite: true + cluster: {{ cluster.name }} + timeout: 60s + {% endfor %} + http_filters: + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + + - name: arch_listener_llm + address: + socket_address: + address: 0.0.0.0 + port_value: 12000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + generate_request_id: true + tracing: + provider: + name: envoy.tracers.opentelemetry + typed_config: + "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig + grpc_service: + envoy_grpc: + cluster_name: opentelemetry_collector + timeout: 0.250s + service_name: arch + random_sampling: + value: {{ arch_tracing.random_sampling }} + {% endif %} + stat_prefix: arch_listener_http + codec_type: AUTO + scheme_header_transformation: + scheme_to_overwrite: https + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: "/var/log/access_llm.log" + route_config: + name: local_routes + virtual_hosts: + - name: local_service + domains: + - "*" + routes: + {% for provider in arch_llm_providers %} + - match: + prefix: "/" + headers: + - name: "x-arch-llm-provider" + string_match: + exact: {{ provider.name }} + route: + auto_host_rewrite: true + cluster: {{ provider.provider }} + timeout: 60s + {% endfor %} + - match: + prefix: "/" + direct_response: + status: 400 + body: + inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n" + http_filters: + - name: envoy.filters.http.wasm + typed_config: + "@type": type.googleapis.com/udpa.type.v1.TypedStruct + type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm + value: + config: + name: "http_config" + root_id: llm_gateway + configuration: + "@type": "type.googleapis.com/google.protobuf.StringValue" + value: | + {{ arch_llm_config | indent(32) }} + vm_config: + runtime: "envoy.wasm.runtime.v8" + code: + local: + filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm" + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + + clusters: + - name: openai + connect_timeout: 5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: openai + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.openai.com + port_value: 443 + hostname: "api.openai.com" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.openai.com + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + tls_maximum_protocol_version: TLSv1_3 + - name: mistral + connect_timeout: 5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: mistral + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.mistral.ai + port_value: 443 + hostname: "api.mistral.ai" + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + sni: api.mistral.ai + {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %} + - name: {{ internal_clustrer }} + connect_timeout: 5s + type: STRICT_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: {{ internal_clustrer }} + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: host.docker.internal + port_value: 51000 + hostname: {{ internal_clustrer }} + {% endfor %} + - name: mistral_7b_instruct + connect_timeout: 5s + type: STRICT_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: mistral_7b_instruct + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: mistral_7b_instruct + port_value: 10001 + hostname: "mistral_7b_instruct" +{% for _, cluster in arch_clusters.items() %} + - name: {{ cluster.name }} + {% if cluster.connect_timeout -%} + connect_timeout: {{ cluster.connect_timeout }} + {% else -%} + connect_timeout: 5s + {% endif -%} + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: {{ cluster.name }} + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: {{ cluster.endpoint }} + port_value: {{ cluster.port }} + hostname: {{ cluster.name }} +{% endfor %} + - name: arch_internal + connect_timeout: 5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: arch_internal + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 0.0.0.0 + port_value: 11000 + hostname: arch_internal + + - name: arch_llm_listener + connect_timeout: 5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: arch_llm_listener + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 0.0.0.0 + port_value: 12000 + hostname: arch_llm_listener + +{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + - name: opentelemetry_collector + type: STRICT_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: {} + load_assignment: + cluster_name: opentelemetry_collector + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: host.docker.internal + port_value: 4317 +{% endif %} diff --git a/demos/function_calling/grafana/dashboards/envoy_overview.json b/demos/function_calling/grafana/dashboards/envoy_overview.json index 235807d5..3bb43a91 100644 --- a/demos/function_calling/grafana/dashboards/envoy_overview.json +++ b/demos/function_calling/grafana/dashboards/envoy_overview.json @@ -87,7 +87,7 @@ "x": 0, "y": 0 }, - "id": 1, + "id": 5, "options": { "legend": { "calcs": [], @@ -108,7 +108,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum{envoy_cluster_name!=\"openai\",envoy_cluster_name!=\"arch_llm_listener\"}[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", + "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum{envoy_cluster_name=\"api_server\"}[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -119,7 +119,7 @@ "useBackend": false } ], - "title": "request latency - internal (ms)", + "title": "request latency - developer api server (ms)", "type": "timeseries" }, { @@ -224,6 +224,108 @@ "title": "request latency - external (ms)", "type": "timeseries" }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 54, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum{envoy_cluster_name!=\"openai\",envoy_cluster_name!=\"arch_llm_listener\",envoy_cluster_name!=\"api_server\"}[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "request latency - internal arch services (ms)", + "type": "timeseries" + }, { "datasource": { "default": true, @@ -289,7 +391,7 @@ "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 16 }, "id": 3, "options": { @@ -339,7 +441,7 @@ "useBackend": false } ], - "title": "Upstream request count", + "title": "Request count (per sec)", "type": "timeseries" } ], @@ -356,6 +458,6 @@ "timezone": "browser", "title": "Intelligent Gateway Overview", "uid": "adt6uhx5lk8aob", - "version": 2, + "version": 14, "weekStart": "" }