{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "description": "RED, LLM upstream, routing service, and process metrics for brightstaff. Pair with Envoy admin metrics from cluster=bright_staff.",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 1,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
      "id": 100,
      "panels": [],
      "title": "HTTP RED",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "req/s",
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "reqps"
        }
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
      "id": 1,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (handler) (rate(brightstaff_http_requests_total[1m]))",
          "legendFormat": "{{handler}}",
          "refId": "A"
        }
      ],
      "title": "Rate — brightstaff RPS by handler",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "5xx fraction over 5m. Page-worthy when sustained above ~1%.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 0.01 },
              { "color": "red", "value": 0.05 }
            ]
          },
          "unit": "percentunit"
        }
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
      "id": 2,
      "options": {
        "colorMode": "background",
        "graphMode": "area",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(rate(brightstaff_http_requests_total{status_class=\"5xx\"}[5m])) / clamp_min(sum(rate(brightstaff_http_requests_total[5m])), 1e-9)",
          "legendFormat": "5xx rate",
          "refId": "A"
        }
      ],
      "title": "Errors — brightstaff 5xx rate",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "p50/p95/p99 by handler, computed from histogram buckets over 5m.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 5,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "s"
        }
      },
      "gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 },
      "id": 3,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.50, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
          "legendFormat": "p50 {{handler}}",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
          "legendFormat": "p95 {{handler}}",
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.99, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
          "legendFormat": "p99 {{handler}}",
          "refId": "C"
        }
      ],
      "title": "Duration — p50 / p95 / p99 by handler",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "In-flight requests by handler. Climbs before latency does when brightstaff is saturated.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "short"
        }
      },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 18 },
      "id": 4,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (handler) (brightstaff_http_in_flight_requests)",
          "legendFormat": "{{handler}}",
          "refId": "A"
        }
      ],
      "title": "In-flight requests by handler",
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 },
      "id": 200,
      "panels": [],
      "title": "LLM upstream",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 5,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "s"
        }
      },
      "gridPos": { "h": 9, "w": 12, "x": 0, "y": 27 },
      "id": 5,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_upstream_duration_seconds_bucket[5m])))",
          "legendFormat": "p95 {{provider}}/{{model}}",
          "refId": "A"
        }
      ],
      "title": "LLM upstream p95 by provider/model",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "All non-success error classes. timeout/connect = network, 5xx/429 = provider, parse = body shape mismatch, stream = mid-stream disconnect.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 30,
            "lineWidth": 1,
            "showPoints": "never",
            "stacking": { "mode": "normal" }
          },
          "unit": "reqps"
        }
      },
      "gridPos": { "h": 9, "w": 12, "x": 12, "y": 27 },
      "id": 6,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (provider, error_class) (rate(brightstaff_llm_upstream_requests_total{error_class!=\"none\"}[5m]))",
          "legendFormat": "{{provider}} / {{error_class}}",
          "refId": "A"
        }
      ],
      "title": "LLM upstream errors by provider / class",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "Streaming only. Empty if the route never streams.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 5,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "s"
        }
      },
      "gridPos": { "h": 9, "w": 12, "x": 0, "y": 36 },
      "id": 7,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_time_to_first_token_seconds_bucket[5m])))",
          "legendFormat": "p95 {{provider}}/{{model}}",
          "refId": "A"
        }
      ],
      "title": "Time-to-first-token p95 (streaming)",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "Tokens/sec by provider/model/kind — proxy for cost. Stacked.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 30,
            "lineWidth": 1,
            "showPoints": "never",
            "stacking": { "mode": "normal" }
          },
          "unit": "tokens/s"
        }
      },
      "gridPos": { "h": 9, "w": 12, "x": 12, "y": 36 },
      "id": 8,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (provider, model, kind) (rate(brightstaff_llm_tokens_total[5m]))",
          "legendFormat": "{{provider}}/{{model}} {{kind}}",
          "refId": "A"
        }
      ],
      "title": "Token throughput by provider / model / kind",
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 },
      "id": 300,
      "panels": [],
      "title": "Routing service",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "Which models the orchestrator picked over the last 15 minutes.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "unit": "short"
        }
      },
      "gridPos": { "h": 9, "w": 12, "x": 0, "y": 46 },
      "id": 9,
      "options": {
        "displayMode": "gradient",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (selected_model) (increase(brightstaff_router_decisions_total[15m]))",
          "legendFormat": "{{selected_model}}",
          "refId": "A"
        }
      ],
      "title": "Model selection distribution (last 15m)",
      "type": "bargauge"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "Fraction of decisions that fell back (orchestrator returned `none` or errored). High = router can't classify intent or no candidates configured.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "percentunit"
        }
      },
      "gridPos": { "h": 9, "w": 12, "x": 12, "y": 46 },
      "id": 10,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (route) (rate(brightstaff_router_decisions_total{fallback=\"true\"}[5m])) / clamp_min(sum by (route) (rate(brightstaff_router_decisions_total[5m])), 1e-9)",
          "legendFormat": "{{route}}",
          "refId": "A"
        }
      ],
      "title": "Fallback rate by route",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 5,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "s"
        }
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 55 },
      "id": 11,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "histogram_quantile(0.95, sum by (le, route) (rate(brightstaff_router_decision_duration_seconds_bucket[5m])))",
          "legendFormat": "p95 {{route}}",
          "refId": "A"
        }
      ],
      "title": "Router decision p95 latency",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "Hit / (hit + miss). Low ratio = sessions aren't being reused or TTL too short.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "red", "value": null },
              { "color": "yellow", "value": 0.5 },
              { "color": "green", "value": 0.8 }
            ]
          },
          "unit": "percentunit",
          "min": 0,
          "max": 1
        }
      },
      "gridPos": { "h": 8, "w": 6, "x": 12, "y": 55 },
      "id": 12,
      "options": {
        "colorMode": "background",
        "graphMode": "area",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(rate(brightstaff_session_cache_events_total{outcome=\"hit\"}[5m])) / clamp_min(sum(rate(brightstaff_session_cache_events_total{outcome=~\"hit|miss\"}[5m])), 1e-9)",
          "legendFormat": "hit rate",
          "refId": "A"
        }
      ],
      "title": "Session cache hit rate",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "decision_served = a real model picked. no_candidates = sentinel `none` returned. policy_error = orchestrator failed.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 30,
            "lineWidth": 1,
            "showPoints": "never",
            "stacking": { "mode": "normal" }
          },
          "unit": "reqps"
        }
      },
      "gridPos": { "h": 8, "w": 6, "x": 18, "y": 55 },
      "id": 13,
      "options": {
        "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum by (outcome) (rate(brightstaff_routing_service_requests_total[5m]))",
          "legendFormat": "{{outcome}}",
          "refId": "A"
        }
      ],
      "title": "/routing/* outcomes",
      "type": "timeseries"
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 63 },
      "id": 400,
      "panels": [],
      "title": "Process & Envoy link",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "description": "Compare to brightstaff RPS (panel 1) — sustained gap = network or Envoy queueing.",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 1,
            "showPoints": "never"
          },
          "unit": "reqps"
        }
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 64 },
      "id": 14,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(rate(envoy_cluster_upstream_rq_total{envoy_cluster_name=\"bright_staff\"}[1m]))",
          "legendFormat": "envoy → bright_staff",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(rate(brightstaff_http_requests_total[1m]))",
          "legendFormat": "brightstaff served",
          "refId": "B"
        }
      ],
      "title": "Envoy → brightstaff link health",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "drawStyle": "line",
            "fillOpacity": 10,
            "lineWidth": 1,
            "showPoints": "never"
          }
        },
        "overrides": [
          {
            "matcher": { "id": "byName", "options": "RSS" },
            "properties": [{ "id": "unit", "value": "bytes" }]
          },
          {
            "matcher": { "id": "byName", "options": "CPU" },
            "properties": [{ "id": "unit", "value": "percentunit" }]
          }
        ]
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 64 },
      "id": 15,
      "options": {
        "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "multi" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "process_resident_memory_bytes{job=\"brightstaff\"}",
          "legendFormat": "RSS",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "rate(process_cpu_seconds_total{job=\"brightstaff\"}[1m])",
          "legendFormat": "CPU",
          "refId": "B"
        }
      ],
      "title": "Brightstaff process RSS / CPU",
      "type": "timeseries"
    }
  ],
  "refresh": "30s",
  "schemaVersion": 39,
  "tags": ["plano", "brightstaff", "llm"],
  "templating": {
    "list": [
      {
        "name": "DS_PROMETHEUS",
        "label": "Prometheus",
        "type": "datasource",
        "query": "prometheus",
        "current": {
          "selected": false,
          "text": "Prometheus",
          "value": "DS_PROMETHEUS"
        },
        "hide": 0,
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "includeAll": false,
        "multi": false
      }
    ]
  },
  "time": { "from": "now-1h", "to": "now" },
  "timepicker": {},
  "timezone": "browser",
  "title": "Brightstaff (Plano dataplane)",
  "uid": "brightstaff",
  "version": 1,
  "weekStart": ""
}