diff --git a/demos/observability/README.md b/demos/observability/README.md new file mode 100644 index 00000000..12868f7d --- /dev/null +++ b/demos/observability/README.md @@ -0,0 +1,103 @@ +# Plano Observability Stack + +Grafana dashboard for monitoring Plano LLM gateway traffic using trace-derived metrics. + +## Architecture + +``` +Plano (brightstaff) --OTLP gRPC--> OTEL Collector --traces--> Tempo + | + spanmetrics connector + | + v + Prometheus <--- Grafana + ^ + | + Envoy /stats/prometheus +``` + +The OTEL Collector receives traces from Plano and does two things: +1. Forwards them to Tempo for trace viewing +2. Derives Prometheus metrics (request counts, latency histograms) from spans via the **spanmetrics connector** + +Prometheus also scrapes Envoy's native stats endpoint for WASM metrics like `ratelimited_rq`. + +## Quick Start + +### 1. Start the observability stack + +```bash +cd demos/observability +docker compose up -d +``` + +### 2. Configure Plano to send traces to the OTEL Collector + +Add or update the `tracing` section in your `plano_config.yaml`: + +```yaml +tracing: + # Sample 100% of requests (adjust for production) + random_sampling: 100 + # Point at the OTEL Collector's OTLP gRPC port (host port 9317) + opentracing_grpc_endpoint: http://localhost:9317 +``` + +If Plano is running inside Docker on the same network, use the service name +and the container-internal port instead: + +```yaml +tracing: + random_sampling: 100 + opentracing_grpc_endpoint: http://otel-collector:4317 +``` + +### 3. Restart Plano + +Restart Plano so brightstaff picks up the new tracing config. Traces will flow +into the OTEL Collector, which forwards them to Tempo and generates Prometheus +metrics from span data. + +### 4. Open Grafana + +Navigate to http://localhost:9000 and log in with `admin` / `admin`. +The **Plano - Requests Overview** dashboard is auto-provisioned under the +"Plano" folder. 
Send a few requests through Plano and the panels will +start populating within a minute or so (the spanmetrics connector flushes every 60 seconds by default, and Prometheus scrapes every 15 seconds). + +## Access + +| Service | URL | Credentials | +|----------------|------------------------------|---------------| +| Grafana | http://localhost:9000 | admin / admin | +| Tempo | http://localhost:9200 | | +| Prometheus | http://localhost:9190 | | +| OTEL Collector | http://localhost:9317 (gRPC) | | + +The **Plano - Requests Overview** dashboard is auto-provisioned in Grafana under the "Plano" folder. + +## Dashboard Panels + +| Panel | Query Source | What It Shows | +|-------|-------------|---------------| +| LLM Requests/sec by Model | spanmetrics `calls_total{service_name="plano(llm)"}` by `llm_model` | Per-model request rate over time | +| Agent Requests/sec by Agent | spanmetrics `calls_total{service_name="plano(agent)"}` by `agent_id` | Per-agent invocation rate over time | +| Total Requests/sec | spanmetrics `calls_total` by service | Aggregate request rate across LLM, agent, and orchestrator | +| Rate-Limited Requests/sec | Envoy `envoy_wasmcustom_ratelimited_rq` | Global rate-limit rejections (no per-model breakdown) | +| LLM Latency p50/p95/p99 by Model | spanmetrics `duration_milliseconds_bucket` | End-to-end latency percentiles per model | +| Cumulative Request Count | spanmetrics `calls_total` | Total requests per model since start | + +## Envoy Stats + +For the rate-limit panel to work, Prometheus needs to scrape Envoy's admin stats endpoint. +The default config assumes Envoy's admin interface is at `host.docker.internal:9901`. +Adjust `prometheus.yaml` if your Envoy admin port differs. 
+ +## Span Attributes Used + +These attributes are set by brightstaff's tracing instrumentation: + +- `service.name` — `plano(llm)`, `plano(agent)`, `plano(orchestrator)`, `plano(filter)`, `plano(routing)` +- `llm.model` — model name (e.g., `gpt-4`, `claude-3-sonnet`) +- `agent_id` — agent identifier from the orchestrator +- `selection.listener` — listener that triggered agent selection diff --git a/demos/observability/docker-compose.yaml b/demos/observability/docker-compose.yaml new file mode 100644 index 00000000..a1006640 --- /dev/null +++ b/demos/observability/docker-compose.yaml @@ -0,0 +1,50 @@ +services: + # OpenTelemetry Collector: receives traces from Plano, derives Prometheus + # metrics via the spanmetrics connector, and forwards traces to Tempo. + otel-collector: + image: otel/opentelemetry-collector-contrib:0.102.0 + command: ["--config=/etc/otel-collector-config.yaml"] + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + ports: + - "9317:4317" # OTLP gRPC (Plano sends traces here) + - "8889:8889" # Prometheus metrics endpoint (spanmetrics) + depends_on: + - tempo + + tempo: + image: grafana/tempo:2.5.0 + command: ["-config.file=/etc/tempo.yaml"] + volumes: + - ./tempo.yaml:/etc/tempo.yaml:ro + ports: + - "9200:3200" # Tempo HTTP API + + prometheus: + image: prom/prometheus:v2.53.0 + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.retention.time=7d" # keep 7 days of metrics + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro + ports: + - "9190:9090" # Prometheus UI/API on host port 9190 + extra_hosts: + - "host.docker.internal:host-gateway" # lets Prometheus reach the Envoy admin target (host.docker.internal:9901) on the Docker host + depends_on: + - otel-collector + + grafana: + image: grafana/grafana:11.1.0 + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin # demo credentials (admin / admin, as listed in the README) + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro # datasource and dashboard-provider definitions + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro # dashboard JSON read by the "Plano" file provider + ports: + - "9000:3000" # Grafana UI on host port 9000 + depends_on: + - prometheus + - tempo diff --git 
a/demos/observability/grafana/dashboards/plano-requests.json b/demos/observability/grafana/dashboards/plano-requests.json new file mode 100644 index 00000000..cee19c47 --- /dev/null +++ b/demos/observability/grafana/dashboards/plano-requests.json @@ -0,0 +1,280 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "panels": [ + { + "title": "LLM Requests / sec by Model", + "description": "Rate of LLM requests proxied through Plano, broken down by model name. Derived from OpenTelemetry trace spans via the spanmetrics connector.", + "type": "timeseries", + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 15, + "lineWidth": 2, + "pointSize": 5, + "showPoints": "auto", + "stacking": { "mode": "none" } + }, + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval])) by (llm_model)", + "legendFormat": "{{ llm_model }}", + "refId": "A" + } + ] + }, + { + "title": "Agent Requests / sec by Agent", + "description": "Rate of agent invocations through the orchestrator, broken down by agent ID. 
Derived from OpenTelemetry trace spans.", + "type": "timeseries", + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 15, + "lineWidth": 2, + "pointSize": 5, + "showPoints": "auto", + "stacking": { "mode": "none" } + }, + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval])) by (agent_id)", + "legendFormat": "{{ agent_id }}", + "refId": "A" + } + ] + }, + { + "title": "Total Requests / sec", + "description": "Aggregate request rate for LLM, agent, and orchestrator spans, one series per service.", + "type": "timeseries", + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 20, + "lineWidth": 2, + "pointSize": 5, + "showPoints": "auto", + "stacking": { "mode": "none" } + }, + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval]))", + "legendFormat": "Total LLM Requests", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 
+ "expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval]))", + "legendFormat": "Total Agent Requests", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sum(rate(calls_total{service_name=\"plano(orchestrator)\"}[$__rate_interval]))", + "legendFormat": "Total Orchestrator Requests", + "refId": "C" + } + ] + }, + { + "title": "Rate-Limited Requests / sec", + "description": "Rate of requests rejected by Envoy WASM rate limiting. This is a global counter from the llm_gateway filter — no per-model breakdown is available.", + "type": "timeseries", + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "fixed", "fixedColor": "red" }, + "custom": { + "axisBorderShow": false, + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 20, + "lineWidth": 2, + "pointSize": 5, + "showPoints": "auto", + "stacking": { "mode": "none" } + }, + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "rate(envoy_wasmcustom_ratelimited_rq[$__rate_interval])", + "legendFormat": "Rate-Limited", + "refId": "A" + } + ] + }, + { + "title": "LLM Request Latency p50 / p95 / p99 by Model", + "description": "Request duration percentiles from trace-derived histograms, broken down by model.", + "type": "timeseries", + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisLabel": "ms", + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 2, + "pointSize": 5, + 
"showPoints": "auto", + "stacking": { "mode": "none" } + }, + "unit": "ms" + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "histogram_quantile(0.50, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))", + "legendFormat": "p50 {{ llm_model }}", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "histogram_quantile(0.95, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))", + "legendFormat": "p95 {{ llm_model }}", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "histogram_quantile(0.99, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))", + "legendFormat": "p99 {{ llm_model }}", + "refId": "C" + } + ] + }, + { + "title": "Cumulative Request Count by Model", + "description": "Total number of LLM requests per model since the collector started.", + "type": "stat", + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic-by-name" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "colorMode": "background_gradient", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto", + "wideLayout": true + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + 
"expr": "sum(calls_total{service_name=\"plano(llm)\"}) by (llm_model)", + "legendFormat": "{{ llm_model }}", + "refId": "A" + } + ] + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": ["plano", "llm", "observability"], + "templating": { + "list": [ + { + "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "Plano - Requests Overview", + "uid": "plano-requests-overview", + "version": 1 +} diff --git a/demos/observability/grafana/provisioning/dashboards/dashboards.yaml b/demos/observability/grafana/provisioning/dashboards/dashboards.yaml new file mode 100644 index 00000000..4228925f --- /dev/null +++ b/demos/observability/grafana/provisioning/dashboards/dashboards.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: Plano + orgId: 1 + folder: Plano + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: false diff --git a/demos/observability/grafana/provisioning/datasources/datasources.yaml b/demos/observability/grafana/provisioning/datasources/datasources.yaml new file mode 100644 index 00000000..ed7c4e4a --- /dev/null +++ b/demos/observability/grafana/provisioning/datasources/datasources.yaml @@ -0,0 +1,21 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus # pinned so the Tempo datasourceUid references below resolve; Grafana generates a uid when none is set + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true + + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + editable: true + jsonData: + tracesToMetrics: + datasourceUid: prometheus + serviceMap: + datasourceUid: prometheus diff --git a/demos/observability/otel-collector-config.yaml 
b/demos/observability/otel-collector-config.yaml new file mode 100644 index 00000000..e55e5761 --- /dev/null +++ b/demos/observability/otel-collector-config.yaml @@ -0,0 +1,40 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +connectors: + spanmetrics: + dimensions: + - name: llm.model + - name: agent_id + - name: selection.listener + - name: http.method + - name: http.status_code + histogram: + explicit: + buckets: [5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 2500ms, 5000ms, 10000ms] # bucket bounds are durations, so each one carries an explicit unit + +exporters: + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + prometheus: + endpoint: 0.0.0.0:8889 + +processors: + batch: + timeout: 5s + send_batch_size: 1024 + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [otlp/tempo, spanmetrics] + metrics/spanmetrics: + receivers: [spanmetrics] + exporters: [prometheus] diff --git a/demos/observability/prometheus.yaml b/demos/observability/prometheus.yaml new file mode 100644 index 00000000..8f198252 --- /dev/null +++ b/demos/observability/prometheus.yaml @@ -0,0 +1,15 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: otel-collector-spanmetrics + static_configs: + - targets: ["otel-collector:8889"] + + # Scrape Envoy stats for WASM metrics (ratelimited_rq, etc.) + # Adjust the target if your Envoy admin port differs. 
+ - job_name: envoy + metrics_path: /stats/prometheus + static_configs: + - targets: ["host.docker.internal:9901"] diff --git a/demos/observability/tempo.yaml b/demos/observability/tempo.yaml new file mode 100644 index 00000000..5d23dbfe --- /dev/null +++ b/demos/observability/tempo.yaml @@ -0,0 +1,25 @@ +stream_over_http_enabled: true +server: + http_listen_port: 3200 # HTTP API; published as host port 9200 and used by the Grafana Tempo datasource (http://tempo:3200) + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 # OTLP gRPC ingest; the collector's otlp/tempo exporter sends to tempo:4317 + +storage: + trace: + backend: local # traces are written under the paths below; docker-compose.yaml mounts no volume for /var/tempo, so presumably they do not survive container removal + local: + path: /var/tempo/traces + wal: + path: /var/tempo/wal + +metrics_generator: # NOTE(review): no remote_write target is configured here - confirm whether generator output is meant to reach Prometheus + registry: + external_labels: + source: tempo + storage: + path: /var/tempo/generator/wal