Merge a43c3d7557 into 78dc4edad9

2026-04-25 00:36:34 +02:00 · 2026-04-23 15:48:27 -07:00 · 2026-04-23 15:48:27 -07:00 · 1e21239f7b
commit 1e21239f7b
parent 78dc4edad9 a43c3d7557
8 changed files with 545 additions and 0 deletions
--- a/demos/observability/README.md
+++ b/demos/observability/README.md
@ -0,0 +1,103 @@
+# Plano Observability Stack
+
+Grafana dashboard for monitoring Plano LLM gateway traffic using trace-derived metrics.
+
+## Architecture
+
+```
+Plano (brightstaff) --OTLP gRPC--> OTEL Collector --traces--> Tempo
+                                        |
+                                   spanmetrics connector
+                                        |
+                                        v
+                                   Prometheus <--- Grafana
+                                        ^
+                                        |
+                              Envoy /stats/prometheus
+```
+
+The OTEL Collector receives traces from Plano and does two things:
+1. Forwards them to Tempo for trace viewing
+2. Derives Prometheus metrics (request counts, latency histograms) from spans via the **spanmetrics connector**
+
+Prometheus also scrapes Envoy's native stats endpoint for WASM metrics like `ratelimited_rq`.
+
+## Quick Start
+
+### 1. Start the observability stack
+
+```bash
+cd demos/observability
+docker compose up -d
+```
+
+### 2. Configure Plano to send traces to the OTEL Collector
+
+Add or update the `tracing` section in your `plano_config.yaml`:
+
+```yaml
+tracing:
+  # Sample 100% of requests (adjust for production)
+  random_sampling: 100
+  # Point at the OTEL Collector's OTLP gRPC port (host port 9317)
+  opentracing_grpc_endpoint: http://localhost:9317
+```
+
+If Plano is running inside Docker on the same network, use the service name
+and the container-internal port instead:
+
+```yaml
+tracing:
+  random_sampling: 100
+  opentracing_grpc_endpoint: http://otel-collector:4317
+```
+
+### 3. Restart Plano
+
+Restart Plano so brightstaff picks up the new tracing config. Traces will flow
+into the OTEL Collector, which forwards them to Tempo and generates Prometheus
+metrics from span data.
+
+### 4. Open Grafana
+
+Navigate to http://localhost:9000 and log in with `admin` / `admin`.
+The **Plano - Requests Overview** dashboard is auto-provisioned under the
+"Plano" folder. Send a few requests through Plano; the panels start populating once the
+collector has flushed its span metrics and Prometheus has scraped them (every 15 seconds).
+
+## Access
+
+| Service        | URL                          | Credentials   |
+|----------------|------------------------------|---------------|
+| Grafana        | http://localhost:9000         | admin / admin |
+| Tempo          | http://localhost:9200         |               |
+| Prometheus     | http://localhost:9190         |               |
+| OTEL Collector | http://localhost:9317 (gRPC)  |               |
+
+The **Plano - Requests Overview** dashboard is auto-provisioned in Grafana under the "Plano" folder.
+
+## Dashboard Panels
+
+| Panel | Query Source | What It Shows |
+|-------|-------------|---------------|
+| LLM Requests/sec by Model | spanmetrics `calls_total{service_name="plano(llm)"}` by `llm_model` | Per-model request rate over time |
+| Agent Requests/sec by Agent | spanmetrics `calls_total{service_name="plano(agent)"}` by `agent_id` | Per-agent invocation rate over time |
+| Total Requests/sec | spanmetrics `calls_total` by service | Aggregate request rate across LLM, agent, and orchestrator |
+| Rate-Limited Requests/sec | Envoy `envoy_wasmcustom_ratelimited_rq` | Global rate-limit rejections (no per-model breakdown) |
+| LLM Latency p50/p95/p99 by Model | spanmetrics `duration_milliseconds_bucket` | End-to-end latency percentiles per model |
+| Cumulative Request Count | spanmetrics `calls_total` | Total requests per model since start |
+
+## Envoy Stats
+
+For the rate-limit panel to work, Prometheus needs to scrape Envoy's admin stats endpoint.
+The default config assumes Envoy's admin interface is at `host.docker.internal:9901`.
+Adjust `prometheus.yaml` if your Envoy admin port differs.
+
+## Span Attributes Used
+
+These attributes are set by brightstaff's tracing instrumentation:
+
+- `service.name` — `plano(llm)`, `plano(agent)`, `plano(orchestrator)`, `plano(filter)`, `plano(routing)`
+- `llm.model` — model name (e.g., `gpt-4`, `claude-3-sonnet`)
+- `agent_id` — agent identifier from the orchestrator
+- `selection.listener` — listener that triggered agent selection
--- a/demos/observability/docker-compose.yaml
+++ b/demos/observability/docker-compose.yaml
@ -0,0 +1,50 @@
+services:
+  # OpenTelemetry Collector: receives traces from Plano, derives Prometheus
+  # metrics via the spanmetrics connector, and forwards traces to Tempo.
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:0.102.0
+    command: ["--config=/etc/otel-collector-config.yaml"]
+    volumes:
+      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
+    ports:
+      - "9317:4317"   # OTLP gRPC (Plano sends traces here)
+      - "8889:8889"   # Prometheus metrics endpoint (spanmetrics)
+    depends_on:
+      - tempo  # start the trace backend before the collector that exports to it
+
+  tempo:  # trace storage; receives the traces forwarded by otel-collector
+    image: grafana/tempo:2.5.0
+    command: ["-config.file=/etc/tempo.yaml"]
+    volumes:
+      - ./tempo.yaml:/etc/tempo.yaml:ro
+    ports:
+      - "9200:3200"   # Tempo HTTP API
+
+  prometheus:  # scrapes the collector's spanmetrics endpoint and Envoy's stats
+    image: prom/prometheus:v2.53.0
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--storage.tsdb.retention.time=7d"
+    volumes:
+      - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro  # local .yaml is mounted under the .yml name passed to --config.file
+    ports:
+      - "9190:9090"  # Prometheus on host port 9190
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # lets the container reach Envoy's admin port on the Docker host
+    depends_on:
+      - otel-collector
+
+  grafana:  # dashboards; datasources and dashboards are provisioned from the mounted directories
+    image: grafana/grafana:11.1.0
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=admin  # demo-only credentials; change before exposing this stack
+      - GF_USERS_ALLOW_SIGN_UP=false
+    volumes:
+      - ./grafana/provisioning:/etc/grafana/provisioning:ro
+      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro  # must match the path in the dashboard provider config
+    ports:
+      - "9000:3000"  # Grafana UI on host port 9000
+    depends_on:
+      - prometheus
+      - tempo
--- a/demos/observability/grafana/dashboards/plano-requests.json
+++ b/demos/observability/grafana/dashboards/plano-requests.json
@ -0,0 +1,280 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "links": [],
+  "panels": [
+    {
+      "title": "LLM Requests / sec by Model",
+      "description": "Rate of LLM requests proxied through Plano, broken down by model name. Derived from OpenTelemetry trace spans via the spanmetrics connector.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisLabel": "req/s",
+            "drawStyle": "line",
+            "fillOpacity": 15,
+            "lineWidth": 2,
+            "pointSize": 5,
+            "showPoints": "auto",
+            "stacking": { "mode": "none" }
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
+        "tooltip": { "mode": "multi", "sort": "desc" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval])) by (llm_model)",
+          "legendFormat": "{{ llm_model }}",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "title": "Agent Requests / sec by Agent",
+      "description": "Rate of agent invocations through the orchestrator, broken down by agent ID. Derived from OpenTelemetry trace spans.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisLabel": "req/s",
+            "drawStyle": "line",
+            "fillOpacity": 15,
+            "lineWidth": 2,
+            "pointSize": 5,
+            "showPoints": "auto",
+            "stacking": { "mode": "none" }
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
+        "tooltip": { "mode": "multi", "sort": "desc" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval])) by (agent_id)",
+          "legendFormat": "{{ agent_id }}",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "title": "Total Requests / sec",
+      "description": "Aggregate request rate per service: one series each for LLM, agent, and orchestrator spans.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisLabel": "req/s",
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "lineWidth": 2,
+            "pointSize": 5,
+            "showPoints": "auto",
+            "stacking": { "mode": "none" }
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
+        "tooltip": { "mode": "multi", "sort": "desc" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval]))",
+          "legendFormat": "Total LLM Requests",
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval]))",
+          "legendFormat": "Total Agent Requests",
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "sum(rate(calls_total{service_name=\"plano(orchestrator)\"}[$__rate_interval]))",
+          "legendFormat": "Total Orchestrator Requests",
+          "refId": "C"
+        }
+      ]
+    },
+    {
+      "title": "Rate-Limited Requests / sec",
+      "description": "Rate of requests rejected by Envoy WASM rate limiting. This is a global counter from the llm_gateway filter — no per-model breakdown is available.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "fixed", "fixedColor": "red" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisLabel": "req/s",
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "lineWidth": 2,
+            "pointSize": 5,
+            "showPoints": "auto",
+            "stacking": { "mode": "none" }
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
+        "tooltip": { "mode": "multi", "sort": "desc" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "rate(envoy_wasmcustom_ratelimited_rq[$__rate_interval])",
+          "legendFormat": "Rate-Limited",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "title": "LLM Request Latency p50 / p95 / p99 by Model",
+      "description": "Request duration percentiles from trace-derived histograms, broken down by model.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisLabel": "ms",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "lineWidth": 2,
+            "pointSize": 5,
+            "showPoints": "auto",
+            "stacking": { "mode": "none" }
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
+        "tooltip": { "mode": "multi", "sort": "desc" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "histogram_quantile(0.50, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
+          "legendFormat": "p50 {{ llm_model }}",
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "histogram_quantile(0.95, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
+          "legendFormat": "p95 {{ llm_model }}",
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "histogram_quantile(0.99, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
+          "legendFormat": "p99 {{ llm_model }}",
+          "refId": "C"
+        }
+      ]
+    },
+    {
+      "title": "Cumulative Request Count by Model",
+      "description": "Total number of LLM requests per model since the collector started.",
+      "type": "stat",
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic-by-name" },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "options": {
+        "colorMode": "background_gradient",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
+        "textMode": "auto",
+        "wideLayout": true
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "expr": "sum(calls_total{service_name=\"plano(llm)\"}) by (llm_model)",
+          "legendFormat": "{{ llm_model }}",
+          "refId": "A"
+        }
+      ]
+    }
+  ],
+  "refresh": "10s",
+  "schemaVersion": 39,
+  "tags": ["plano", "llm", "observability"],
+  "templating": {
+    "list": [
+      {
+        "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
+        "hide": 0,
+        "includeAll": false,
+        "label": "Data Source",
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": { "from": "now-1h", "to": "now" },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Plano - Requests Overview",
+  "uid": "plano-requests-overview",
+  "version": 1
+}
--- a/demos/observability/grafana/provisioning/dashboards/dashboards.yaml
+++ b/demos/observability/grafana/provisioning/dashboards/dashboards.yaml
@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:  # file-based dashboard provider for the Plano dashboards
+  - name: Plano
+    orgId: 1
+    folder: Plano  # Grafana folder the provisioned dashboards appear under
+    type: file
+    disableDeletion: false
+    editable: true
+    options:
+      path: /var/lib/grafana/dashboards  # container path where the dashboard JSON files are mounted
+      foldersFromFilesStructure: false
--- a/demos/observability/grafana/provisioning/datasources/datasources.yaml
+++ b/demos/observability/grafana/provisioning/datasources/datasources.yaml
@ -0,0 +1,21 @
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    uid: Prometheus  # explicit UID; the Tempo datasource's datasourceUid entries must match this value
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090  # compose service name and container-internal port
+    isDefault: true
+    editable: true
+
+  - name: Tempo
+    type: tempo
+    access: proxy
+    url: http://tempo:3200  # Tempo HTTP API inside the compose network
+    editable: true
+    jsonData:
+      tracesToMetrics:
+        datasourceUid: Prometheus  # refers to the uid declared on the Prometheus datasource above
+      serviceMap:
+        datasourceUid: Prometheus
--- a/demos/observability/otel-collector-config.yaml
+++ b/demos/observability/otel-collector-config.yaml
@ -0,0 +1,40 @@
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317  # OTLP gRPC listener on all interfaces
+
+connectors:
+  spanmetrics:  # derives call-count and duration metrics from incoming spans
+    dimensions:  # span attributes copied onto the generated metrics as labels
+      - name: llm.model
+      - name: agent_id
+      - name: selection.listener
+      - name: http.method
+      - name: http.status_code
+    histogram:
+      explicit:  # bucket bounds are durations and need a unit; a bare number is read as nanoseconds
+        buckets: [5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2500ms, 5s, 10s]
+
+exporters:
+  otlp/tempo:
+    endpoint: tempo:4317  # Tempo's OTLP gRPC receiver
+    tls:
+      insecure: true  # plaintext gRPC to Tempo
+  prometheus:
+    endpoint: 0.0.0.0:8889  # metrics endpoint exposed for Prometheus to scrape
+
+processors:
+  batch:
+    timeout: 5s
+    send_batch_size: 1024
+
+service:
+  pipelines:
+    traces:  # spans fan out to Tempo and to the spanmetrics connector
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [otlp/tempo, spanmetrics]
+    metrics/spanmetrics:  # metrics produced by the connector are exposed via the prometheus exporter
+      receivers: [spanmetrics]
+      exporters: [prometheus]
--- a/demos/observability/prometheus.yaml
+++ b/demos/observability/prometheus.yaml
@ -0,0 +1,15 @@
+global:
+  scrape_interval: 15s  # applies to every job below; none of them overrides it
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: otel-collector-spanmetrics  # trace-derived metrics exposed by the collector's prometheus exporter
+    static_configs:
+      - targets: ["otel-collector:8889"]
+
+  # Scrape Envoy stats for WASM metrics (ratelimited_rq, etc.)
+  # Adjust the target if your Envoy admin port differs.
+  - job_name: envoy
+    metrics_path: /stats/prometheus
+    static_configs:
+      - targets: ["host.docker.internal:9901"]  # Envoy admin interface on the Docker host
--- a/demos/observability/tempo.yaml
+++ b/demos/observability/tempo.yaml
@ -0,0 +1,25 @@
+stream_over_http_enabled: true
+server:
+  http_listen_port: 3200  # Tempo HTTP API port
+
+distributor:
+  receivers:
+    otlp:
+      protocols:
+        grpc:
+          endpoint: 0.0.0.0:4317  # OTLP gRPC ingest on all interfaces
+
+storage:
+  trace:
+    backend: local  # traces are kept on the local filesystem at the paths below
+    local:
+      path: /var/tempo/traces
+    wal:
+      path: /var/tempo/wal
+
+metrics_generator:  # NOTE(review): no remote_write target or enabled processors are visible here, so this section may have no effect - confirm
+  registry:
+    external_labels:
+      source: tempo
+  storage:
+    path: /var/tempo/generator/wal