Merge a43c3d7557 into 78dc4edad9

2026-04-25 00:36:34 +02:00 · 2026-04-23 15:48:27 -07:00 · 2026-04-23 15:48:27 -07:00 · 1e21239f7b
commit 1e21239f7b
parent 78dc4edad9 a43c3d7557
8 changed files with 545 additions and 0 deletions
--- a/demos/observability/README.md
+++ b/demos/observability/README.md
@ -0,0 +1,103 @@
 # Plano Observability Stack
 Grafana dashboard for monitoring Plano LLM gateway traffic using trace-derived metrics.
 ## Architecture
 ```
 Plano (brightstaff) --OTLP gRPC--> OTEL Collector --traces--> Tempo
                                        |
                                   spanmetrics connector
                                        |
                                        v
                                   Prometheus <--- Grafana
                                        ^
                                        |
                              Envoy /stats/prometheus
 ```
 The OTEL Collector receives traces from Plano and does two things:
 1. Forwards them to Tempo for trace viewing
 2. Derives Prometheus metrics (request counts, latency histograms) from spans via the **spanmetrics connector**
 Prometheus also scrapes Envoy's native stats endpoint for WASM metrics like `ratelimited_rq`.
 ## Quick Start
 ### 1. Start the observability stack
 ```bash
 cd demos/observability
 docker compose up -d
 ```
 ### 2. Configure Plano to send traces to the OTEL Collector
 Add or update the `tracing` section in your `plano_config.yaml`:
 ```yaml
 tracing:
  # Sample 100% of requests (adjust for production)
  random_sampling: 100
  # Point at the OTEL Collector's OTLP gRPC port (host port 9317)
  opentracing_grpc_endpoint: http://localhost:9317
 ```
 If Plano is running inside Docker on the same network, use the service name
 and the container-internal port instead:
 ```yaml
 tracing:
  random_sampling: 100
  opentracing_grpc_endpoint: http://otel-collector:4317
 ```
 ### 3. Restart Plano
 Restart Plano so brightstaff picks up the new tracing config. Traces will flow
 into the OTEL Collector, which forwards them to Tempo and generates Prometheus
 metrics from span data.
 ### 4. Open Grafana
 Navigate to http://localhost:9000 and log in with `admin` / `admin`.
 The **Plano - Requests Overview** dashboard is auto-provisioned under the
 "Plano" folder. Send a few requests through Plano and the panels will
 start populating within ~15 seconds (the Prometheus scrape interval).
 ## Access
 | Service        | URL                          | Credentials   |
 |----------------|------------------------------|---------------|
 | Grafana        | http://localhost:9000         | admin / admin |
 | Tempo          | http://localhost:9200         |               |
 | Prometheus     | http://localhost:9190         |               |
 | OTEL Collector | http://localhost:9317 (gRPC)  |               |
 The **Plano - Requests Overview** dashboard is auto-provisioned in Grafana under the "Plano" folder.
 ## Dashboard Panels
 | Panel | Query Source | What It Shows |
 |-------|-------------|---------------|
 | LLM Requests/sec by Model | spanmetrics `calls_total{service_name="plano(llm)"}` by `llm_model` | Per-model request rate over time |
 | Agent Requests/sec by Agent | spanmetrics `calls_total{service_name="plano(agent)"}` by `agent_id` | Per-agent invocation rate over time |
 | Total Requests/sec | spanmetrics `calls_total` by service | Aggregate request rate across LLM, agent, and orchestrator |
 | Rate-Limited Requests/sec | Envoy `envoy_wasmcustom_ratelimited_rq` | Global rate-limit rejections (no per-model breakdown) |
 | LLM Latency p50/p95/p99 by Model | spanmetrics `duration_milliseconds_bucket` | End-to-end latency percentiles per model |
 | Cumulative Request Count | spanmetrics `calls_total` | Total requests per model since start |
 ## Envoy Stats
 For the rate-limit panel to work, Prometheus needs to scrape Envoy's admin stats endpoint.
 The default config assumes Envoy's admin interface is at `host.docker.internal:9901`.
 Adjust `prometheus.yaml` if your Envoy admin port differs.
 ## Span Attributes Used
 These attributes are set by brightstaff's tracing instrumentation:
 - `service.name` — `plano(llm)`, `plano(agent)`, `plano(orchestrator)`, `plano(filter)`, `plano(routing)`
 - `llm.model` — model name (e.g., `gpt-4`, `claude-3-sonnet`); exposed in Prometheus as the `llm_model` label (dots in span attribute names become underscores)
 - `agent_id` — agent identifier from the orchestrator
 - `selection.listener` — listener that triggered agent selection
--- a/demos/observability/docker-compose.yaml
+++ b/demos/observability/docker-compose.yaml
@ -0,0 +1,50 @@
 # Demo observability stack for Plano: OTEL Collector, Tempo, Prometheus, Grafana.
 # Host ports are remapped (9317, 9200, 9190, 9000); the containers keep
 # listening on their usual internal ports (4317, 3200, 9090, 3000).
 services:
  # OpenTelemetry Collector: receives traces from Plano, derives Prometheus
  # metrics via the spanmetrics connector, and forwards traces to Tempo.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.102.0
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    ports:
      - "9317:4317"   # OTLP gRPC (Plano sends traces here)
      - "8889:8889"   # Prometheus metrics endpoint (spanmetrics)
    depends_on:
      - tempo
  # Tempo: trace storage. Only the HTTP API is published to the host; the
  # collector reaches the OTLP gRPC receiver over the compose network
  # (tempo:4317, see otel-collector-config.yaml).
  tempo:
    image: grafana/tempo:2.5.0
    command: ["-config.file=/etc/tempo.yaml"]
    volumes:
      - ./tempo.yaml:/etc/tempo.yaml:ro
    ports:
      - "9200:3200"   # Tempo HTTP API
  # Prometheus: scrapes the collector's spanmetrics endpoint and Envoy's
  # stats endpoint (targets are defined in prometheus.yaml).
  prometheus:
    image: prom/prometheus:v2.53.0
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.retention.time=7d"
    volumes:
      # The local file is named prometheus.yaml but is mounted at the .yml
      # path that --config.file points to.
      - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
    ports:
      - "9190:9090"
    # Lets the container resolve host.docker.internal, which prometheus.yaml
    # uses as the Envoy scrape target.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    depends_on:
      - otel-collector
  # Grafana: datasources and dashboards are loaded from the mounted
  # provisioning and dashboards directories.
  grafana:
    image: grafana/grafana:11.1.0
    environment:
      # NOTE(review): admin/admin looks like a demo-only credential — change
      # it before exposing this stack beyond localhost.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "9000:3000"
    depends_on:
      - prometheus
      - tempo
--- a/demos/observability/grafana/dashboards/plano-requests.json
+++ b/demos/observability/grafana/dashboards/plano-requests.json
@ -0,0 +1,280 @@
 {
  "annotations": {
    "list": []
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 1,
  "links": [],
  "panels": [
    {
      "title": "LLM Requests / sec by Model",
      "description": "Rate of LLM requests proxied through Plano, broken down by model name. Derived from OpenTelemetry trace spans via the spanmetrics connector.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
      "datasource": { "type": "prometheus", "uid": "${datasource}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisLabel": "req/s",
            "drawStyle": "line",
            "fillOpacity": 15,
            "lineWidth": 2,
            "pointSize": 5,
            "showPoints": "auto",
            "stacking": { "mode": "none" }
          },
          "unit": "reqps"
        },
        "overrides": []
      },
      "options": {
        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
        "tooltip": { "mode": "multi", "sort": "desc" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval])) by (llm_model)",
          "legendFormat": "{{ llm_model }}",
          "refId": "A"
        }
      ]
    },
    {
      "title": "Agent Requests / sec by Agent",
      "description": "Rate of agent invocations through the orchestrator, broken down by agent ID. Derived from OpenTelemetry trace spans.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
      "datasource": { "type": "prometheus", "uid": "${datasource}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisLabel": "req/s",
            "drawStyle": "line",
            "fillOpacity": 15,
            "lineWidth": 2,
            "pointSize": 5,
            "showPoints": "auto",
            "stacking": { "mode": "none" }
          },
          "unit": "reqps"
        },
        "overrides": []
      },
      "options": {
        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
        "tooltip": { "mode": "multi", "sort": "desc" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval])) by (agent_id)",
          "legendFormat": "{{ agent_id }}",
          "refId": "A"
        }
      ]
    },
    {
      "title": "Total Requests / sec",
      "description": "Aggregate request rate per Plano service: LLM, agent, and orchestrator.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
      "datasource": { "type": "prometheus", "uid": "${datasource}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "fixed", "fixedColor": "blue" },
          "custom": {
            "axisBorderShow": false,
            "axisLabel": "req/s",
            "drawStyle": "line",
            "fillOpacity": 20,
            "lineWidth": 2,
            "pointSize": 5,
            "showPoints": "auto",
            "stacking": { "mode": "none" }
          },
          "unit": "reqps"
        },
        "overrides": []
      },
      "options": {
        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
        "tooltip": { "mode": "multi", "sort": "desc" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval]))",
          "legendFormat": "Total LLM Requests",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval]))",
          "legendFormat": "Total Agent Requests",
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "sum(rate(calls_total{service_name=\"plano(orchestrator)\"}[$__rate_interval]))",
          "legendFormat": "Total Orchestrator Requests",
          "refId": "C"
        }
      ]
    },
    {
      "title": "Rate-Limited Requests / sec",
      "description": "Rate of requests rejected by Envoy WASM rate limiting. This is a global counter from the llm_gateway filter — no per-model breakdown is available.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
      "datasource": { "type": "prometheus", "uid": "${datasource}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "fixed", "fixedColor": "red" },
          "custom": {
            "axisBorderShow": false,
            "axisLabel": "req/s",
            "drawStyle": "line",
            "fillOpacity": 20,
            "lineWidth": 2,
            "pointSize": 5,
            "showPoints": "auto",
            "stacking": { "mode": "none" }
          },
          "unit": "reqps"
        },
        "overrides": []
      },
      "options": {
        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
        "tooltip": { "mode": "multi", "sort": "desc" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "rate(envoy_wasmcustom_ratelimited_rq[$__rate_interval])",
          "legendFormat": "Rate-Limited",
          "refId": "A"
        }
      ]
    },
    {
      "title": "LLM Request Latency p50 / p95 / p99 by Model",
      "description": "Request duration percentiles from trace-derived histograms, broken down by model.",
      "type": "timeseries",
      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
      "datasource": { "type": "prometheus", "uid": "${datasource}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisLabel": "ms",
            "drawStyle": "line",
            "fillOpacity": 0,
            "lineWidth": 2,
            "pointSize": 5,
            "showPoints": "auto",
            "stacking": { "mode": "none" }
          },
          "unit": "ms"
        },
        "overrides": []
      },
      "options": {
        "legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
        "tooltip": { "mode": "multi", "sort": "desc" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "histogram_quantile(0.50, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
          "legendFormat": "p50 {{ llm_model }}",
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "histogram_quantile(0.95, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
          "legendFormat": "p95 {{ llm_model }}",
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "histogram_quantile(0.99, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
          "legendFormat": "p99 {{ llm_model }}",
          "refId": "C"
        }
      ]
    },
    {
      "title": "Cumulative Request Count by Model",
      "description": "Total number of LLM requests per model since the collector started.",
      "type": "stat",
      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
      "datasource": { "type": "prometheus", "uid": "${datasource}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic-by-name" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "options": {
        "colorMode": "background_gradient",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "auto",
        "wideLayout": true
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${datasource}" },
          "expr": "sum(calls_total{service_name=\"plano(llm)\"}) by (llm_model)",
          "legendFormat": "{{ llm_model }}",
          "refId": "A"
        }
      ]
    }
  ],
  "refresh": "10s",
  "schemaVersion": 39,
  "tags": ["plano", "llm", "observability"],
  "templating": {
    "list": [
      {
        "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
        "hide": 0,
        "includeAll": false,
        "label": "Data Source",
        "multi": false,
        "name": "datasource",
        "options": [],
        "query": "prometheus",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      }
    ]
  },
  "time": { "from": "now-1h", "to": "now" },
  "timepicker": {},
  "timezone": "browser",
  "title": "Plano - Requests Overview",
  "uid": "plano-requests-overview",
  "version": 1
 }
--- a/demos/observability/grafana/provisioning/dashboards/dashboards.yaml
+++ b/demos/observability/grafana/provisioning/dashboards/dashboards.yaml
@ -0,0 +1,12 @@
 # Grafana dashboard provisioning: loads dashboard JSON files from disk into
 # the "Plano" folder.
 apiVersion: 1
 providers:
  - name: Plano
    orgId: 1
    folder: Plano
    type: file
    disableDeletion: false
    editable: true
    options:
      # Must match the container path where docker-compose mounts
      # ./grafana/dashboards.
      path: /var/lib/grafana/dashboards
      # All dashboards go into the single folder named above rather than
      # mirroring the directory layout.
      foldersFromFilesStructure: false
--- a/demos/observability/grafana/provisioning/datasources/datasources.yaml
+++ b/demos/observability/grafana/provisioning/datasources/datasources.yaml
@ -0,0 +1,20 @@
 # Grafana datasource provisioning for the Plano observability demo.
 # URLs use compose service names and container-internal ports.
 apiVersion: 1
 datasources:
  - name: Prometheus
    # Pin the uid explicitly: the Tempo datasource below links to Prometheus
    # via datasourceUid, and without a fixed uid Grafana generates a random
    # one, leaving those links unresolved.
    uid: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true
  - name: Tempo
    type: tempo
    access: proxy
    url: http://tempo:3200
    editable: true
    jsonData:
      # Both references must equal the uid of the Prometheus datasource above.
      tracesToMetrics:
        datasourceUid: Prometheus
      serviceMap:
        datasourceUid: Prometheus
--- a/demos/observability/otel-collector-config.yaml
+++ b/demos/observability/otel-collector-config.yaml
@ -0,0 +1,40 @@
 # OTEL Collector pipeline for the Plano demo:
 #   OTLP gRPC in -> batch -> Tempo (traces) + spanmetrics connector
 #   spanmetrics connector -> Prometheus exporter (scraped on :8889)
 receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
 connectors:
  spanmetrics:
    # Span attributes promoted to metric labels (dots become underscores in
    # Prometheus, e.g. llm.model -> llm_model).
    dimensions:
      - name: llm.model
      - name: agent_id
      - name: selection.listener
      - name: http.method
      - name: http.status_code
    histogram:
      explicit:
        # Bucket bounds are durations and need an explicit unit. Bare
        # integers are decoded as nanoseconds, which would put every real
        # request into the +Inf bucket and break the latency percentiles.
        buckets: [5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 2500ms, 5000ms, 10000ms]
 exporters:
  otlp/tempo:
    endpoint: tempo:4317
    tls:
      # Plaintext gRPC inside the compose network.
      insecure: true
  prometheus:
    endpoint: 0.0.0.0:8889
 processors:
  batch:
    timeout: 5s
    send_batch_size: 1024
 service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      # The connector acts as an exporter here and as a receiver below.
      exporters: [otlp/tempo, spanmetrics]
    metrics/spanmetrics:
      receivers: [spanmetrics]
      exporters: [prometheus]
--- a/demos/observability/prometheus.yaml
+++ b/demos/observability/prometheus.yaml
@ -0,0 +1,15 @@
 # Prometheus scrape configuration for the Plano observability demo.
 global:
  scrape_interval: 15s
  evaluation_interval: 15s
 scrape_configs:
  # Trace-derived metrics exposed by the collector's Prometheus exporter
  # (port 8889 in otel-collector-config.yaml).
  - job_name: otel-collector-spanmetrics
    static_configs:
      - targets: ["otel-collector:8889"]
  # Scrape Envoy stats for WASM metrics (ratelimited_rq, etc.)
  # Adjust the target if your Envoy admin port differs.
  # host.docker.internal is mapped via extra_hosts in docker-compose.yaml.
  - job_name: envoy
    metrics_path: /stats/prometheus
    static_configs:
      - targets: ["host.docker.internal:9901"]
--- a/demos/observability/tempo.yaml
+++ b/demos/observability/tempo.yaml
@ -0,0 +1,25 @@
 # Tempo configuration for the Plano demo: OTLP gRPC ingest, local disk storage.
 stream_over_http_enabled: true
 server:
  # HTTP API port; published to the host as 9200 in docker-compose.yaml and
  # used by the Grafana Tempo datasource (http://tempo:3200).
  http_listen_port: 3200
 distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          # The OTEL Collector's otlp/tempo exporter sends traces here.
          endpoint: 0.0.0.0:4317
 storage:
  trace:
    backend: local
    local:
      path: /var/tempo/traces
    wal:
      path: /var/tempo/wal
 # NOTE(review): no remote-write target is configured for the metrics
 # generator in this file, so it presumably does not feed Prometheus —
 # confirm whether this section is needed.
 metrics_generator:
  registry:
    external_labels:
      source: tempo
  storage:
    path: /var/tempo/generator/wal