This commit is contained in:
Tom Stoffer 2026-04-23 15:48:27 -07:00 committed by GitHub
commit 1e21239f7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 545 additions and 0 deletions

View file

@ -0,0 +1,103 @@
# Plano Observability Stack
Grafana dashboard for monitoring Plano LLM gateway traffic using trace-derived metrics.
## Architecture
```
Plano (brightstaff) --OTLP gRPC--> OTEL Collector --traces--> Tempo
                                         |
                                spanmetrics connector
                                         |
                                         v
                                    Prometheus <--- Grafana
                                         ^
                                         |
                               Envoy /stats/prometheus
```
The OTEL Collector receives traces from Plano and does two things:
1. Forwards them to Tempo for trace viewing
2. Derives Prometheus metrics (request counts, latency histograms) from spans via the **spanmetrics connector**
Prometheus also scrapes Envoy's native stats endpoint for WASM metrics like `ratelimited_rq`.
## Quick Start
### 1. Start the observability stack
```bash
cd demos/observability
docker compose up -d
```
### 2. Configure Plano to send traces to the OTEL Collector
Add or update the `tracing` section in your `plano_config.yaml`:
```yaml
tracing:
  # Sample 100% of requests (adjust for production)
  random_sampling: 100
  # Point at the OTEL Collector's OTLP gRPC port (host port 9317)
  opentracing_grpc_endpoint: http://localhost:9317
```
If Plano is running inside Docker on the same network, use the service name
and the container-internal port instead:
```yaml
tracing:
  random_sampling: 100
  opentracing_grpc_endpoint: http://otel-collector:4317
```
### 3. Restart Plano
Restart Plano so brightstaff picks up the new tracing config. Traces will flow
into the OTEL Collector, which forwards them to Tempo and generates Prometheus
metrics from span data.
### 4. Open Grafana
Navigate to http://localhost:9000 and log in with `admin` / `admin`.
The **Plano - Requests Overview** dashboard is auto-provisioned under the
"Plano" folder. Send a few requests through Plano and the panels will
start populating within ~15 seconds (the Prometheus scrape interval).
## Access
| Service | URL | Credentials |
|----------------|------------------------------|---------------|
| Grafana | http://localhost:9000 | admin / admin |
| Tempo | http://localhost:9200 | |
| Prometheus | http://localhost:9190 | |
| OTEL Collector | http://localhost:9317 (gRPC) | |
The **Plano - Requests Overview** dashboard is auto-provisioned in Grafana under the "Plano" folder.
## Dashboard Panels
| Panel | Query Source | What It Shows |
|-------|-------------|---------------|
| LLM Requests/sec by Model | spanmetrics `calls_total{service_name="plano(llm)"}` by `llm_model` | Per-model request rate over time |
| Agent Requests/sec by Agent | spanmetrics `calls_total{service_name="plano(agent)"}` by `agent_id` | Per-agent invocation rate over time |
| Total Requests/sec | spanmetrics `calls_total` by service | Aggregate request rate across LLM, agent, and orchestrator |
| Rate-Limited Requests/sec | Envoy `envoy_wasmcustom_ratelimited_rq` | Global rate-limit rejections (no per-model breakdown) |
| LLM Latency p50/p95/p99 by Model | spanmetrics `duration_milliseconds_bucket` | End-to-end latency percentiles per model |
| Cumulative Request Count | spanmetrics `calls_total` | Total requests per model since start |
## Envoy Stats
For the rate-limit panel to work, Prometheus needs to scrape Envoy's admin stats endpoint.
The default config assumes Envoy's admin interface is at `host.docker.internal:9901`.
Adjust `prometheus.yaml` if your Envoy admin port differs.
## Span Attributes Used
These attributes are set by brightstaff's tracing instrumentation:
- `service.name` — one of `plano(llm)`, `plano(agent)`, `plano(orchestrator)`, `plano(filter)`, `plano(routing)`
- `llm.model` — model name (e.g., `gpt-4`, `claude-3-sonnet`)
- `agent_id` — agent identifier from the orchestrator
- `selection.listener` — listener that triggered agent selection

View file

@ -0,0 +1,50 @@
# Observability stack for Plano: the OTEL Collector receives traces, Tempo
# stores them, Prometheus scrapes the derived metrics, and Grafana renders both.
services:
  # OpenTelemetry Collector: receives traces from Plano, derives Prometheus
  # metrics via the spanmetrics connector, and forwards traces to Tempo.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.102.0
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    ports:
      - "9317:4317"  # OTLP gRPC (Plano sends traces here)
      - "8889:8889"  # Prometheus metrics endpoint (spanmetrics)
    depends_on:
      - tempo

  # Tempo: trace backend that the collector exports to.
  tempo:
    image: grafana/tempo:2.5.0
    command: ["-config.file=/etc/tempo.yaml"]
    volumes:
      - ./tempo.yaml:/etc/tempo.yaml:ro
    ports:
      - "9200:3200"  # Tempo HTTP API

  # Prometheus: scrapes the collector's spanmetrics endpoint and Envoy stats
  # (targets are defined in prometheus.yaml).
  prometheus:
    image: prom/prometheus:v2.53.0
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.retention.time=7d"
    volumes:
      - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
    ports:
      - "9190:9090"
    # Map host.docker.internal to the Docker host gateway so the `envoy`
    # scrape target in prometheus.yaml resolves from inside the container.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    depends_on:
      - otel-collector

  # Grafana: dashboards and datasources are provisioned from ./grafana.
  grafana:
    image: grafana/grafana:11.1.0
    environment:
      # Demo-only credentials; change them before exposing Grafana anywhere.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "9000:3000"
    depends_on:
      - prometheus
      - tempo

View file

@ -0,0 +1,280 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"title": "LLM Requests / sec by Model",
"description": "Rate of LLM requests proxied through Plano, broken down by model name. Derived from OpenTelemetry trace spans via the spanmetrics connector.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "req/s",
"drawStyle": "line",
"fillOpacity": 15,
"lineWidth": 2,
"pointSize": 5,
"showPoints": "auto",
"stacking": { "mode": "none" }
},
"unit": "reqps"
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval])) by (llm_model)",
"legendFormat": "{{ llm_model }}",
"refId": "A"
}
]
},
{
"title": "Agent Requests / sec by Agent",
"description": "Rate of agent invocations through the orchestrator, broken down by agent ID. Derived from OpenTelemetry trace spans.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "req/s",
"drawStyle": "line",
"fillOpacity": 15,
"lineWidth": 2,
"pointSize": 5,
"showPoints": "auto",
"stacking": { "mode": "none" }
},
"unit": "reqps"
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval])) by (agent_id)",
"legendFormat": "{{ agent_id }}",
"refId": "A"
}
]
},
{
"title": "Total Requests / sec",
"description": "Aggregate request rate per Plano service: LLM, agent, and orchestrator.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "fixed", "fixedColor": "blue" },
"custom": {
"axisBorderShow": false,
"axisLabel": "req/s",
"drawStyle": "line",
"fillOpacity": 20,
"lineWidth": 2,
"pointSize": 5,
"showPoints": "auto",
"stacking": { "mode": "none" }
},
"unit": "reqps"
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval]))",
"legendFormat": "Total LLM Requests",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval]))",
"legendFormat": "Total Agent Requests",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(rate(calls_total{service_name=\"plano(orchestrator)\"}[$__rate_interval]))",
"legendFormat": "Total Orchestrator Requests",
"refId": "C"
}
]
},
{
"title": "Rate-Limited Requests / sec",
"description": "Rate of requests rejected by Envoy WASM rate limiting. This is a global counter from the llm_gateway filter — no per-model breakdown is available.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "fixed", "fixedColor": "red" },
"custom": {
"axisBorderShow": false,
"axisLabel": "req/s",
"drawStyle": "line",
"fillOpacity": 20,
"lineWidth": 2,
"pointSize": 5,
"showPoints": "auto",
"stacking": { "mode": "none" }
},
"unit": "reqps"
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "rate(envoy_wasmcustom_ratelimited_rq[$__rate_interval])",
"legendFormat": "Rate-Limited",
"refId": "A"
}
]
},
{
"title": "LLM Request Latency p50 / p95 / p99 by Model",
"description": "Request duration percentiles from trace-derived histograms, broken down by model.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "ms",
"drawStyle": "line",
"fillOpacity": 0,
"lineWidth": 2,
"pointSize": 5,
"showPoints": "auto",
"stacking": { "mode": "none" }
},
"unit": "ms"
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "desc" }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "histogram_quantile(0.50, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
"legendFormat": "p50 {{ llm_model }}",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "histogram_quantile(0.95, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
"legendFormat": "p95 {{ llm_model }}",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "histogram_quantile(0.99, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
"legendFormat": "p99 {{ llm_model }}",
"refId": "C"
}
]
},
{
"title": "Cumulative Request Count by Model",
"description": "Total number of LLM requests per model since the collector started.",
"type": "stat",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic-by-name" },
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
},
"unit": "short"
},
"overrides": []
},
"options": {
"colorMode": "background_gradient",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto",
"wideLayout": true
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sum(calls_total{service_name=\"plano(llm)\"}) by (llm_model)",
"legendFormat": "{{ llm_model }}",
"refId": "A"
}
]
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": ["plano", "llm", "observability"],
"templating": {
"list": [
{
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
}
]
},
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "Plano - Requests Overview",
"uid": "plano-requests-overview",
"version": 1
}

View file

@ -0,0 +1,12 @@
# Grafana dashboard provisioning: loads dashboard files found under
# /var/lib/grafana/dashboards into the "Plano" folder.
apiVersion: 1
providers:
  - name: Plano
    orgId: 1
    folder: Plano
    type: file
    disableDeletion: false
    editable: true
    options:
      path: /var/lib/grafana/dashboards
      # All dashboards go into the single `folder` above rather than
      # mirroring the on-disk directory layout.
      foldersFromFilesStructure: false

View file

@ -0,0 +1,20 @@
# Grafana datasource provisioning: Prometheus (default) for metrics and
# Tempo for traces, with Tempo linked back to Prometheus.
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    # Pin the UID explicitly; without it Grafana generates one and the
    # `datasourceUid` references under the Tempo datasource cannot resolve.
    uid: Prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true
  - name: Tempo
    type: tempo
    access: proxy
    url: http://tempo:3200
    editable: true
    jsonData:
      # Both links must match the `uid` of the Prometheus datasource above.
      tracesToMetrics:
        datasourceUid: Prometheus
      serviceMap:
        datasourceUid: Prometheus

View file

@ -0,0 +1,40 @@
# OTEL Collector pipeline: OTLP traces in, traces out to Tempo, and
# span-derived metrics exposed for Prometheus to scrape.
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317

connectors:
  # Turns spans into call-count and duration-histogram metrics.
  spanmetrics:
    # Span attributes copied onto the generated metrics as labels.
    dimensions:
      - name: llm.model
      - name: agent_id
      - name: selection.listener
      - name: http.method
      - name: http.status_code
    histogram:
      explicit:
        # Bucket bounds are durations and need a unit suffix; a bare integer
        # is not interpreted as milliseconds.
        buckets: [5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 2500ms, 5000ms, 10000ms]

exporters:
  otlp/tempo:
    endpoint: tempo:4317
    tls:
      insecure: true
  # Scrape endpoint published by docker-compose as port 8889.
  prometheus:
    endpoint: 0.0.0.0:8889

processors:
  batch:
    timeout: 5s
    send_batch_size: 1024

service:
  pipelines:
    # Traces fan out to Tempo and to the spanmetrics connector.
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlp/tempo, spanmetrics]
    # The connector acts as the receiver of the metrics pipeline.
    metrics/spanmetrics:
      receivers: [spanmetrics]
      exporters: [prometheus]

View file

@ -0,0 +1,15 @@
# Prometheus scrape configuration for the Plano observability demo.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Span-derived metrics exposed by the OTEL Collector's prometheus exporter.
  - job_name: otel-collector-spanmetrics
    static_configs:
      - targets: ["otel-collector:8889"]

  # Scrape Envoy stats for WASM metrics (ratelimited_rq, etc.)
  # Adjust the target if your Envoy admin port differs.
  - job_name: envoy
    metrics_path: /stats/prometheus
    static_configs:
      - targets: ["host.docker.internal:9901"]

View file

@ -0,0 +1,25 @@
# Tempo configuration: OTLP gRPC ingest on 4317, HTTP API on 3200, and
# trace storage on the local filesystem.
stream_over_http_enabled: true

server:
  http_listen_port: 3200

distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          # The OTEL Collector's otlp/tempo exporter sends traces here.
          endpoint: 0.0.0.0:4317

storage:
  trace:
    backend: local
    local:
      path: /var/tempo/traces
    wal:
      path: /var/tempo/wal

metrics_generator:
  registry:
    external_labels:
      source: tempo
  storage:
    path: /var/tempo/generator/wal