mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Merge a43c3d7557 into 78dc4edad9
This commit is contained in:
commit
1e21239f7b
8 changed files with 545 additions and 0 deletions
103
demos/observability/README.md
Normal file
103
demos/observability/README.md
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
# Plano Observability Stack
|
||||
|
||||
Grafana dashboard for monitoring Plano LLM gateway traffic using trace-derived metrics.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Plano (brightstaff) --OTLP gRPC--> OTEL Collector --traces--> Tempo
                                         |
                               spanmetrics connector
                                         |
                                         v
                                    Prometheus <--- Grafana
                                         ^
                                         |
                              Envoy /stats/prometheus
|
||||
```
|
||||
|
||||
The OTEL Collector receives traces from Plano and does two things:
|
||||
1. Forwards them to Tempo for trace viewing
|
||||
2. Derives Prometheus metrics (request counts, latency histograms) from spans via the **spanmetrics connector**
|
||||
|
||||
Prometheus also scrapes Envoy's native stats endpoint for WASM metrics like `ratelimited_rq`.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Start the observability stack
|
||||
|
||||
```bash
|
||||
cd demos/observability
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### 2. Configure Plano to send traces to the OTEL Collector
|
||||
|
||||
Add or update the `tracing` section in your `plano_config.yaml`:
|
||||
|
||||
```yaml
|
||||
tracing:
|
||||
# Sample 100% of requests (adjust for production)
|
||||
random_sampling: 100
|
||||
# Point at the OTEL Collector's OTLP gRPC port (host port 9317)
|
||||
opentracing_grpc_endpoint: http://localhost:9317
|
||||
```
|
||||
|
||||
If Plano is running inside Docker on the same network, use the service name
|
||||
and the container-internal port instead:
|
||||
|
||||
```yaml
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
opentracing_grpc_endpoint: http://otel-collector:4317
|
||||
```
|
||||
|
||||
### 3. Restart Plano
|
||||
|
||||
Restart Plano so brightstaff picks up the new tracing config. Traces will flow
|
||||
into the OTEL Collector, which forwards them to Tempo and generates Prometheus
|
||||
metrics from span data.
|
||||
|
||||
### 4. Open Grafana
|
||||
|
||||
Navigate to http://localhost:9000 and log in with `admin` / `admin`.
|
||||
The **Plano - Requests Overview** dashboard is auto-provisioned under the
|
||||
"Plano" folder. Send a few requests through Plano and the panels will
|
||||
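start populating after the spanmetrics connector flushes (every 60 seconds by default unless `metrics_flush_interval` is set) and Prometheus completes its next scrape (every 15 seconds).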
|
||||
|
||||
## Access
|
||||
|
||||
| Service | URL | Credentials |
|
||||
|----------------|------------------------------|---------------|
|
||||
| Grafana | http://localhost:9000 | admin / admin |
|
||||
| Tempo | http://localhost:9200 | |
|
||||
| Prometheus | http://localhost:9190 | |
|
||||
| OTEL Collector | http://localhost:9317 (gRPC) | |
|
||||
|
||||
The **Plano - Requests Overview** dashboard is auto-provisioned in Grafana under the "Plano" folder.
|
||||
|
||||
## Dashboard Panels
|
||||
|
||||
| Panel | Query Source | What It Shows |
|
||||
|-------|-------------|---------------|
|
||||
| LLM Requests/sec by Model | spanmetrics `calls_total{service_name="plano(llm)"}` by `llm_model` | Per-model request rate over time |
|
||||
| Agent Requests/sec by Agent | spanmetrics `calls_total{service_name="plano(agent)"}` by `agent_id` | Per-agent invocation rate over time |
|
||||
| Total Requests/sec | spanmetrics `calls_total` by service | Aggregate request rate across LLM, agent, and orchestrator |
|
||||
| Rate-Limited Requests/sec | Envoy `envoy_wasmcustom_ratelimited_rq` | Global rate-limit rejections (no per-model breakdown) |
|
||||
| LLM Latency p50/p95/p99 by Model | spanmetrics `duration_milliseconds_bucket` | End-to-end latency percentiles per model |
|
||||
| Cumulative Request Count by Model | spanmetrics `calls_total{service_name="plano(llm)"}` by `llm_model` | Total LLM requests per model since the collector started |
|
||||
|
||||
## Envoy Stats
|
||||
|
||||
For the rate-limit panel to work, Prometheus needs to scrape Envoy's admin stats endpoint.
|
||||
The default config assumes Envoy's admin interface is at `host.docker.internal:9901`.
|
||||
Adjust `prometheus.yaml` if your Envoy admin port differs.
|
||||
|
||||
## Span Attributes Used
|
||||
|
||||
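These attributes are set by brightstaff's tracing instrumentation. In Prometheus, dots in attribute names become underscores, so `llm.model` is queried as the `llm_model` label and `service.name` as `service_name`: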
|
||||
|
||||
- `service.name` — `plano(llm)`, `plano(agent)`, `plano(orchestrator)`, `plano(filter)`, `plano(routing)`
|
||||
- `llm.model` — model name (e.g., `gpt-4`, `claude-3-sonnet`)
|
||||
- `agent_id` — agent identifier from the orchestrator
|
||||
- `selection.listener` — listener that triggered agent selection
|
||||
50
demos/observability/docker-compose.yaml
Normal file
50
demos/observability/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
services:
|
||||
# OpenTelemetry Collector: receives traces from Plano, derives Prometheus
|
||||
# metrics via the spanmetrics connector, and forwards traces to Tempo.
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:0.102.0
|
||||
command: ["--config=/etc/otel-collector-config.yaml"]
|
||||
volumes:
|
||||
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
|
||||
ports:
|
||||
- "9317:4317" # OTLP gRPC (Plano sends traces here)
|
||||
- "8889:8889" # Prometheus metrics endpoint (spanmetrics)
|
||||
depends_on:
|
||||
- tempo
|
||||
|
||||
tempo:
|
||||
image: grafana/tempo:2.5.0
|
||||
command: ["-config.file=/etc/tempo.yaml"]
|
||||
volumes:
|
||||
- ./tempo.yaml:/etc/tempo.yaml:ro
|
||||
ports:
|
||||
- "9200:3200" # Tempo HTTP API
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.53.0
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.retention.time=7d"
|
||||
volumes:
|
||||
- ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
|
||||
ports:
|
||||
- "9190:9090"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
depends_on:
|
||||
- otel-collector
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:11.1.0
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_USER=admin
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
- GF_USERS_ALLOW_SIGN_UP=false
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||
ports:
|
||||
- "9000:3000"
|
||||
depends_on:
|
||||
- prometheus
|
||||
- tempo
|
||||
280
demos/observability/grafana/dashboards/plano-requests.json
Normal file
280
demos/observability/grafana/dashboards/plano-requests.json
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"title": "LLM Requests / sec by Model",
|
||||
"description": "Rate of LLM requests proxied through Plano, broken down by model name. Derived from OpenTelemetry trace spans via the spanmetrics connector.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisLabel": "req/s",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 15,
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto",
|
||||
"stacking": { "mode": "none" }
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval])) by (llm_model)",
|
||||
"legendFormat": "{{ llm_model }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Agent Requests / sec by Agent",
|
||||
"description": "Rate of agent invocations through the orchestrator, broken down by agent ID. Derived from OpenTelemetry trace spans.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisLabel": "req/s",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 15,
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto",
|
||||
"stacking": { "mode": "none" }
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval])) by (agent_id)",
|
||||
"legendFormat": "{{ agent_id }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Total Requests / sec by Service",
|
||||
"description": "Aggregate request rate for each Plano service (LLM, agent, and orchestrator).",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "fixed", "fixedColor": "blue" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisLabel": "req/s",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto",
|
||||
"stacking": { "mode": "none" }
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "sum(rate(calls_total{service_name=\"plano(llm)\"}[$__rate_interval]))",
|
||||
"legendFormat": "Total LLM Requests",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "sum(rate(calls_total{service_name=\"plano(agent)\"}[$__rate_interval]))",
|
||||
"legendFormat": "Total Agent Requests",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "sum(rate(calls_total{service_name=\"plano(orchestrator)\"}[$__rate_interval]))",
|
||||
"legendFormat": "Total Orchestrator Requests",
|
||||
"refId": "C"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Rate-Limited Requests / sec",
|
||||
"description": "Rate of requests rejected by Envoy WASM rate limiting. This is a global counter from the llm_gateway filter — no per-model breakdown is available.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "fixed", "fixedColor": "red" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisLabel": "req/s",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto",
|
||||
"stacking": { "mode": "none" }
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "rate(envoy_wasmcustom_ratelimited_rq[$__rate_interval])",
|
||||
"legendFormat": "Rate-Limited",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "LLM Request Latency p50 / p95 / p99 by Model",
|
||||
"description": "Request duration percentiles from trace-derived histograms, broken down by model.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisLabel": "ms",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto",
|
||||
"stacking": { "mode": "none" }
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max", "lastNotNull"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
|
||||
"legendFormat": "p50 {{ llm_model }}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
|
||||
"legendFormat": "p95 {{ llm_model }}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "histogram_quantile(0.99, sum(rate(duration_milliseconds_bucket{service_name=\"plano(llm)\"}[$__rate_interval])) by (le, llm_model))",
|
||||
"legendFormat": "p99 {{ llm_model }}",
|
||||
"refId": "C"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Cumulative Request Count by Model",
|
||||
"description": "Total number of LLM requests per model since the collector started.",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic-by-name" },
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "background_gradient",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"expr": "sum(calls_total{service_name=\"plano(llm)\"}) by (llm_model)",
|
||||
"legendFormat": "{{ llm_model }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": ["plano", "llm", "observability"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "datasource",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": { "from": "now-1h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Plano - Requests Overview",
|
||||
"uid": "plano-requests-overview",
|
||||
"version": 1
|
||||
}
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: Plano
|
||||
orgId: 1
|
||||
folder: Plano
|
||||
type: file
|
||||
disableDeletion: false
|
||||
editable: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: false
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
access: proxy
|
||||
url: http://tempo:3200
|
||||
editable: true
|
||||
jsonData:
|
||||
tracesToMetrics:
|
||||
datasourceUid: Prometheus
|
||||
serviceMap:
|
||||
datasourceUid: Prometheus
|
||||
40
demos/observability/otel-collector-config.yaml
Normal file
40
demos/observability/otel-collector-config.yaml
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
connectors:
|
||||
spanmetrics:
|
||||
dimensions:
|
||||
- name: llm.model
|
||||
- name: agent_id
|
||||
- name: selection.listener
|
||||
- name: http.method
|
||||
- name: http.status_code
|
||||
histogram:
|
||||
explicit:
|
||||
buckets: [5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000]
|
||||
|
||||
exporters:
|
||||
otlp/tempo:
|
||||
endpoint: tempo:4317
|
||||
tls:
|
||||
insecure: true
|
||||
prometheus:
|
||||
endpoint: 0.0.0.0:8889
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 5s
|
||||
send_batch_size: 1024
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [otlp/tempo, spanmetrics]
|
||||
metrics/spanmetrics:
|
||||
receivers: [spanmetrics]
|
||||
exporters: [prometheus]
|
||||
15
demos/observability/prometheus.yaml
Normal file
15
demos/observability/prometheus.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: otel-collector-spanmetrics
|
||||
static_configs:
|
||||
- targets: ["otel-collector:8889"]
|
||||
|
||||
# Scrape Envoy stats for WASM metrics (ratelimited_rq, etc.)
|
||||
# Adjust the target if your Envoy admin port differs.
|
||||
- job_name: envoy
|
||||
metrics_path: /stats/prometheus
|
||||
static_configs:
|
||||
- targets: ["host.docker.internal:9901"]
|
||||
25
demos/observability/tempo.yaml
Normal file
25
demos/observability/tempo.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
stream_over_http_enabled: true
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: local
|
||||
local:
|
||||
path: /var/tempo/traces
|
||||
wal:
|
||||
path: /var/tempo/wal
|
||||
|
||||
metrics_generator:
|
||||
registry:
|
||||
external_labels:
|
||||
source: tempo
|
||||
storage:
|
||||
path: /var/tempo/generator/wal
|
||||
Loading…
Add table
Add a link
Reference in a new issue