mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Add Prometheus metrics endpoint and Grafana dashboard for brightstaff (#904)
Some checks are pending
CI / pre-commit (push) Waiting to run
CI / plano-tools-tests (push) Waiting to run
CI / native-smoke-test (push) Waiting to run
CI / docker-build (push) Waiting to run
CI / validate-config (push) Waiting to run
CI / security-scan (push) Blocked by required conditions
CI / test-prompt-gateway (push) Blocked by required conditions
CI / test-model-alias-routing (push) Blocked by required conditions
CI / test-responses-api-with-state (push) Blocked by required conditions
CI / e2e-plano-tests (3.10) (push) Blocked by required conditions
CI / e2e-plano-tests (3.11) (push) Blocked by required conditions
CI / e2e-plano-tests (3.12) (push) Blocked by required conditions
CI / e2e-plano-tests (3.13) (push) Blocked by required conditions
CI / e2e-plano-tests (3.14) (push) Blocked by required conditions
CI / e2e-demo-preference (push) Blocked by required conditions
CI / e2e-demo-currency (push) Blocked by required conditions
Publish docker image (latest) / build-arm64 (push) Waiting to run
Publish docker image (latest) / build-amd64 (push) Waiting to run
Publish docker image (latest) / create-manifest (push) Blocked by required conditions
Build and Deploy Documentation / build (push) Waiting to run
Some checks are pending
CI / pre-commit (push) Waiting to run
CI / plano-tools-tests (push) Waiting to run
CI / native-smoke-test (push) Waiting to run
CI / docker-build (push) Waiting to run
CI / validate-config (push) Waiting to run
CI / security-scan (push) Blocked by required conditions
CI / test-prompt-gateway (push) Blocked by required conditions
CI / test-model-alias-routing (push) Blocked by required conditions
CI / test-responses-api-with-state (push) Blocked by required conditions
CI / e2e-plano-tests (3.10) (push) Blocked by required conditions
CI / e2e-plano-tests (3.11) (push) Blocked by required conditions
CI / e2e-plano-tests (3.12) (push) Blocked by required conditions
CI / e2e-plano-tests (3.13) (push) Blocked by required conditions
CI / e2e-plano-tests (3.14) (push) Blocked by required conditions
CI / e2e-demo-preference (push) Blocked by required conditions
CI / e2e-demo-currency (push) Blocked by required conditions
Publish docker image (latest) / build-arm64 (push) Waiting to run
Publish docker image (latest) / build-amd64 (push) Waiting to run
Publish docker image (latest) / create-manifest (push) Blocked by required conditions
Build and Deploy Documentation / build (push) Waiting to run
This commit is contained in:
parent
9812540602
commit
22f332f62d
17 changed files with 1682 additions and 6 deletions
541
config/grafana/brightstaff_dashboard.json
Normal file
541
config/grafana/brightstaff_dashboard.json
Normal file
|
|
@ -0,0 +1,541 @@
|
||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"builtIn": 1,
|
||||||
|
"datasource": "-- Grafana --",
|
||||||
|
"enable": true,
|
||||||
|
"hide": true,
|
||||||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||||||
|
"name": "Annotations & Alerts",
|
||||||
|
"type": "dashboard"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"description": "RED, LLM upstream, routing service, and process metrics for brightstaff. Pair with Envoy admin metrics from cluster=bright_staff.",
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"liveNow": false,
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||||
|
"id": 100,
|
||||||
|
"panels": [],
|
||||||
|
"title": "HTTP RED",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "req/s",
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"lineWidth": 1,
|
||||||
|
"showPoints": "never"
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (handler) (rate(brightstaff_http_requests_total[1m]))",
|
||||||
|
"legendFormat": "{{handler}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Rate — brightstaff RPS by handler",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "5xx fraction over 5m. Page-worthy when sustained above ~1%.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "thresholds" },
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 0.01 },
|
||||||
|
{ "color": "red", "value": 0.05 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percentunit"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(brightstaff_http_requests_total{status_class=\"5xx\"}[5m])) / (sum(rate(brightstaff_http_requests_total[5m])) > 0)",
|
||||||
|
"legendFormat": "5xx rate",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Errors — brightstaff 5xx rate",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "p50/p95/p99 by handler, computed from histogram buckets over 5m.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 },
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.50, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p50 {{handler}}",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{handler}}",
|
||||||
|
"refId": "B"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.99, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p99 {{handler}}",
|
||||||
|
"refId": "C"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Duration — p50 / p95 / p99 by handler",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "In-flight requests by handler. Climbs before latency does when brightstaff is saturated.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "short"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 18 },
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (handler) (brightstaff_http_in_flight_requests)",
|
||||||
|
"legendFormat": "{{handler}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "In-flight requests by handler",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 },
|
||||||
|
"id": 200,
|
||||||
|
"panels": [],
|
||||||
|
"title": "LLM upstream",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 27 },
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_upstream_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{provider}}/{{model}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "LLM upstream p95 by provider/model",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "All non-success error classes. timeout/connect = network, 5xx/429 = provider, parse = body shape mismatch, stream = mid-stream disconnect.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 27 },
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (provider, error_class) (rate(brightstaff_llm_upstream_requests_total{error_class!=\"none\"}[5m]))",
|
||||||
|
"legendFormat": "{{provider}} / {{error_class}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "LLM upstream errors by provider / class",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Streaming only. Empty if the route never streams.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 36 },
|
||||||
|
"id": 7,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_time_to_first_token_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{provider}}/{{model}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Time-to-first-token p95 (streaming)",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Tokens/sec by provider/model/kind — proxy for cost. Stacked.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||||
|
"unit": "tokens/s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 36 },
|
||||||
|
"id": 8,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (provider, model, kind) (rate(brightstaff_llm_tokens_total[5m]))",
|
||||||
|
"legendFormat": "{{provider}}/{{model}} {{kind}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Token throughput by provider / model / kind",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 },
|
||||||
|
"id": 300,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Routing service",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Which models the orchestrator picked over the last 15 minutes.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"unit": "short"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 46 },
|
||||||
|
"id": 9,
|
||||||
|
"options": {
|
||||||
|
"displayMode": "gradient",
|
||||||
|
"orientation": "horizontal",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (selected_model) (increase(brightstaff_router_decisions_total[15m]))",
|
||||||
|
"legendFormat": "{{selected_model}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Model selection distribution (last 15m)",
|
||||||
|
"type": "bargauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Fraction of decisions that fell back (orchestrator returned `none` or errored). High = router can't classify intent or no candidates configured.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "percentunit"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 46 },
|
||||||
|
"id": 10,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (route) (rate(brightstaff_router_decisions_total{fallback=\"true\"}[5m])) / (sum by (route) (rate(brightstaff_router_decisions_total[5m])) > 0)",
|
||||||
|
"legendFormat": "{{route}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Fallback rate by route",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 55 },
|
||||||
|
"id": 11,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, route) (rate(brightstaff_router_decision_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{route}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Router decision p95 latency",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Hit / (hit + miss). Low ratio = sessions aren't being reused or TTL too short.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "thresholds" },
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "red", "value": null },
|
||||||
|
{ "color": "yellow", "value": 0.5 },
|
||||||
|
{ "color": "green", "value": 0.8 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percentunit",
|
||||||
|
"min": 0,
|
||||||
|
"max": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 6, "x": 12, "y": 55 },
|
||||||
|
"id": 12,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(brightstaff_session_cache_events_total{outcome=\"hit\"}[5m])) / (sum(rate(brightstaff_session_cache_events_total{outcome=~\"hit|miss\"}[5m])) > 0)",
|
||||||
|
"legendFormat": "hit rate",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Session cache hit rate",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "decision_served = a real model picked. no_candidates = sentinel `none` returned. policy_error = orchestrator failed.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 6, "x": 18, "y": 55 },
|
||||||
|
"id": 13,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "list", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (outcome) (rate(brightstaff_routing_service_requests_total[5m]))",
|
||||||
|
"legendFormat": "{{outcome}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "/routing/* outcomes",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 63 },
|
||||||
|
"id": 400,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Process & Envoy link",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Compare to brightstaff RPS (panel 1) — sustained gap = network or Envoy queueing.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 64 },
|
||||||
|
"id": 14,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(envoy_cluster_upstream_rq_total{envoy_cluster_name=\"bright_staff\"}[1m]))",
|
||||||
|
"legendFormat": "envoy → bright_staff",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(brightstaff_http_requests_total[1m]))",
|
||||||
|
"legendFormat": "brightstaff served",
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Envoy → brightstaff link health",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" }
|
||||||
|
},
|
||||||
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "RSS" },
|
||||||
|
"properties": [{ "id": "unit", "value": "bytes" }]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "CPU" },
|
||||||
|
"properties": [{ "id": "unit", "value": "percentunit" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 64 },
|
||||||
|
"id": 15,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "process_resident_memory_bytes{job=\"brightstaff\"}",
|
||||||
|
"legendFormat": "RSS",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "rate(process_cpu_seconds_total{job=\"brightstaff\"}[1m])",
|
||||||
|
"legendFormat": "CPU",
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Brightstaff process RSS / CPU",
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["plano", "brightstaff", "llm"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"name": "DS_PROMETHEUS",
|
||||||
|
"label": "Prometheus",
|
||||||
|
"type": "datasource",
|
||||||
|
"query": "prometheus",
|
||||||
|
"current": { "selected": false, "text": "Prometheus", "value": "DS_PROMETHEUS" },
|
||||||
|
"hide": 0,
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"skipUrlSync": false,
|
||||||
|
"includeAll": false,
|
||||||
|
"multi": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": { "from": "now-1h", "to": "now" },
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Brightstaff (Plano dataplane)",
|
||||||
|
"uid": "brightstaff",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
43
config/grafana/docker-compose.yaml
Normal file
43
config/grafana/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
# One-command Prometheus + Grafana stack for observing a locally-running
|
||||||
|
# Plano (Envoy admin :9901 + brightstaff :9092 on the host).
|
||||||
|
#
|
||||||
|
# cd config/grafana
|
||||||
|
# docker compose up -d
|
||||||
|
# open http://localhost:3000 (admin / admin)
|
||||||
|
#
|
||||||
|
# Grafana is preloaded with:
|
||||||
|
# - Prometheus datasource (uid=DS_PROMETHEUS) → http://prometheus:9090
|
||||||
|
# - Brightstaff dashboard (auto-imported from brightstaff_dashboard.json)
|
||||||
|
#
|
||||||
|
# Prometheus scrapes the host's :9092 and :9901 via host.docker.internal.
|
||||||
|
# On Linux this works because of the `extra_hosts: host-gateway` mapping
|
||||||
|
# below. On Mac/Win it works natively.
|
||||||
|
|
||||||
|
services:
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
container_name: plano-prometheus
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
volumes:
|
||||||
|
- ./prometheus_scrape.yaml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
container_name: plano-grafana
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
GF_SECURITY_ADMIN_USER: admin
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD: admin
|
||||||
|
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||||
|
GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer
|
||||||
|
volumes:
|
||||||
|
- ./provisioning:/etc/grafana/provisioning:ro
|
||||||
|
- ./brightstaff_dashboard.json:/var/lib/grafana/dashboards/brightstaff_dashboard.json:ro
|
||||||
|
depends_on:
|
||||||
|
- prometheus
|
||||||
|
restart: unless-stopped
|
||||||
44
config/grafana/prometheus_scrape.yaml
Normal file
44
config/grafana/prometheus_scrape.yaml
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
# Prometheus config that scrapes Plano (Envoy admin + brightstaff). This is
|
||||||
|
# a complete Prometheus config — mount it directly at
|
||||||
|
# /etc/prometheus/prometheus.yml. The included docker-compose.yaml does this
|
||||||
|
# for you.
|
||||||
|
#
|
||||||
|
# Targets:
|
||||||
|
# - envoy:9901 Envoy admin → envoy_cluster_*, envoy_http_*, envoy_server_*.
|
||||||
|
# - brightstaff:9092 Native dataplane → brightstaff_http_*, brightstaff_llm_*,
|
||||||
|
# brightstaff_router_*, process_*.
|
||||||
|
#
|
||||||
|
# Hostname `host.docker.internal` works on Docker Desktop (Mac/Win) and on
|
||||||
|
# Linux when the container is started with
|
||||||
|
# `--add-host=host.docker.internal:host-gateway` (the included compose does
|
||||||
|
# this). If Plano runs *inside* Docker on the same network as Prometheus,
|
||||||
|
# replace it with the container name (e.g. `plano:9092`).
|
||||||
|
#
|
||||||
|
# This file is unrelated to demos/llm_routing/model_routing_service/prometheus.yaml,
|
||||||
|
# which scrapes a fake metrics service to feed the routing engine.
|
||||||
|
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
scrape_timeout: 10s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: envoy
|
||||||
|
honor_timestamps: true
|
||||||
|
metrics_path: /stats
|
||||||
|
params:
|
||||||
|
format: ["prometheus"]
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- host.docker.internal:9901
|
||||||
|
labels:
|
||||||
|
service: plano
|
||||||
|
|
||||||
|
- job_name: brightstaff
|
||||||
|
honor_timestamps: true
|
||||||
|
metrics_path: /metrics
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- host.docker.internal:9092
|
||||||
|
labels:
|
||||||
|
service: plano
|
||||||
15
config/grafana/provisioning/dashboards/brightstaff.yaml
Normal file
15
config/grafana/provisioning/dashboards/brightstaff.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
# Auto-load the brightstaff dashboard JSON on Grafana startup.
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: brightstaff
|
||||||
|
orgId: 1
|
||||||
|
folder: Plano
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
updateIntervalSeconds: 30
|
||||||
|
allowUiUpdates: true
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
|
foldersFromFilesStructure: false
|
||||||
14
config/grafana/provisioning/datasources/prometheus.yaml
Normal file
14
config/grafana/provisioning/datasources/prometheus.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
# Auto-provision the Prometheus datasource so the bundled dashboard wires up
|
||||||
|
# without any clicks. The `uid: DS_PROMETHEUS` matches the templated input in
|
||||||
|
# brightstaff_dashboard.json.
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
uid: DS_PROMETHEUS
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
editable: true
|
||||||
332
crates/Cargo.lock
generated
332
crates/Cargo.lock
generated
|
|
@ -23,6 +23,18 @@ version = "0.3.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
|
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"version_check",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "1.1.4"
|
version = "1.1.4"
|
||||||
|
|
@ -257,6 +269,24 @@ dependencies = [
|
||||||
"vsimd",
|
"vsimd",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.72.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools 0.13.0",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash 2.1.2",
|
||||||
|
"shlex",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bit-set"
|
name = "bit-set"
|
||||||
version = "0.5.3"
|
version = "0.5.3"
|
||||||
|
|
@ -316,6 +346,9 @@ dependencies = [
|
||||||
"hyper 1.9.0",
|
"hyper 1.9.0",
|
||||||
"hyper-util",
|
"hyper-util",
|
||||||
"lru",
|
"lru",
|
||||||
|
"metrics 0.23.1",
|
||||||
|
"metrics-exporter-prometheus",
|
||||||
|
"metrics-process",
|
||||||
"mockito",
|
"mockito",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
"opentelemetry-http",
|
"opentelemetry-http",
|
||||||
|
|
@ -391,6 +424,15 @@ dependencies = [
|
||||||
"shlex",
|
"shlex",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cexpr"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
|
|
@ -428,6 +470,17 @@ dependencies = [
|
||||||
"windows-link",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clang-sys"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||||
|
dependencies = [
|
||||||
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cmov"
|
name = "cmov"
|
||||||
version = "0.5.3"
|
version = "0.5.3"
|
||||||
|
|
@ -574,6 +627,21 @@ dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crypto-common"
|
name = "crypto-common"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
|
|
@ -1070,6 +1138,12 @@ dependencies = [
|
||||||
"wasip3",
|
"wasip3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "governor"
|
name = "governor"
|
||||||
version = "0.6.3"
|
version = "0.6.3"
|
||||||
|
|
@ -1128,7 +1202,7 @@ version = "0.8.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
|
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash 0.3.8",
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -1138,6 +1212,15 @@ version = "0.12.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.14.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.15.5"
|
version = "0.15.5"
|
||||||
|
|
@ -1189,6 +1272,12 @@ dependencies = [
|
||||||
"uuid",
|
"uuid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hex"
|
name = "hex"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
|
|
@ -1665,6 +1754,27 @@ version = "0.2.185"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
|
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libproc"
|
||||||
|
version = "0.14.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a54ad7278b8bc5301d5ffd2a94251c004feb971feba96c971ea4063645990757"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libredox"
|
name = "libredox"
|
||||||
version = "0.1.16"
|
version = "0.1.16"
|
||||||
|
|
@ -1745,6 +1855,12 @@ version = "0.1.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mach2"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
|
@ -1782,6 +1898,77 @@ version = "2.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics"
|
||||||
|
version = "0.23.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
"portable-atomic",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics"
|
||||||
|
version = "0.24.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
"portable-atomic",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-exporter-prometheus"
|
||||||
|
version = "0.15.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6"
|
||||||
|
dependencies = [
|
||||||
|
"base64 0.22.1",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper 1.9.0",
|
||||||
|
"hyper-util",
|
||||||
|
"indexmap 2.14.0",
|
||||||
|
"ipnet",
|
||||||
|
"metrics 0.23.1",
|
||||||
|
"metrics-util",
|
||||||
|
"quanta",
|
||||||
|
"thiserror 1.0.69",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-process"
|
||||||
|
version = "2.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4268d87f64a752f5a651314fc683f04da10be65701ea3e721ba4d74f79163cac"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"libproc",
|
||||||
|
"mach2",
|
||||||
|
"metrics 0.24.3",
|
||||||
|
"once_cell",
|
||||||
|
"procfs",
|
||||||
|
"rlimit",
|
||||||
|
"windows",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-util"
|
||||||
|
version = "0.17.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"hashbrown 0.14.5",
|
||||||
|
"metrics 0.23.1",
|
||||||
|
"num_cpus",
|
||||||
|
"quanta",
|
||||||
|
"sketches-ddsketch",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mime"
|
name = "mime"
|
||||||
version = "0.3.17"
|
version = "0.3.17"
|
||||||
|
|
@ -1935,6 +2122,16 @@ dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.17.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "objc2-core-foundation"
|
name = "objc2-core-foundation"
|
||||||
version = "0.3.2"
|
version = "0.3.2"
|
||||||
|
|
@ -2278,6 +2475,27 @@ dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "procfs"
|
||||||
|
version = "0.18.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"procfs-core",
|
||||||
|
"rustix",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "procfs-core"
|
||||||
|
version = "0.18.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"hex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prompt_gateway"
|
name = "prompt_gateway"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
@ -2333,6 +2551,21 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quanta"
|
||||||
|
version = "0.12.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"raw-cpuid",
|
||||||
|
"wasi 0.11.1+wasi-snapshot-preview1",
|
||||||
|
"web-sys",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quinn"
|
name = "quinn"
|
||||||
version = "0.11.9"
|
version = "0.11.9"
|
||||||
|
|
@ -2485,6 +2718,15 @@ version = "0.10.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "raw-cpuid"
|
||||||
|
version = "11.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redis"
|
name = "redis"
|
||||||
version = "0.27.6"
|
version = "0.27.6"
|
||||||
|
|
@ -2646,6 +2888,15 @@ dependencies = [
|
||||||
"windows-sys 0.52.0",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rlimit"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f35ee2729c56bb610f6dba436bf78135f728b7373bdffae2ec815b2d3eb98cc3"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-hash"
|
name = "rustc-hash"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
|
|
@ -3098,6 +3349,12 @@ version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sketches-ddsketch"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "slab"
|
name = "slab"
|
||||||
version = "0.4.12"
|
version = "0.4.12"
|
||||||
|
|
@ -4003,6 +4260,49 @@ dependencies = [
|
||||||
"web-sys",
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows"
|
||||||
|
version = "0.62.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580"
|
||||||
|
dependencies = [
|
||||||
|
"windows-collections",
|
||||||
|
"windows-core",
|
||||||
|
"windows-future",
|
||||||
|
"windows-numerics",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-collections"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-core"
|
name = "windows-core"
|
||||||
version = "0.62.2"
|
version = "0.62.2"
|
||||||
|
|
@ -4016,6 +4316,17 @@ dependencies = [
|
||||||
"windows-strings",
|
"windows-strings",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-future"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
"windows-link",
|
||||||
|
"windows-threading",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-implement"
|
name = "windows-implement"
|
||||||
version = "0.60.2"
|
version = "0.60.2"
|
||||||
|
|
@ -4044,6 +4355,16 @@ version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-numerics"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-registry"
|
name = "windows-registry"
|
||||||
version = "0.6.1"
|
version = "0.6.1"
|
||||||
|
|
@ -4133,6 +4454,15 @@ dependencies = [
|
||||||
"windows_x86_64_msvc 0.53.1",
|
"windows_x86_64_msvc 0.53.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-threading"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_gnullvm"
|
name = "windows_aarch64_gnullvm"
|
||||||
version = "0.52.6"
|
version = "0.52.6"
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,9 @@ opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
|
||||||
pretty_assertions = "1.4.1"
|
pretty_assertions = "1.4.1"
|
||||||
rand = "0.9.2"
|
rand = "0.9.2"
|
||||||
lru = "0.12"
|
lru = "0.12"
|
||||||
|
metrics = "0.23"
|
||||||
|
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
|
||||||
|
metrics-process = "2.1"
|
||||||
redis = { version = "0.27", features = ["tokio-comp"] }
|
redis = { version = "0.27", features = ["tokio-comp"] }
|
||||||
reqwest = { version = "0.12.15", features = ["stream"] }
|
reqwest = { version = "0.12.15", features = ["stream"] }
|
||||||
serde = { version = "1.0.219", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
|
|
|
||||||
|
|
@ -24,13 +24,14 @@ use crate::app_state::AppState;
|
||||||
use crate::handlers::agents::pipeline::PipelineProcessor;
|
use crate::handlers::agents::pipeline::PipelineProcessor;
|
||||||
use crate::handlers::extract_request_id;
|
use crate::handlers::extract_request_id;
|
||||||
use crate::handlers::full;
|
use crate::handlers::full;
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
use crate::state::response_state_processor::ResponsesStateProcessor;
|
use crate::state::response_state_processor::ResponsesStateProcessor;
|
||||||
use crate::state::{
|
use crate::state::{
|
||||||
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
||||||
};
|
};
|
||||||
use crate::streaming::{
|
use crate::streaming::{
|
||||||
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
|
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
|
||||||
ObservableStreamProcessor, StreamProcessor,
|
LlmMetricsCtx, ObservableStreamProcessor, StreamProcessor,
|
||||||
};
|
};
|
||||||
use crate::tracing::{
|
use crate::tracing::{
|
||||||
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
||||||
|
|
@ -686,6 +687,13 @@ async fn send_upstream(
|
||||||
|
|
||||||
let request_start_time = std::time::Instant::now();
|
let request_start_time = std::time::Instant::now();
|
||||||
|
|
||||||
|
// Labels for LLM upstream metrics. We prefer `resolved_model` (post-routing)
|
||||||
|
// and derive the provider from its `provider/model` prefix. This matches the
|
||||||
|
// same model id the cost/latency router keys off.
|
||||||
|
let (metric_provider_raw, metric_model_raw) = bs_metrics::split_provider_model(resolved_model);
|
||||||
|
let metric_provider = metric_provider_raw.to_string();
|
||||||
|
let metric_model = metric_model_raw.to_string();
|
||||||
|
|
||||||
let llm_response = match http_client
|
let llm_response = match http_client
|
||||||
.post(upstream_url)
|
.post(upstream_url)
|
||||||
.headers(request_headers.clone())
|
.headers(request_headers.clone())
|
||||||
|
|
@ -695,6 +703,14 @@ async fn send_upstream(
|
||||||
{
|
{
|
||||||
Ok(res) => res,
|
Ok(res) => res,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
let err_class = bs_metrics::llm_error_class_from_reqwest(&err);
|
||||||
|
bs_metrics::record_llm_upstream(
|
||||||
|
&metric_provider,
|
||||||
|
&metric_model,
|
||||||
|
0,
|
||||||
|
err_class,
|
||||||
|
request_start_time.elapsed(),
|
||||||
|
);
|
||||||
let err_msg = format!("Failed to send request: {}", err);
|
let err_msg = format!("Failed to send request: {}", err);
|
||||||
let mut internal_error = Response::new(full(err_msg));
|
let mut internal_error = Response::new(full(err_msg));
|
||||||
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||||
|
|
@ -750,7 +766,12 @@ async fn send_upstream(
|
||||||
span_name,
|
span_name,
|
||||||
request_start_time,
|
request_start_time,
|
||||||
messages_for_signals,
|
messages_for_signals,
|
||||||
);
|
)
|
||||||
|
.with_llm_metrics(LlmMetricsCtx {
|
||||||
|
provider: metric_provider.clone(),
|
||||||
|
model: metric_model.clone(),
|
||||||
|
upstream_status: upstream_status.as_u16(),
|
||||||
|
});
|
||||||
|
|
||||||
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
|
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
|
||||||
Some(request_headers.clone())
|
Some(request_headers.clone())
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,24 @@ use hyper::StatusCode;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tracing::{debug, info, warn};
|
use tracing::{debug, info, warn};
|
||||||
|
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::router::orchestrator::OrchestratorService;
|
use crate::router::orchestrator::OrchestratorService;
|
||||||
use crate::streaming::truncate_message;
|
use crate::streaming::truncate_message;
|
||||||
use crate::tracing::routing;
|
use crate::tracing::routing;
|
||||||
|
|
||||||
|
/// Classify a request path by its leading `/agents` or `/routing` prefix (any
|
||||||
|
/// other path counts as LLM traffic) into the fixed `route` label used on routing metrics.
|
||||||
|
fn route_label_for_path(request_path: &str) -> &'static str {
|
||||||
|
if request_path.starts_with("/agents") {
|
||||||
|
metric_labels::ROUTE_AGENT
|
||||||
|
} else if request_path.starts_with("/routing") {
|
||||||
|
metric_labels::ROUTE_ROUTING
|
||||||
|
} else {
|
||||||
|
metric_labels::ROUTE_LLM
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct RoutingResult {
|
pub struct RoutingResult {
|
||||||
/// Primary model to use (first in the ranked list).
|
/// Primary model to use (first in the ranked list).
|
||||||
pub model_name: String,
|
pub model_name: String,
|
||||||
|
|
@ -106,15 +120,23 @@ pub async fn router_chat_get_upstream_model(
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
let determination_elapsed = routing_start_time.elapsed();
|
||||||
|
let determination_ms = determination_elapsed.as_millis() as i64;
|
||||||
let current_span = tracing::Span::current();
|
let current_span = tracing::Span::current();
|
||||||
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
||||||
|
let route_label = route_label_for_path(request_path);
|
||||||
|
|
||||||
match routing_result {
|
match routing_result {
|
||||||
Ok(route) => match route {
|
Ok(route) => match route {
|
||||||
Some((route_name, ranked_models)) => {
|
Some((route_name, ranked_models)) => {
|
||||||
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
||||||
current_span.record("route.selected_model", model_name.as_str());
|
current_span.record("route.selected_model", model_name.as_str());
|
||||||
|
bs_metrics::record_router_decision(
|
||||||
|
route_label,
|
||||||
|
&model_name,
|
||||||
|
false,
|
||||||
|
determination_elapsed,
|
||||||
|
);
|
||||||
Ok(RoutingResult {
|
Ok(RoutingResult {
|
||||||
model_name,
|
model_name,
|
||||||
models: ranked_models,
|
models: ranked_models,
|
||||||
|
|
@ -126,6 +148,12 @@ pub async fn router_chat_get_upstream_model(
|
||||||
// This signals to llm.rs to use the original validated request model
|
// This signals to llm.rs to use the original validated request model
|
||||||
current_span.record("route.selected_model", "none");
|
current_span.record("route.selected_model", "none");
|
||||||
info!("no route determined, using default model");
|
info!("no route determined, using default model");
|
||||||
|
bs_metrics::record_router_decision(
|
||||||
|
route_label,
|
||||||
|
"none",
|
||||||
|
true,
|
||||||
|
determination_elapsed,
|
||||||
|
);
|
||||||
|
|
||||||
Ok(RoutingResult {
|
Ok(RoutingResult {
|
||||||
model_name: "none".to_string(),
|
model_name: "none".to_string(),
|
||||||
|
|
@ -136,6 +164,7 @@ pub async fn router_chat_get_upstream_model(
|
||||||
},
|
},
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
current_span.record("route.selected_model", "unknown");
|
current_span.record("route.selected_model", "unknown");
|
||||||
|
bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
|
||||||
Err(RoutingError::internal_error(format!(
|
Err(RoutingError::internal_error(format!(
|
||||||
"Failed to determine route: {}",
|
"Failed to determine route: {}",
|
||||||
err
|
err
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@ use tracing::{debug, info, info_span, warn, Instrument};
|
||||||
|
|
||||||
use super::extract_or_generate_traceparent;
|
use super::extract_or_generate_traceparent;
|
||||||
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
|
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::router::orchestrator::OrchestratorService;
|
use crate::router::orchestrator::OrchestratorService;
|
||||||
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
|
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
|
||||||
|
|
||||||
|
|
@ -230,6 +232,17 @@ async fn routing_decision_inner(
|
||||||
pinned: false,
|
pinned: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Distinguish "decision served" (a concrete model picked) from
|
||||||
|
// "no_candidates" (the sentinel "none" returned when nothing
|
||||||
|
// matched). The handler still responds 200 in both cases, so RED
|
||||||
|
// metrics alone can't tell them apart.
|
||||||
|
let outcome = if response.models.first().map(|m| m == "none").unwrap_or(true) {
|
||||||
|
metric_labels::ROUTING_SVC_NO_CANDIDATES
|
||||||
|
} else {
|
||||||
|
metric_labels::ROUTING_SVC_DECISION_SERVED
|
||||||
|
};
|
||||||
|
bs_metrics::record_routing_service_outcome(outcome);
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
|
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
|
||||||
total_models = response.models.len(),
|
total_models = response.models.len(),
|
||||||
|
|
@ -249,6 +262,7 @@ async fn routing_decision_inner(
|
||||||
.unwrap())
|
.unwrap())
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
bs_metrics::record_routing_service_outcome(metric_labels::ROUTING_SVC_POLICY_ERROR);
|
||||||
warn!(error = %err.message, "routing decision failed");
|
warn!(error = %err.message, "routing decision failed");
|
||||||
Ok(BrightStaffError::InternalServerError(err.message).into_response())
|
Ok(BrightStaffError::InternalServerError(err.message).into_response())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
pub mod app_state;
|
pub mod app_state;
|
||||||
pub mod handlers;
|
pub mod handlers;
|
||||||
|
pub mod metrics;
|
||||||
pub mod router;
|
pub mod router;
|
||||||
pub mod session_cache;
|
pub mod session_cache;
|
||||||
pub mod signals;
|
pub mod signals;
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ use brightstaff::handlers::function_calling::function_calling_chat_handler;
|
||||||
use brightstaff::handlers::llm::llm_chat;
|
use brightstaff::handlers::llm::llm_chat;
|
||||||
use brightstaff::handlers::models::list_models;
|
use brightstaff::handlers::models::list_models;
|
||||||
use brightstaff::handlers::routing_service::routing_decision;
|
use brightstaff::handlers::routing_service::routing_decision;
|
||||||
|
use brightstaff::metrics as bs_metrics;
|
||||||
|
use brightstaff::metrics::labels as metric_labels;
|
||||||
use brightstaff::router::model_metrics::ModelMetricsService;
|
use brightstaff::router::model_metrics::ModelMetricsService;
|
||||||
use brightstaff::router::orchestrator::OrchestratorService;
|
use brightstaff::router::orchestrator::OrchestratorService;
|
||||||
use brightstaff::session_cache::init_session_cache;
|
use brightstaff::session_cache::init_session_cache;
|
||||||
|
|
@ -384,10 +386,79 @@ async fn init_state_storage(
|
||||||
// Request routing
|
// Request routing
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Normalized method label — limited set so we never emit a free-form string.
|
||||||
|
fn method_label(method: &Method) -> &'static str {
|
||||||
|
match *method {
|
||||||
|
Method::GET => "GET",
|
||||||
|
Method::POST => "POST",
|
||||||
|
Method::PUT => "PUT",
|
||||||
|
Method::DELETE => "DELETE",
|
||||||
|
Method::PATCH => "PATCH",
|
||||||
|
Method::HEAD => "HEAD",
|
||||||
|
Method::OPTIONS => "OPTIONS",
|
||||||
|
_ => "OTHER",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the fixed `handler` metric label from the request's path+method.
|
||||||
|
/// Requests that match no known handler are labelled `HANDLER_NOT_FOUND`, which corresponds to
|
||||||
|
/// the catch-all 404 branch.
|
||||||
|
fn handler_label_for(method: &Method, path: &str) -> &'static str {
|
||||||
|
if let Some(stripped) = path.strip_prefix("/agents") {
|
||||||
|
if matches!(
|
||||||
|
stripped,
|
||||||
|
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||||
|
) {
|
||||||
|
return metric_labels::HANDLER_AGENT_CHAT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(stripped) = path.strip_prefix("/routing") {
|
||||||
|
if matches!(
|
||||||
|
stripped,
|
||||||
|
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||||
|
) {
|
||||||
|
return metric_labels::HANDLER_ROUTING_DECISION;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match (method, path) {
|
||||||
|
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
|
||||||
|
metric_labels::HANDLER_LLM_CHAT
|
||||||
|
}
|
||||||
|
(&Method::POST, "/function_calling") => metric_labels::HANDLER_FUNCTION_CALLING,
|
||||||
|
(&Method::GET, "/v1/models" | "/agents/v1/models") => metric_labels::HANDLER_LIST_MODELS,
|
||||||
|
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => {
|
||||||
|
metric_labels::HANDLER_CORS_PREFLIGHT
|
||||||
|
}
|
||||||
|
_ => metric_labels::HANDLER_NOT_FOUND,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Route an incoming HTTP request to the appropriate handler.
|
/// Route an incoming HTTP request to the appropriate handler.
|
||||||
async fn route(
|
async fn route(
|
||||||
req: Request<Incoming>,
|
req: Request<Incoming>,
|
||||||
state: Arc<AppState>,
|
state: Arc<AppState>,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let handler = handler_label_for(req.method(), req.uri().path());
|
||||||
|
let method = method_label(req.method());
|
||||||
|
let started = std::time::Instant::now();
|
||||||
|
let _in_flight = bs_metrics::InFlightGuard::new(handler);
|
||||||
|
|
||||||
|
let result = dispatch(req, state).await;
|
||||||
|
|
||||||
|
let status = match &result {
|
||||||
|
Ok(resp) => resp.status().as_u16(),
|
||||||
|
// hyper::Error here means the body couldn't be produced; conventionally 500.
|
||||||
|
Err(_) => 500,
|
||||||
|
};
|
||||||
|
bs_metrics::record_http(handler, method, status, started);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inner dispatcher split out so `route()` can wrap it with metrics without
|
||||||
|
/// duplicating the match tree.
|
||||||
|
async fn dispatch(
|
||||||
|
req: Request<Incoming>,
|
||||||
|
state: Arc<AppState>,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
|
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
|
||||||
let path = req.uri().path().to_string();
|
let path = req.uri().path().to_string();
|
||||||
|
|
@ -503,6 +574,7 @@ async fn run_server(state: Arc<AppState>) -> Result<(), Box<dyn std::error::Erro
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let config = load_config()?;
|
let config = load_config()?;
|
||||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||||
|
bs_metrics::init();
|
||||||
info!("loaded plano_config.yaml");
|
info!("loaded plano_config.yaml");
|
||||||
let state = Arc::new(init_app_state(&config).await?);
|
let state = Arc::new(init_app_state(&config).await?);
|
||||||
run_server(state).await
|
run_server(state).await
|
||||||
|
|
|
||||||
38
crates/brightstaff/src/metrics/labels.rs
Normal file
38
crates/brightstaff/src/metrics/labels.rs
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
//! Fixed label-value constants so callers never emit free-form strings
|
||||||
|
//! (which would blow up cardinality).
|
||||||
|
|
||||||
|
// Handler enum — derived from the path+method match in `handler_label_for()`, which `route()` calls.
|
||||||
|
pub const HANDLER_AGENT_CHAT: &str = "agent_chat";
|
||||||
|
pub const HANDLER_ROUTING_DECISION: &str = "routing_decision";
|
||||||
|
pub const HANDLER_LLM_CHAT: &str = "llm_chat";
|
||||||
|
pub const HANDLER_FUNCTION_CALLING: &str = "function_calling";
|
||||||
|
pub const HANDLER_LIST_MODELS: &str = "list_models";
|
||||||
|
pub const HANDLER_CORS_PREFLIGHT: &str = "cors_preflight";
|
||||||
|
pub const HANDLER_NOT_FOUND: &str = "not_found";
|
||||||
|
|
||||||
|
// Router "route" class — which brightstaff endpoint prompted the decision.
|
||||||
|
pub const ROUTE_AGENT: &str = "agent";
|
||||||
|
pub const ROUTE_ROUTING: &str = "routing";
|
||||||
|
pub const ROUTE_LLM: &str = "llm";
|
||||||
|
|
||||||
|
// Token kind for brightstaff_llm_tokens_total.
|
||||||
|
pub const TOKEN_KIND_PROMPT: &str = "prompt";
|
||||||
|
pub const TOKEN_KIND_COMPLETION: &str = "completion";
|
||||||
|
|
||||||
|
// LLM error_class values (match docstring in metrics/mod.rs).
|
||||||
|
pub const LLM_ERR_NONE: &str = "none";
|
||||||
|
pub const LLM_ERR_TIMEOUT: &str = "timeout";
|
||||||
|
pub const LLM_ERR_CONNECT: &str = "connect";
|
||||||
|
pub const LLM_ERR_PARSE: &str = "parse";
|
||||||
|
pub const LLM_ERR_OTHER: &str = "other";
|
||||||
|
pub const LLM_ERR_STREAM: &str = "stream";
|
||||||
|
|
||||||
|
// Routing service outcome values.
|
||||||
|
pub const ROUTING_SVC_DECISION_SERVED: &str = "decision_served";
|
||||||
|
pub const ROUTING_SVC_NO_CANDIDATES: &str = "no_candidates";
|
||||||
|
pub const ROUTING_SVC_POLICY_ERROR: &str = "policy_error";
|
||||||
|
|
||||||
|
// Session cache outcome values.
|
||||||
|
pub const SESSION_CACHE_HIT: &str = "hit";
|
||||||
|
pub const SESSION_CACHE_MISS: &str = "miss";
|
||||||
|
pub const SESSION_CACHE_STORE: &str = "store";
|
||||||
377
crates/brightstaff/src/metrics/mod.rs
Normal file
377
crates/brightstaff/src/metrics/mod.rs
Normal file
|
|
@ -0,0 +1,377 @@
|
||||||
|
//! Prometheus metrics for brightstaff.
|
||||||
|
//!
|
||||||
|
//! Installs the `metrics` global recorder backed by
|
||||||
|
//! `metrics-exporter-prometheus` and exposes a `/metrics` HTTP endpoint on a
|
||||||
|
//! dedicated admin port (default `0.0.0.0:9092`, overridable via
|
||||||
|
//! `METRICS_BIND_ADDRESS`).
|
||||||
|
//!
|
||||||
|
//! Emitted metric families (see `describe_all` for full list):
|
||||||
|
//! - HTTP RED: `brightstaff_http_requests_total`,
|
||||||
|
//! `brightstaff_http_request_duration_seconds`,
|
||||||
|
//! `brightstaff_http_in_flight_requests`.
|
||||||
|
//! - LLM upstream: `brightstaff_llm_upstream_requests_total`,
|
||||||
|
//! `brightstaff_llm_upstream_duration_seconds`,
|
||||||
|
//! `brightstaff_llm_time_to_first_token_seconds`,
|
||||||
|
//! `brightstaff_llm_tokens_total`,
|
||||||
|
//! `brightstaff_llm_tokens_usage_missing_total`.
|
||||||
|
//! - Routing: `brightstaff_router_decisions_total`,
|
||||||
|
//! `brightstaff_router_decision_duration_seconds`,
|
||||||
|
//! `brightstaff_routing_service_requests_total`,
|
||||||
|
//! `brightstaff_session_cache_events_total`.
|
||||||
|
//! - Process: via `metrics-process`.
|
||||||
|
//! - Build: `brightstaff_build_info`.
|
||||||
|
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use metrics::{counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram};
|
||||||
|
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
pub mod labels;
|
||||||
|
|
||||||
|
/// Guard flag so tests don't re-install the global recorder.
|
||||||
|
static INIT: OnceLock<()> = OnceLock::new();
|
||||||
|
|
||||||
|
const DEFAULT_METRICS_BIND: &str = "0.0.0.0:9092";
|
||||||
|
|
||||||
|
/// HTTP request duration buckets (seconds). Capped at 60s.
|
||||||
|
const HTTP_BUCKETS: &[f64] = &[
|
||||||
|
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0,
|
||||||
|
];
|
||||||
|
|
||||||
|
/// LLM upstream / TTFT buckets (seconds). Capped at 120s because provider
|
||||||
|
/// completions routinely run that long.
|
||||||
|
const LLM_BUCKETS: &[f64] = &[0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0];
|
||||||
|
|
||||||
|
/// Router decision buckets (seconds). The orchestrator call itself is usually
|
||||||
|
/// sub-second but bucketed generously in case of upstream slowness.
|
||||||
|
const ROUTER_BUCKETS: &[f64] = &[
|
||||||
|
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0,
|
||||||
|
];
|
||||||
|
|
||||||
|
/// Install the global recorder and spawn the `/metrics` HTTP listener.
|
||||||
|
///
|
||||||
|
/// Safe to call more than once; subsequent calls are no-ops so tests that
|
||||||
|
/// construct their own recorder still work.
|
||||||
|
pub fn init() {
|
||||||
|
if INIT.get().is_some() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let bind: SocketAddr = std::env::var("METRICS_BIND_ADDRESS")
|
||||||
|
.unwrap_or_else(|_| DEFAULT_METRICS_BIND.to_string())
|
||||||
|
.parse()
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
warn!(error = %err, default = DEFAULT_METRICS_BIND, "invalid METRICS_BIND_ADDRESS, falling back to default");
|
||||||
|
DEFAULT_METRICS_BIND.parse().expect("default bind parses")
|
||||||
|
});
|
||||||
|
|
||||||
|
let builder = PrometheusBuilder::new()
|
||||||
|
.with_http_listener(bind)
|
||||||
|
.set_buckets_for_metric(
|
||||||
|
Matcher::Full("brightstaff_http_request_duration_seconds".to_string()),
|
||||||
|
HTTP_BUCKETS,
|
||||||
|
)
|
||||||
|
.and_then(|b| {
|
||||||
|
b.set_buckets_for_metric(Matcher::Prefix("brightstaff_llm_".to_string()), LLM_BUCKETS)
|
||||||
|
})
|
||||||
|
.and_then(|b| {
|
||||||
|
b.set_buckets_for_metric(
|
||||||
|
Matcher::Full("brightstaff_router_decision_duration_seconds".to_string()),
|
||||||
|
ROUTER_BUCKETS,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
let builder = match builder {
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(err) => {
|
||||||
|
warn!(error = %err, "failed to configure metrics buckets, using defaults");
|
||||||
|
PrometheusBuilder::new().with_http_listener(bind)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Err(err) = builder.install() {
|
||||||
|
warn!(error = %err, "failed to install Prometheus recorder; metrics disabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = INIT.set(());
|
||||||
|
|
||||||
|
describe_all();
|
||||||
|
emit_build_info();
|
||||||
|
|
||||||
|
// Register process-level collector (RSS, CPU, FDs).
|
||||||
|
let collector = metrics_process::Collector::default();
|
||||||
|
collector.describe();
|
||||||
|
// Prime once at startup, then re-collect every 10 seconds in the background
|
||||||
|
// task spawned below so the process gauges keep moving between scrapes
|
||||||
|
// instead of staying frozen at their startup values.
|
||||||
|
collector.collect();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut tick = tokio::time::interval(Duration::from_secs(10));
|
||||||
|
tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
||||||
|
loop {
|
||||||
|
tick.tick().await;
|
||||||
|
collector.collect();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
info!(address = %bind, "metrics listener started");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn describe_all() {
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_http_requests_total",
|
||||||
|
"Total HTTP requests served by brightstaff, by handler and status class."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_http_request_duration_seconds",
|
||||||
|
"Wall-clock duration of HTTP requests served by brightstaff, by handler."
|
||||||
|
);
|
||||||
|
describe_gauge!(
|
||||||
|
"brightstaff_http_in_flight_requests",
|
||||||
|
"Number of HTTP requests currently being served by brightstaff, by handler."
|
||||||
|
);
|
||||||
|
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_llm_upstream_requests_total",
|
||||||
|
"LLM upstream request outcomes, by provider, model, status class and error class."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_llm_upstream_duration_seconds",
|
||||||
|
"Wall-clock duration of LLM upstream calls (stream close for streaming), by provider and model."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_llm_time_to_first_token_seconds",
|
||||||
|
"Time from request start to first streamed byte, by provider and model (streaming only)."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_llm_tokens_total",
|
||||||
|
"Tokens reported in the provider `usage` field, by provider, model and kind (prompt/completion)."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_llm_tokens_usage_missing_total",
|
||||||
|
"LLM responses that completed without a usable `usage` block (so token counts are unknown)."
|
||||||
|
);
|
||||||
|
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_router_decisions_total",
|
||||||
|
"Routing decisions made by the orchestrator, by route, selected model, and whether a fallback was used."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_router_decision_duration_seconds",
|
||||||
|
"Time spent in the orchestrator deciding a route, by route."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_routing_service_requests_total",
|
||||||
|
"Outcomes of /routing/* decision requests: decision_served, no_candidates, policy_error."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_session_cache_events_total",
|
||||||
|
"Session affinity cache lookups and stores, by outcome."
|
||||||
|
);
|
||||||
|
|
||||||
|
describe_gauge!(
|
||||||
|
"brightstaff_build_info",
|
||||||
|
"Build metadata. Always 1; labels carry version and git SHA."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_build_info() {
|
||||||
|
let version = env!("CARGO_PKG_VERSION");
|
||||||
|
let git_sha = option_env!("GIT_SHA").unwrap_or("unknown");
|
||||||
|
gauge!(
|
||||||
|
"brightstaff_build_info",
|
||||||
|
"version" => version.to_string(),
|
||||||
|
"git_sha" => git_sha.to_string(),
|
||||||
|
)
|
||||||
|
.set(1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Break a provider-qualified model id (for example `"openai/gpt-4o"`) into
/// its `(provider, model)` halves at the first `/`.
///
/// An id that contains no `/` yields `("unknown", full)`.
pub fn split_provider_model(full: &str) -> (&str, &str) {
    full.split_once('/').unwrap_or(("unknown", full))
}
|
||||||
|
|
||||||
|
/// Map an HTTP status code to its class label: `"1xx"`, `"2xx"`, `"3xx"`,
/// `"4xx"` or `"5xx"`. Any code outside 100–599 maps to `"other"`.
pub fn status_class(status: u16) -> &'static str {
    // The hundreds digit alone identifies the class.
    match status / 100 {
        1 => "1xx",
        2 => "2xx",
        3 => "3xx",
        4 => "4xx",
        5 => "5xx",
        _ => "other",
    }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// HTTP RED helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// RAII guard that increments the in-flight gauge on construction and
|
||||||
|
/// decrements on drop. Pair with [`HttpTimer`] in the `route()` wrapper so the
|
||||||
|
/// gauge drops even on error paths.
|
||||||
|
pub struct InFlightGuard {
|
||||||
|
handler: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InFlightGuard {
|
||||||
|
pub fn new(handler: &'static str) -> Self {
|
||||||
|
gauge!(
|
||||||
|
"brightstaff_http_in_flight_requests",
|
||||||
|
"handler" => handler,
|
||||||
|
)
|
||||||
|
.increment(1.0);
|
||||||
|
Self { handler }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for InFlightGuard {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
gauge!(
|
||||||
|
"brightstaff_http_in_flight_requests",
|
||||||
|
"handler" => self.handler,
|
||||||
|
)
|
||||||
|
.decrement(1.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the HTTP request counter + duration histogram.
|
||||||
|
pub fn record_http(handler: &'static str, method: &'static str, status: u16, started: Instant) {
|
||||||
|
let class = status_class(status);
|
||||||
|
counter!(
|
||||||
|
"brightstaff_http_requests_total",
|
||||||
|
"handler" => handler,
|
||||||
|
"method" => method,
|
||||||
|
"status_class" => class,
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_http_request_duration_seconds",
|
||||||
|
"handler" => handler,
|
||||||
|
)
|
||||||
|
.record(started.elapsed().as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// LLM upstream helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Classify an outcome of an LLM upstream call for the `error_class` label.
|
||||||
|
pub fn llm_error_class_from_reqwest(err: &reqwest::Error) -> &'static str {
|
||||||
|
if err.is_timeout() {
|
||||||
|
"timeout"
|
||||||
|
} else if err.is_connect() {
|
||||||
|
"connect"
|
||||||
|
} else if err.is_decode() {
|
||||||
|
"parse"
|
||||||
|
} else {
|
||||||
|
"other"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the outcome of an LLM upstream call. `status` is the HTTP status
|
||||||
|
/// the upstream returned (0 if the call never produced one, e.g. send failure).
|
||||||
|
/// `error_class` is `"none"` on success, or a discriminated error label.
|
||||||
|
pub fn record_llm_upstream(
|
||||||
|
provider: &str,
|
||||||
|
model: &str,
|
||||||
|
status: u16,
|
||||||
|
error_class: &str,
|
||||||
|
duration: Duration,
|
||||||
|
) {
|
||||||
|
let class = if status == 0 {
|
||||||
|
"error"
|
||||||
|
} else {
|
||||||
|
status_class(status)
|
||||||
|
};
|
||||||
|
counter!(
|
||||||
|
"brightstaff_llm_upstream_requests_total",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
"status_class" => class,
|
||||||
|
"error_class" => error_class.to_string(),
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_llm_upstream_duration_seconds",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
)
|
||||||
|
.record(duration.as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_llm_ttft(provider: &str, model: &str, ttft: Duration) {
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_llm_time_to_first_token_seconds",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
)
|
||||||
|
.record(ttft.as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_llm_tokens(provider: &str, model: &str, kind: &'static str, count: u64) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_llm_tokens_total",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
"kind" => kind,
|
||||||
|
)
|
||||||
|
.increment(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_llm_tokens_usage_missing(provider: &str, model: &str) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_llm_tokens_usage_missing_total",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Router helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pub fn record_router_decision(
|
||||||
|
route: &'static str,
|
||||||
|
selected_model: &str,
|
||||||
|
fallback: bool,
|
||||||
|
duration: Duration,
|
||||||
|
) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_router_decisions_total",
|
||||||
|
"route" => route,
|
||||||
|
"selected_model" => selected_model.to_string(),
|
||||||
|
"fallback" => if fallback { "true" } else { "false" },
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_router_decision_duration_seconds",
|
||||||
|
"route" => route,
|
||||||
|
)
|
||||||
|
.record(duration.as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_routing_service_outcome(outcome: &'static str) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_routing_service_requests_total",
|
||||||
|
"outcome" => outcome,
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_session_cache_event(outcome: &'static str) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_session_cache_events_total",
|
||||||
|
"outcome" => outcome,
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
|
@ -15,6 +15,8 @@ use super::http::{self, post_and_extract_content};
|
||||||
use super::model_metrics::ModelMetricsService;
|
use super::model_metrics::ModelMetricsService;
|
||||||
use super::orchestrator_model::OrchestratorModel;
|
use super::orchestrator_model::OrchestratorModel;
|
||||||
|
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::router::orchestrator_model_v1;
|
use crate::router::orchestrator_model_v1;
|
||||||
use crate::session_cache::SessionCache;
|
use crate::session_cache::SessionCache;
|
||||||
|
|
||||||
|
|
@ -130,7 +132,13 @@ impl OrchestratorService {
|
||||||
tenant_id: Option<&str>,
|
tenant_id: Option<&str>,
|
||||||
) -> Option<CachedRoute> {
|
) -> Option<CachedRoute> {
|
||||||
let cache = self.session_cache.as_ref()?;
|
let cache = self.session_cache.as_ref()?;
|
||||||
cache.get(&Self::session_key(tenant_id, session_id)).await
|
let result = cache.get(&Self::session_key(tenant_id, session_id)).await;
|
||||||
|
bs_metrics::record_session_cache_event(if result.is_some() {
|
||||||
|
metric_labels::SESSION_CACHE_HIT
|
||||||
|
} else {
|
||||||
|
metric_labels::SESSION_CACHE_MISS
|
||||||
|
});
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn cache_route(
|
pub async fn cache_route(
|
||||||
|
|
@ -151,6 +159,7 @@ impl OrchestratorService {
|
||||||
self.session_ttl,
|
self.session_ttl,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
bs_metrics::record_session_cache_event(metric_labels::SESSION_CACHE_STORE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ const STREAM_BUFFER_SIZE: usize = 16;
|
||||||
/// Most chat responses are well under this; pathological ones are dropped without
|
/// Most chat responses are well under this; pathological ones are dropped without
|
||||||
/// affecting pass-through streaming to the client.
|
/// affecting pass-through streaming to the client.
|
||||||
const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
|
const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
|
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
|
||||||
use crate::tracing::{llm, set_service_name, signals as signal_constants};
|
use crate::tracing::{llm, set_service_name, signals as signal_constants};
|
||||||
use hermesllm::apis::openai::Message;
|
use hermesllm::apis::openai::Message;
|
||||||
|
|
@ -172,6 +174,18 @@ impl StreamProcessor for Box<dyn StreamProcessor> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Optional Prometheus-metric context for an LLM upstream call. When present,
|
||||||
|
/// [`ObservableStreamProcessor`] emits `brightstaff_llm_*` metrics at
|
||||||
|
/// first-byte / complete / error callbacks.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct LlmMetricsCtx {
|
||||||
|
pub provider: String,
|
||||||
|
pub model: String,
|
||||||
|
/// HTTP status of the upstream response. Used to pick `status_class` and
|
||||||
|
/// `error_class` on `on_complete`.
|
||||||
|
pub upstream_status: u16,
|
||||||
|
}
|
||||||
|
|
||||||
/// A processor that tracks streaming metrics
|
/// A processor that tracks streaming metrics
|
||||||
pub struct ObservableStreamProcessor {
|
pub struct ObservableStreamProcessor {
|
||||||
service_name: String,
|
service_name: String,
|
||||||
|
|
@ -185,6 +199,8 @@ pub struct ObservableStreamProcessor {
|
||||||
/// on `on_complete`. Capped at `USAGE_BUFFER_MAX`; excess chunks are dropped
|
/// on `on_complete`. Capped at `USAGE_BUFFER_MAX`; excess chunks are dropped
|
||||||
/// from the buffer (they still pass through to the client).
|
/// from the buffer (they still pass through to the client).
|
||||||
response_buffer: Vec<u8>,
|
response_buffer: Vec<u8>,
|
||||||
|
llm_metrics: Option<LlmMetricsCtx>,
|
||||||
|
metrics_recorded: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ObservableStreamProcessor {
|
impl ObservableStreamProcessor {
|
||||||
|
|
@ -219,8 +235,17 @@ impl ObservableStreamProcessor {
|
||||||
time_to_first_token: None,
|
time_to_first_token: None,
|
||||||
messages,
|
messages,
|
||||||
response_buffer: Vec::new(),
|
response_buffer: Vec::new(),
|
||||||
|
llm_metrics: None,
|
||||||
|
metrics_recorded: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attach LLM upstream metric context so the processor emits
|
||||||
|
/// `brightstaff_llm_*` metrics on first-byte / complete / error.
|
||||||
|
pub fn with_llm_metrics(mut self, ctx: LlmMetricsCtx) -> Self {
|
||||||
|
self.llm_metrics = Some(ctx);
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StreamProcessor for ObservableStreamProcessor {
|
impl StreamProcessor for ObservableStreamProcessor {
|
||||||
|
|
@ -240,7 +265,11 @@ impl StreamProcessor for ObservableStreamProcessor {
|
||||||
fn on_first_bytes(&mut self) {
|
fn on_first_bytes(&mut self) {
|
||||||
// Record time to first token (only for streaming)
|
// Record time to first token (only for streaming)
|
||||||
if self.time_to_first_token.is_none() {
|
if self.time_to_first_token.is_none() {
|
||||||
self.time_to_first_token = Some(self.start_time.elapsed().as_millis());
|
let elapsed = self.start_time.elapsed();
|
||||||
|
self.time_to_first_token = Some(elapsed.as_millis());
|
||||||
|
if let Some(ref ctx) = self.llm_metrics {
|
||||||
|
bs_metrics::record_llm_ttft(&ctx.provider, &ctx.model, elapsed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -299,6 +328,39 @@ impl StreamProcessor for ObservableStreamProcessor {
|
||||||
otel_span.set_attribute(KeyValue::new(llm::MODEL_NAME, resolved));
|
otel_span.set_attribute(KeyValue::new(llm::MODEL_NAME, resolved));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit LLM upstream prometheus metrics (duration + tokens) if wired.
|
||||||
|
// The upstream responded (we have a status), so status_class alone
|
||||||
|
// carries the non-2xx signal — error_class stays "none".
|
||||||
|
if let Some(ref ctx) = self.llm_metrics {
|
||||||
|
bs_metrics::record_llm_upstream(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
ctx.upstream_status,
|
||||||
|
metric_labels::LLM_ERR_NONE,
|
||||||
|
self.start_time.elapsed(),
|
||||||
|
);
|
||||||
|
if let Some(v) = usage.prompt_tokens {
|
||||||
|
bs_metrics::record_llm_tokens(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
metric_labels::TOKEN_KIND_PROMPT,
|
||||||
|
v.max(0) as u64,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if let Some(v) = usage.completion_tokens {
|
||||||
|
bs_metrics::record_llm_tokens(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
metric_labels::TOKEN_KIND_COMPLETION,
|
||||||
|
v.max(0) as u64,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if usage.prompt_tokens.is_none() && usage.completion_tokens.is_none() {
|
||||||
|
bs_metrics::record_llm_tokens_usage_missing(&ctx.provider, &ctx.model);
|
||||||
|
}
|
||||||
|
self.metrics_recorded = true;
|
||||||
|
}
|
||||||
// Release the buffered bytes early; nothing downstream needs them.
|
// Release the buffered bytes early; nothing downstream needs them.
|
||||||
self.response_buffer.clear();
|
self.response_buffer.clear();
|
||||||
self.response_buffer.shrink_to_fit();
|
self.response_buffer.shrink_to_fit();
|
||||||
|
|
@ -396,6 +458,18 @@ impl StreamProcessor for ObservableStreamProcessor {
|
||||||
duration_ms = self.start_time.elapsed().as_millis(),
|
duration_ms = self.start_time.elapsed().as_millis(),
|
||||||
"stream error"
|
"stream error"
|
||||||
);
|
);
|
||||||
|
if let Some(ref ctx) = self.llm_metrics {
|
||||||
|
if !self.metrics_recorded {
|
||||||
|
bs_metrics::record_llm_upstream(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
ctx.upstream_status,
|
||||||
|
metric_labels::LLM_ERR_STREAM,
|
||||||
|
self.start_time.elapsed(),
|
||||||
|
);
|
||||||
|
self.metrics_recorded = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -75,3 +75,54 @@ are some sample configuration files for both, respectively.
|
||||||
isDefault: true
|
isDefault: true
|
||||||
access: proxy
|
access: proxy
|
||||||
editable: true
|
editable: true
|
||||||
|
|
||||||
|
Brightstaff metrics
|
||||||
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
In addition to Envoy's stats on ``:9901``, the brightstaff dataplane
|
||||||
|
process exposes its own Prometheus endpoint on ``0.0.0.0:9092`` (override
|
||||||
|
with ``METRICS_BIND_ADDRESS``). It publishes:
|
||||||
|
|
||||||
|
* HTTP RED — ``brightstaff_http_requests_total``,
|
||||||
|
``brightstaff_http_request_duration_seconds``,
|
||||||
|
``brightstaff_http_in_flight_requests`` (labels: ``handler``, ``method``,
|
||||||
|
``status_class``).
|
||||||
|
* LLM upstream — ``brightstaff_llm_upstream_requests_total``,
|
||||||
|
``brightstaff_llm_upstream_duration_seconds``,
|
||||||
|
``brightstaff_llm_time_to_first_token_seconds``,
|
||||||
|
``brightstaff_llm_tokens_total``,
|
||||||
|
``brightstaff_llm_tokens_usage_missing_total`` (labels: ``provider``, ``model``, ``status_class``, ``error_class``, ``kind``).
|
||||||
|
* Routing — ``brightstaff_router_decisions_total``,
|
||||||
|
``brightstaff_router_decision_duration_seconds``,
|
||||||
|
``brightstaff_routing_service_requests_total``,
|
||||||
|
``brightstaff_session_cache_events_total``.
|
||||||
|
* Process & build — ``process_resident_memory_bytes``,
|
||||||
|
``process_cpu_seconds_total``, ``brightstaff_build_info``.
|
||||||
|
|
||||||
|
A self-contained Prometheus + Grafana stack is shipped under
|
||||||
|
``config/grafana/``. With Plano already running on the host, bring it up
|
||||||
|
with one command:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd config/grafana
|
||||||
|
docker compose up -d
|
||||||
|
open http://localhost:3000 # admin / admin (anonymous viewer also enabled)
|
||||||
|
|
||||||
|
Grafana auto-loads the Prometheus datasource and the brightstaff
|
||||||
|
dashboard (look under the *Plano* folder). Prometheus scrapes the host's
|
||||||
|
``:9092`` and ``:9901`` via ``host.docker.internal``.
|
||||||
|
|
||||||
|
Files:
|
||||||
|
|
||||||
|
* ``config/grafana/docker-compose.yaml`` — one-command Prom + Grafana
|
||||||
|
stack with provisioning.
|
||||||
|
* ``config/grafana/prometheus_scrape.yaml`` — complete Prometheus config
|
||||||
|
with ``envoy`` and ``brightstaff`` scrape jobs (mounted by the
|
||||||
|
compose).
|
||||||
|
* ``config/grafana/brightstaff_dashboard.json`` — 19-panel dashboard
|
||||||
|
across HTTP RED, LLM upstream, Routing service, and Process & Envoy
|
||||||
|
link rows. Auto-provisioned by the compose; can also be imported by
|
||||||
|
hand via *Dashboards → New → Import*.
|
||||||
|
* ``config/grafana/provisioning/`` — Grafana provisioning files for the
|
||||||
|
datasource and dashboard provider.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue