mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Merge remote-tracking branch 'origin/main' into musa/chatgpt-subscription
This commit is contained in:
commit
aa53f438c6
54 changed files with 7345 additions and 3268 deletions
541
config/grafana/brightstaff_dashboard.json
Normal file
541
config/grafana/brightstaff_dashboard.json
Normal file
|
|
@ -0,0 +1,541 @@
|
||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"builtIn": 1,
|
||||||
|
"datasource": "-- Grafana --",
|
||||||
|
"enable": true,
|
||||||
|
"hide": true,
|
||||||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||||||
|
"name": "Annotations & Alerts",
|
||||||
|
"type": "dashboard"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"description": "RED, LLM upstream, routing service, and process metrics for brightstaff. Pair with Envoy admin metrics from cluster=bright_staff.",
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"liveNow": false,
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||||
|
"id": 100,
|
||||||
|
"panels": [],
|
||||||
|
"title": "HTTP RED",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "req/s",
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"lineWidth": 1,
|
||||||
|
"showPoints": "never"
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (handler) (rate(brightstaff_http_requests_total[1m]))",
|
||||||
|
"legendFormat": "{{handler}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Rate — brightstaff RPS by handler",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "5xx fraction over 5m. Page-worthy when sustained above ~1%.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "thresholds" },
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 0.01 },
|
||||||
|
{ "color": "red", "value": 0.05 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percentunit"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(brightstaff_http_requests_total{status_class=\"5xx\"}[5m])) / clamp_min(sum(rate(brightstaff_http_requests_total[5m])), 1)",
|
||||||
|
"legendFormat": "5xx rate",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Errors — brightstaff 5xx rate",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "p50/p95/p99 by handler, computed from histogram buckets over 5m.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 },
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.50, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p50 {{handler}}",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{handler}}",
|
||||||
|
"refId": "B"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.99, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p99 {{handler}}",
|
||||||
|
"refId": "C"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Duration — p50 / p95 / p99 by handler",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "In-flight requests by handler. Climbs before latency does when brightstaff is saturated.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "short"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 18 },
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (handler) (brightstaff_http_in_flight_requests)",
|
||||||
|
"legendFormat": "{{handler}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "In-flight requests by handler",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 },
|
||||||
|
"id": 200,
|
||||||
|
"panels": [],
|
||||||
|
"title": "LLM upstream",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 27 },
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_upstream_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{provider}}/{{model}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "LLM upstream p95 by provider/model",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "All non-success error classes. timeout/connect = network, 5xx/429 = provider, parse = body shape mismatch, stream = mid-stream disconnect.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 27 },
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (provider, error_class) (rate(brightstaff_llm_upstream_requests_total{error_class!=\"none\"}[5m]))",
|
||||||
|
"legendFormat": "{{provider}} / {{error_class}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "LLM upstream errors by provider / class",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Streaming only. Empty if the route never streams.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 36 },
|
||||||
|
"id": 7,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_time_to_first_token_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{provider}}/{{model}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Time-to-first-token p95 (streaming)",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Tokens/sec by provider/model/kind — proxy for cost. Stacked.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||||
|
"unit": "tokens/s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 36 },
|
||||||
|
"id": 8,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (provider, model, kind) (rate(brightstaff_llm_tokens_total[5m]))",
|
||||||
|
"legendFormat": "{{provider}}/{{model}} {{kind}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Token throughput by provider / model / kind",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 },
|
||||||
|
"id": 300,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Routing service",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Which models the orchestrator picked over the last 15 minutes.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"unit": "short"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 46 },
|
||||||
|
"id": 9,
|
||||||
|
"options": {
|
||||||
|
"displayMode": "gradient",
|
||||||
|
"orientation": "horizontal",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (selected_model) (increase(brightstaff_router_decisions_total[15m]))",
|
||||||
|
"legendFormat": "{{selected_model}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Model selection distribution (last 15m)",
|
||||||
|
"type": "bargauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Fraction of decisions that fell back (orchestrator returned `none` or errored). High = router can't classify intent or no candidates configured.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "percentunit"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 46 },
|
||||||
|
"id": 10,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (route) (rate(brightstaff_router_decisions_total{fallback=\"true\"}[5m])) / clamp_min(sum by (route) (rate(brightstaff_router_decisions_total[5m])), 1)",
|
||||||
|
"legendFormat": "{{route}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Fallback rate by route",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "s"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 55 },
|
||||||
|
"id": 11,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, route) (rate(brightstaff_router_decision_duration_seconds_bucket[5m])))",
|
||||||
|
"legendFormat": "p95 {{route}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Router decision p95 latency",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Hit / (hit + miss). Low ratio = sessions aren't being reused or TTL too short.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "thresholds" },
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "red", "value": null },
|
||||||
|
{ "color": "yellow", "value": 0.5 },
|
||||||
|
{ "color": "green", "value": 0.8 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percentunit",
|
||||||
|
"min": 0,
|
||||||
|
"max": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 6, "x": 12, "y": 55 },
|
||||||
|
"id": 12,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(brightstaff_session_cache_events_total{outcome=\"hit\"}[5m])) / clamp_min(sum(rate(brightstaff_session_cache_events_total{outcome=~\"hit|miss\"}[5m])), 1)",
|
||||||
|
"legendFormat": "hit rate",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Session cache hit rate",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "decision_served = a real model picked. no_candidates = sentinel `none` returned. policy_error = orchestrator failed.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 6, "x": 18, "y": 55 },
|
||||||
|
"id": 13,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "list", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum by (outcome) (rate(brightstaff_routing_service_requests_total[5m]))",
|
||||||
|
"legendFormat": "{{outcome}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "/routing/* outcomes",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 63 },
|
||||||
|
"id": 400,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Process & Envoy link",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"description": "Compare to brightstaff RPS (panel 1) — sustained gap = network or Envoy queueing.",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 64 },
|
||||||
|
"id": 14,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(envoy_cluster_upstream_rq_total{envoy_cluster_name=\"bright_staff\"}[1m]))",
|
||||||
|
"legendFormat": "envoy → bright_staff",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "sum(rate(brightstaff_http_requests_total[1m]))",
|
||||||
|
"legendFormat": "brightstaff served",
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Envoy → brightstaff link health",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": { "mode": "palette-classic" },
|
||||||
|
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" }
|
||||||
|
},
|
||||||
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "RSS" },
|
||||||
|
"properties": [{ "id": "unit", "value": "bytes" }]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": { "id": "byName", "options": "CPU" },
|
||||||
|
"properties": [{ "id": "unit", "value": "percentunit" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 64 },
|
||||||
|
"id": 15,
|
||||||
|
"options": {
|
||||||
|
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||||
|
"tooltip": { "mode": "multi" }
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "process_resident_memory_bytes{job=\"brightstaff\"}",
|
||||||
|
"legendFormat": "RSS",
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||||
|
"expr": "rate(process_cpu_seconds_total{job=\"brightstaff\"}[1m])",
|
||||||
|
"legendFormat": "CPU",
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Brightstaff process RSS / CPU",
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["plano", "brightstaff", "llm"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"name": "DS_PROMETHEUS",
|
||||||
|
"label": "Prometheus",
|
||||||
|
"type": "datasource",
|
||||||
|
"query": "prometheus",
|
||||||
|
"current": { "selected": false, "text": "Prometheus", "value": "DS_PROMETHEUS" },
|
||||||
|
"hide": 0,
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"skipUrlSync": false,
|
||||||
|
"includeAll": false,
|
||||||
|
"multi": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": { "from": "now-1h", "to": "now" },
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Brightstaff (Plano dataplane)",
|
||||||
|
"uid": "brightstaff",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
43
config/grafana/docker-compose.yaml
Normal file
43
config/grafana/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
# One-command Prometheus + Grafana stack for observing a locally-running
|
||||||
|
# Plano (Envoy admin :9901 + brightstaff :9092 on the host).
|
||||||
|
#
|
||||||
|
# cd config/grafana
|
||||||
|
# docker compose up -d
|
||||||
|
# open http://localhost:3000 (admin / admin)
|
||||||
|
#
|
||||||
|
# Grafana is preloaded with:
|
||||||
|
# - Prometheus datasource (uid=DS_PROMETHEUS) → http://prometheus:9090
|
||||||
|
# - Brightstaff dashboard (auto-imported from brightstaff_dashboard.json)
|
||||||
|
#
|
||||||
|
# Prometheus scrapes the host's :9092 and :9901 via host.docker.internal.
|
||||||
|
# On Linux this works because of the `extra_hosts: host-gateway` mapping
|
||||||
|
# below. On Mac/Win it works natively.
|
||||||
|
|
||||||
|
services:
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
container_name: plano-prometheus
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
volumes:
|
||||||
|
- ./prometheus_scrape.yaml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
container_name: plano-grafana
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
GF_SECURITY_ADMIN_USER: admin
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD: admin
|
||||||
|
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||||
|
GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer
|
||||||
|
volumes:
|
||||||
|
- ./provisioning:/etc/grafana/provisioning:ro
|
||||||
|
- ./brightstaff_dashboard.json:/var/lib/grafana/dashboards/brightstaff_dashboard.json:ro
|
||||||
|
depends_on:
|
||||||
|
- prometheus
|
||||||
|
restart: unless-stopped
|
||||||
44
config/grafana/prometheus_scrape.yaml
Normal file
44
config/grafana/prometheus_scrape.yaml
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
# Prometheus config that scrapes Plano (Envoy admin + brightstaff). This is
|
||||||
|
# a complete Prometheus config — mount it directly at
|
||||||
|
# /etc/prometheus/prometheus.yml. The included docker-compose.yaml does this
|
||||||
|
# for you.
|
||||||
|
#
|
||||||
|
# Targets:
|
||||||
|
# - envoy:9901 Envoy admin → envoy_cluster_*, envoy_http_*, envoy_server_*.
|
||||||
|
# - brightstaff:9092 Native dataplane → brightstaff_http_*, brightstaff_llm_*,
|
||||||
|
# brightstaff_router_*, process_*.
|
||||||
|
#
|
||||||
|
# Hostname `host.docker.internal` works on Docker Desktop (Mac/Win) and on
|
||||||
|
# Linux when the container is started with `--add-host=host.docker.internal:
|
||||||
|
# host-gateway` (the included compose does this). If Plano runs *inside*
|
||||||
|
# Docker on the same network as Prometheus, replace it with the container
|
||||||
|
# name (e.g. `plano:9092`).
|
||||||
|
#
|
||||||
|
# This file is unrelated to demos/llm_routing/model_routing_service/prometheus.yaml,
|
||||||
|
# which scrapes a fake metrics service to feed the routing engine.
|
||||||
|
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
scrape_timeout: 10s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: envoy
|
||||||
|
honor_timestamps: true
|
||||||
|
metrics_path: /stats
|
||||||
|
params:
|
||||||
|
format: ["prometheus"]
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- host.docker.internal:9901
|
||||||
|
labels:
|
||||||
|
service: plano
|
||||||
|
|
||||||
|
- job_name: brightstaff
|
||||||
|
honor_timestamps: true
|
||||||
|
metrics_path: /metrics
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- host.docker.internal:9092
|
||||||
|
labels:
|
||||||
|
service: plano
|
||||||
15
config/grafana/provisioning/dashboards/brightstaff.yaml
Normal file
15
config/grafana/provisioning/dashboards/brightstaff.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
# Auto-load the brightstaff dashboard JSON on Grafana startup.
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: brightstaff
|
||||||
|
orgId: 1
|
||||||
|
folder: Plano
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
updateIntervalSeconds: 30
|
||||||
|
allowUiUpdates: true
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
|
foldersFromFilesStructure: false
|
||||||
14
config/grafana/provisioning/datasources/prometheus.yaml
Normal file
14
config/grafana/provisioning/datasources/prometheus.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
# Auto-provision the Prometheus datasource so the bundled dashboard wires up
|
||||||
|
# without any clicks. The `uid: DS_PROMETHEUS` matches the templated input in
|
||||||
|
# brightstaff_dashboard.json.
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
uid: DS_PROMETHEUS
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
editable: true
|
||||||
|
|
@ -290,6 +290,9 @@ properties:
|
||||||
type: boolean
|
type: boolean
|
||||||
use_agent_orchestrator:
|
use_agent_orchestrator:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
disable_signals:
|
||||||
|
type: boolean
|
||||||
|
description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
|
||||||
upstream_connect_timeout:
|
upstream_connect_timeout:
|
||||||
type: string
|
type: string
|
||||||
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
|
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
|
||||||
|
|
|
||||||
372
crates/Cargo.lock
generated
372
crates/Cargo.lock
generated
|
|
@ -23,6 +23,18 @@ version = "0.3.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
|
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"version_check",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "1.1.4"
|
version = "1.1.4"
|
||||||
|
|
@ -257,6 +269,24 @@ dependencies = [
|
||||||
"vsimd",
|
"vsimd",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.72.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools 0.13.0",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash 2.1.2",
|
||||||
|
"shlex",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bit-set"
|
name = "bit-set"
|
||||||
version = "0.5.3"
|
version = "0.5.3"
|
||||||
|
|
@ -316,6 +346,9 @@ dependencies = [
|
||||||
"hyper 1.9.0",
|
"hyper 1.9.0",
|
||||||
"hyper-util",
|
"hyper-util",
|
||||||
"lru",
|
"lru",
|
||||||
|
"metrics 0.23.1",
|
||||||
|
"metrics-exporter-prometheus",
|
||||||
|
"metrics-process",
|
||||||
"mockito",
|
"mockito",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
"opentelemetry-http",
|
"opentelemetry-http",
|
||||||
|
|
@ -325,6 +358,7 @@ dependencies = [
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"rand 0.9.4",
|
"rand 0.9.4",
|
||||||
"redis",
|
"redis",
|
||||||
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
|
@ -332,6 +366,8 @@ dependencies = [
|
||||||
"serde_yaml",
|
"serde_yaml",
|
||||||
"strsim",
|
"strsim",
|
||||||
"thiserror 2.0.18",
|
"thiserror 2.0.18",
|
||||||
|
"tikv-jemalloc-ctl",
|
||||||
|
"tikv-jemallocator",
|
||||||
"time",
|
"time",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-postgres",
|
"tokio-postgres",
|
||||||
|
|
@ -391,6 +427,15 @@ dependencies = [
|
||||||
"shlex",
|
"shlex",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cexpr"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
|
|
@ -428,6 +473,17 @@ dependencies = [
|
||||||
"windows-link",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clang-sys"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||||
|
dependencies = [
|
||||||
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cmov"
|
name = "cmov"
|
||||||
version = "0.5.3"
|
version = "0.5.3"
|
||||||
|
|
@ -574,6 +630,21 @@ dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crypto-common"
|
name = "crypto-common"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
|
|
@ -1070,6 +1141,12 @@ dependencies = [
|
||||||
"wasip3",
|
"wasip3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "governor"
|
name = "governor"
|
||||||
version = "0.6.3"
|
version = "0.6.3"
|
||||||
|
|
@ -1128,7 +1205,7 @@ version = "0.8.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
|
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash 0.3.8",
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -1138,6 +1215,15 @@ version = "0.12.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.14.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.15.5"
|
version = "0.15.5"
|
||||||
|
|
@ -1189,6 +1275,12 @@ dependencies = [
|
||||||
"uuid",
|
"uuid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hex"
|
name = "hex"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
|
|
@ -1665,6 +1757,27 @@ version = "0.2.185"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
|
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libproc"
|
||||||
|
version = "0.14.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a54ad7278b8bc5301d5ffd2a94251c004feb971feba96c971ea4063645990757"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libredox"
|
name = "libredox"
|
||||||
version = "0.1.16"
|
version = "0.1.16"
|
||||||
|
|
@ -1745,6 +1858,12 @@ version = "0.1.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mach2"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
|
@ -1782,6 +1901,77 @@ version = "2.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics"
|
||||||
|
version = "0.23.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
"portable-atomic",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics"
|
||||||
|
version = "0.24.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
"portable-atomic",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-exporter-prometheus"
|
||||||
|
version = "0.15.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6"
|
||||||
|
dependencies = [
|
||||||
|
"base64 0.22.1",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper 1.9.0",
|
||||||
|
"hyper-util",
|
||||||
|
"indexmap 2.14.0",
|
||||||
|
"ipnet",
|
||||||
|
"metrics 0.23.1",
|
||||||
|
"metrics-util",
|
||||||
|
"quanta",
|
||||||
|
"thiserror 1.0.69",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-process"
|
||||||
|
version = "2.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4268d87f64a752f5a651314fc683f04da10be65701ea3e721ba4d74f79163cac"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"libproc",
|
||||||
|
"mach2",
|
||||||
|
"metrics 0.24.3",
|
||||||
|
"once_cell",
|
||||||
|
"procfs",
|
||||||
|
"rlimit",
|
||||||
|
"windows",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-util"
|
||||||
|
version = "0.17.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"hashbrown 0.14.5",
|
||||||
|
"metrics 0.23.1",
|
||||||
|
"num_cpus",
|
||||||
|
"quanta",
|
||||||
|
"sketches-ddsketch",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mime"
|
name = "mime"
|
||||||
version = "0.3.17"
|
version = "0.3.17"
|
||||||
|
|
@ -1935,6 +2125,16 @@ dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.17.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "objc2-core-foundation"
|
name = "objc2-core-foundation"
|
||||||
version = "0.3.2"
|
version = "0.3.2"
|
||||||
|
|
@ -2125,6 +2325,12 @@ dependencies = [
|
||||||
"windows-link",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "paste"
|
||||||
|
version = "1.0.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.3.2"
|
version = "2.3.2"
|
||||||
|
|
@ -2278,6 +2484,27 @@ dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "procfs"
|
||||||
|
version = "0.18.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"procfs-core",
|
||||||
|
"rustix",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "procfs-core"
|
||||||
|
version = "0.18.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"hex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prompt_gateway"
|
name = "prompt_gateway"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
@ -2333,6 +2560,21 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quanta"
|
||||||
|
version = "0.12.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"raw-cpuid",
|
||||||
|
"wasi 0.11.1+wasi-snapshot-preview1",
|
||||||
|
"web-sys",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quinn"
|
name = "quinn"
|
||||||
version = "0.11.9"
|
version = "0.11.9"
|
||||||
|
|
@ -2485,6 +2727,15 @@ version = "0.10.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "raw-cpuid"
|
||||||
|
version = "11.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redis"
|
name = "redis"
|
||||||
version = "0.27.6"
|
version = "0.27.6"
|
||||||
|
|
@ -2646,6 +2897,15 @@ dependencies = [
|
||||||
"windows-sys 0.52.0",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rlimit"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f35ee2729c56bb610f6dba436bf78135f728b7373bdffae2ec815b2d3eb98cc3"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-hash"
|
name = "rustc-hash"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
|
|
@ -3098,6 +3358,12 @@ version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sketches-ddsketch"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "slab"
|
name = "slab"
|
||||||
version = "0.4.12"
|
version = "0.4.12"
|
||||||
|
|
@ -3308,6 +3574,37 @@ dependencies = [
|
||||||
"rustc-hash 1.1.0",
|
"rustc-hash 1.1.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tikv-jemalloc-ctl"
|
||||||
|
version = "0.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "661f1f6a57b3a36dc9174a2c10f19513b4866816e13425d3e418b11cc37bc24c"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"paste",
|
||||||
|
"tikv-jemalloc-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tikv-jemalloc-sys"
|
||||||
|
version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tikv-jemallocator"
|
||||||
|
version = "0.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"tikv-jemalloc-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.3.47"
|
version = "0.3.47"
|
||||||
|
|
@ -4003,6 +4300,49 @@ dependencies = [
|
||||||
"web-sys",
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows"
|
||||||
|
version = "0.62.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580"
|
||||||
|
dependencies = [
|
||||||
|
"windows-collections",
|
||||||
|
"windows-core",
|
||||||
|
"windows-future",
|
||||||
|
"windows-numerics",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-collections"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-core"
|
name = "windows-core"
|
||||||
version = "0.62.2"
|
version = "0.62.2"
|
||||||
|
|
@ -4016,6 +4356,17 @@ dependencies = [
|
||||||
"windows-strings",
|
"windows-strings",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-future"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
"windows-link",
|
||||||
|
"windows-threading",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-implement"
|
name = "windows-implement"
|
||||||
version = "0.60.2"
|
version = "0.60.2"
|
||||||
|
|
@ -4044,6 +4395,16 @@ version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-numerics"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-registry"
|
name = "windows-registry"
|
||||||
version = "0.6.1"
|
version = "0.6.1"
|
||||||
|
|
@ -4133,6 +4494,15 @@ dependencies = [
|
||||||
"windows_x86_64_msvc 0.53.1",
|
"windows_x86_64_msvc 0.53.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-threading"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_gnullvm"
|
name = "windows_aarch64_gnullvm"
|
||||||
version = "0.52.6"
|
version = "0.52.6"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,18 @@ name = "brightstaff"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["jemalloc"]
|
||||||
|
jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "brightstaff"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "signals_replay"
|
||||||
|
path = "src/bin/signals_replay.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-openai = "0.30.1"
|
async-openai = "0.30.1"
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
|
@ -26,7 +38,11 @@ opentelemetry-stdout = "0.31"
|
||||||
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
|
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
|
||||||
pretty_assertions = "1.4.1"
|
pretty_assertions = "1.4.1"
|
||||||
rand = "0.9.2"
|
rand = "0.9.2"
|
||||||
|
regex = "1.10"
|
||||||
lru = "0.12"
|
lru = "0.12"
|
||||||
|
metrics = "0.23"
|
||||||
|
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
|
||||||
|
metrics-process = "2.1"
|
||||||
redis = { version = "0.27", features = ["tokio-comp"] }
|
redis = { version = "0.27", features = ["tokio-comp"] }
|
||||||
reqwest = { version = "0.12.15", features = ["stream"] }
|
reqwest = { version = "0.12.15", features = ["stream"] }
|
||||||
serde = { version = "1.0.219", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
|
|
@ -35,6 +51,8 @@ serde_with = "3.13.0"
|
||||||
strsim = "0.11"
|
strsim = "0.11"
|
||||||
serde_yaml = "0.9.34"
|
serde_yaml = "0.9.34"
|
||||||
thiserror = "2.0.12"
|
thiserror = "2.0.12"
|
||||||
|
tikv-jemallocator = { version = "0.6", optional = true }
|
||||||
|
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true }
|
||||||
tokio = { version = "1.44.2", features = ["full"] }
|
tokio = { version = "1.44.2", features = ["full"] }
|
||||||
tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] }
|
tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] }
|
||||||
tokio-stream = "0.1"
|
tokio-stream = "0.1"
|
||||||
|
|
|
||||||
|
|
@ -24,4 +24,7 @@ pub struct AppState {
|
||||||
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
|
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
|
||||||
pub http_client: reqwest::Client,
|
pub http_client: reqwest::Client,
|
||||||
pub filter_pipeline: Arc<FilterPipeline>,
|
pub filter_pipeline: Arc<FilterPipeline>,
|
||||||
|
/// When false, agentic signal analysis is skipped on LLM responses to save CPU.
|
||||||
|
/// Controlled by `overrides.disable_signals` in plano config.
|
||||||
|
pub signals_enabled: bool,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
175
crates/brightstaff/src/bin/signals_replay.rs
Normal file
175
crates/brightstaff/src/bin/signals_replay.rs
Normal file
|
|
@ -0,0 +1,175 @@
|
||||||
|
//! `signals-replay` — batch driver for the `brightstaff` signal analyzer.
|
||||||
|
//!
|
||||||
|
//! Reads JSONL conversations from stdin (one per line) and emits matching
|
||||||
|
//! JSONL reports on stdout, one per input conversation, in the same order.
|
||||||
|
//!
|
||||||
|
//! Input shape (per line):
|
||||||
|
//! ```json
|
||||||
|
//! {"id": "convo-42", "messages": [{"from": "human", "value": "..."}, ...]}
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! Output shape (per line, success):
|
||||||
|
//! ```json
|
||||||
|
//! {"id": "convo-42", "report": { ...python-compatible SignalReport dict... }}
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! On per-line failure (parse / analyzer error), emits:
|
||||||
|
//! ```json
|
||||||
|
//! {"id": "convo-42", "error": "..."}
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! The output report dict is shaped to match the Python reference's
|
||||||
|
//! `SignalReport.to_dict()` byte-for-byte so the parity comparator can do a
|
||||||
|
//! direct structural diff.
|
||||||
|
|
||||||
|
use std::io::{self, BufRead, BufWriter, Write};
|
||||||
|
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::{json, Map, Value};
|
||||||
|
|
||||||
|
use brightstaff::signals::{SignalAnalyzer, SignalGroup, SignalReport};
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct InputLine {
|
||||||
|
id: Value,
|
||||||
|
messages: Vec<MessageRow>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct MessageRow {
|
||||||
|
#[serde(default)]
|
||||||
|
from: String,
|
||||||
|
#[serde(default)]
|
||||||
|
value: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let stdin = io::stdin();
|
||||||
|
let stdout = io::stdout();
|
||||||
|
let mut out = BufWriter::new(stdout.lock());
|
||||||
|
let analyzer = SignalAnalyzer::default();
|
||||||
|
|
||||||
|
for line in stdin.lock().lines() {
|
||||||
|
let line = match line {
|
||||||
|
Ok(l) => l,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("read error: {e}");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let result = process_line(&analyzer, trimmed);
|
||||||
|
// Always emit one line per input line so id ordering stays aligned.
|
||||||
|
if let Err(e) = writeln!(out, "{result}") {
|
||||||
|
eprintln!("write error: {e}");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
// Flush periodically isn't strictly needed — BufWriter handles it,
|
||||||
|
// and the parent process reads the whole stream when we're done.
|
||||||
|
}
|
||||||
|
let _ = out.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_line(analyzer: &SignalAnalyzer, line: &str) -> Value {
|
||||||
|
let parsed: InputLine = match serde_json::from_str(line) {
|
||||||
|
Ok(p) => p,
|
||||||
|
Err(e) => {
|
||||||
|
return json!({
|
||||||
|
"id": Value::Null,
|
||||||
|
"error": format!("input parse: {e}"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let id = parsed.id.clone();
|
||||||
|
|
||||||
|
let view: Vec<brightstaff::signals::analyzer::ShareGptMessage<'_>> = parsed
|
||||||
|
.messages
|
||||||
|
.iter()
|
||||||
|
.map(|m| brightstaff::signals::analyzer::ShareGptMessage {
|
||||||
|
from: m.from.as_str(),
|
||||||
|
value: m.value.as_str(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let report = analyzer.analyze_sharegpt(&view);
|
||||||
|
let report_dict = report_to_python_dict(&report);
|
||||||
|
json!({
|
||||||
|
"id": id,
|
||||||
|
"report": report_dict,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a `SignalReport` into the Python reference's `to_dict()` shape.
|
||||||
|
///
|
||||||
|
/// Ordering of category keys in each layer dict follows the Python source
|
||||||
|
/// exactly so even string-equality comparisons behave deterministically.
|
||||||
|
fn report_to_python_dict(r: &SignalReport) -> Value {
|
||||||
|
let mut interaction = Map::new();
|
||||||
|
interaction.insert(
|
||||||
|
"misalignment".to_string(),
|
||||||
|
signal_group_to_python(&r.interaction.misalignment),
|
||||||
|
);
|
||||||
|
interaction.insert(
|
||||||
|
"stagnation".to_string(),
|
||||||
|
signal_group_to_python(&r.interaction.stagnation),
|
||||||
|
);
|
||||||
|
interaction.insert(
|
||||||
|
"disengagement".to_string(),
|
||||||
|
signal_group_to_python(&r.interaction.disengagement),
|
||||||
|
);
|
||||||
|
interaction.insert(
|
||||||
|
"satisfaction".to_string(),
|
||||||
|
signal_group_to_python(&r.interaction.satisfaction),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut execution = Map::new();
|
||||||
|
execution.insert(
|
||||||
|
"failure".to_string(),
|
||||||
|
signal_group_to_python(&r.execution.failure),
|
||||||
|
);
|
||||||
|
execution.insert(
|
||||||
|
"loops".to_string(),
|
||||||
|
signal_group_to_python(&r.execution.loops),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut environment = Map::new();
|
||||||
|
environment.insert(
|
||||||
|
"exhaustion".to_string(),
|
||||||
|
signal_group_to_python(&r.environment.exhaustion),
|
||||||
|
);
|
||||||
|
|
||||||
|
json!({
|
||||||
|
"interaction_signals": Value::Object(interaction),
|
||||||
|
"execution_signals": Value::Object(execution),
|
||||||
|
"environment_signals": Value::Object(environment),
|
||||||
|
"overall_quality": r.overall_quality.as_str(),
|
||||||
|
"summary": r.summary,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signal_group_to_python(g: &SignalGroup) -> Value {
|
||||||
|
let signals: Vec<Value> = g
|
||||||
|
.signals
|
||||||
|
.iter()
|
||||||
|
.map(|s| {
|
||||||
|
json!({
|
||||||
|
"signal_type": s.signal_type.as_str(),
|
||||||
|
"message_index": s.message_index,
|
||||||
|
"snippet": s.snippet,
|
||||||
|
"confidence": s.confidence,
|
||||||
|
"metadata": s.metadata,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
json!({
|
||||||
|
"category": g.category,
|
||||||
|
"count": g.count,
|
||||||
|
"severity": g.severity,
|
||||||
|
"signals": signals,
|
||||||
|
})
|
||||||
|
}
|
||||||
53
crates/brightstaff/src/handlers/debug.rs
Normal file
53
crates/brightstaff/src/handlers/debug.rs
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
use bytes::Bytes;
|
||||||
|
use http_body_util::combinators::BoxBody;
|
||||||
|
use hyper::{Response, StatusCode};
|
||||||
|
|
||||||
|
use super::full;
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
struct MemStats {
|
||||||
|
allocated_bytes: usize,
|
||||||
|
resident_bytes: usize,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
error: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns jemalloc memory statistics as JSON.
|
||||||
|
/// Falls back to a stub when the jemalloc feature is disabled.
|
||||||
|
pub async fn memstats() -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let stats = get_jemalloc_stats();
|
||||||
|
let json = serde_json::to_string(&stats).unwrap();
|
||||||
|
Ok(Response::builder()
|
||||||
|
.status(StatusCode::OK)
|
||||||
|
.header("Content-Type", "application/json")
|
||||||
|
.body(full(json))
|
||||||
|
.unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "jemalloc")]
|
||||||
|
fn get_jemalloc_stats() -> MemStats {
|
||||||
|
use tikv_jemalloc_ctl::{epoch, stats};
|
||||||
|
|
||||||
|
if let Err(e) = epoch::advance() {
|
||||||
|
return MemStats {
|
||||||
|
allocated_bytes: 0,
|
||||||
|
resident_bytes: 0,
|
||||||
|
error: Some(format!("failed to advance jemalloc epoch: {e}")),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
MemStats {
|
||||||
|
allocated_bytes: stats::allocated::read().unwrap_or(0),
|
||||||
|
resident_bytes: stats::resident::read().unwrap_or(0),
|
||||||
|
error: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "jemalloc"))]
|
||||||
|
fn get_jemalloc_stats() -> MemStats {
|
||||||
|
MemStats {
|
||||||
|
allocated_bytes: 0,
|
||||||
|
resident_bytes: 0,
|
||||||
|
error: Some("jemalloc feature not enabled".to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -441,10 +441,8 @@ impl ArchFunctionHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Handle str/string conversions
|
// Handle str/string conversions
|
||||||
"str" | "string" => {
|
"str" | "string" if !value.is_string() => {
|
||||||
if !value.is_string() {
|
return Ok(json!(value.to_string()));
|
||||||
return Ok(json!(value.to_string()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,13 +24,14 @@ use crate::app_state::AppState;
|
||||||
use crate::handlers::agents::pipeline::PipelineProcessor;
|
use crate::handlers::agents::pipeline::PipelineProcessor;
|
||||||
use crate::handlers::extract_request_id;
|
use crate::handlers::extract_request_id;
|
||||||
use crate::handlers::full;
|
use crate::handlers::full;
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
use crate::state::response_state_processor::ResponsesStateProcessor;
|
use crate::state::response_state_processor::ResponsesStateProcessor;
|
||||||
use crate::state::{
|
use crate::state::{
|
||||||
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
||||||
};
|
};
|
||||||
use crate::streaming::{
|
use crate::streaming::{
|
||||||
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
|
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
|
||||||
ObservableStreamProcessor, StreamProcessor,
|
LlmMetricsCtx, ObservableStreamProcessor, StreamProcessor,
|
||||||
};
|
};
|
||||||
use crate::tracing::{
|
use crate::tracing::{
|
||||||
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
||||||
|
|
@ -142,6 +143,7 @@ async fn llm_chat_inner(
|
||||||
&request_path,
|
&request_path,
|
||||||
&state.model_aliases,
|
&state.model_aliases,
|
||||||
&state.llm_providers,
|
&state.llm_providers,
|
||||||
|
state.signals_enabled,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|
@ -415,6 +417,7 @@ async fn parse_and_validate_request(
|
||||||
request_path: &str,
|
request_path: &str,
|
||||||
model_aliases: &Option<HashMap<String, ModelAlias>>,
|
model_aliases: &Option<HashMap<String, ModelAlias>>,
|
||||||
llm_providers: &Arc<RwLock<LlmProviders>>,
|
llm_providers: &Arc<RwLock<LlmProviders>>,
|
||||||
|
signals_enabled: bool,
|
||||||
) -> Result<PreparedRequest, Response<BoxBody<Bytes, hyper::Error>>> {
|
) -> Result<PreparedRequest, Response<BoxBody<Bytes, hyper::Error>>> {
|
||||||
let raw_bytes = request
|
let raw_bytes = request
|
||||||
.collect()
|
.collect()
|
||||||
|
|
@ -493,7 +496,11 @@ async fn parse_and_validate_request(
|
||||||
let user_message_preview = client_request
|
let user_message_preview = client_request
|
||||||
.get_recent_user_message()
|
.get_recent_user_message()
|
||||||
.map(|msg| truncate_message(&msg, 50));
|
.map(|msg| truncate_message(&msg, 50));
|
||||||
let messages_for_signals = Some(client_request.get_messages());
|
let messages_for_signals = if signals_enabled {
|
||||||
|
Some(client_request.get_messages())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
// Set the upstream model name and strip routing metadata
|
// Set the upstream model name and strip routing metadata
|
||||||
client_request.set_model(model_name_only.clone());
|
client_request.set_model(model_name_only.clone());
|
||||||
|
|
@ -694,6 +701,13 @@ async fn send_upstream(
|
||||||
|
|
||||||
let request_start_time = std::time::Instant::now();
|
let request_start_time = std::time::Instant::now();
|
||||||
|
|
||||||
|
// Labels for LLM upstream metrics. We prefer `resolved_model` (post-routing)
|
||||||
|
// and derive the provider from its `provider/model` prefix. This matches the
|
||||||
|
// same model id the cost/latency router keys off.
|
||||||
|
let (metric_provider_raw, metric_model_raw) = bs_metrics::split_provider_model(resolved_model);
|
||||||
|
let metric_provider = metric_provider_raw.to_string();
|
||||||
|
let metric_model = metric_model_raw.to_string();
|
||||||
|
|
||||||
let llm_response = match http_client
|
let llm_response = match http_client
|
||||||
.post(upstream_url)
|
.post(upstream_url)
|
||||||
.headers(request_headers.clone())
|
.headers(request_headers.clone())
|
||||||
|
|
@ -703,6 +717,14 @@ async fn send_upstream(
|
||||||
{
|
{
|
||||||
Ok(res) => res,
|
Ok(res) => res,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
let err_class = bs_metrics::llm_error_class_from_reqwest(&err);
|
||||||
|
bs_metrics::record_llm_upstream(
|
||||||
|
&metric_provider,
|
||||||
|
&metric_model,
|
||||||
|
0,
|
||||||
|
err_class,
|
||||||
|
request_start_time.elapsed(),
|
||||||
|
);
|
||||||
let err_msg = format!("Failed to send request: {}", err);
|
let err_msg = format!("Failed to send request: {}", err);
|
||||||
let mut internal_error = Response::new(full(err_msg));
|
let mut internal_error = Response::new(full(err_msg));
|
||||||
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||||
|
|
@ -758,7 +780,12 @@ async fn send_upstream(
|
||||||
span_name,
|
span_name,
|
||||||
request_start_time,
|
request_start_time,
|
||||||
messages_for_signals,
|
messages_for_signals,
|
||||||
);
|
)
|
||||||
|
.with_llm_metrics(LlmMetricsCtx {
|
||||||
|
provider: metric_provider.clone(),
|
||||||
|
model: metric_model.clone(),
|
||||||
|
upstream_status: upstream_status.as_u16(),
|
||||||
|
});
|
||||||
|
|
||||||
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
|
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
|
||||||
Some(request_headers.clone())
|
Some(request_headers.clone())
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,24 @@ use hyper::StatusCode;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tracing::{debug, info, warn};
|
use tracing::{debug, info, warn};
|
||||||
|
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::router::orchestrator::OrchestratorService;
|
use crate::router::orchestrator::OrchestratorService;
|
||||||
use crate::streaming::truncate_message;
|
use crate::streaming::truncate_message;
|
||||||
use crate::tracing::routing;
|
use crate::tracing::routing;
|
||||||
|
|
||||||
|
/// Classify a request path (already stripped of `/agents` or `/routing` by
|
||||||
|
/// the caller) into the fixed `route` label used on routing metrics.
|
||||||
|
fn route_label_for_path(request_path: &str) -> &'static str {
|
||||||
|
if request_path.starts_with("/agents") {
|
||||||
|
metric_labels::ROUTE_AGENT
|
||||||
|
} else if request_path.starts_with("/routing") {
|
||||||
|
metric_labels::ROUTE_ROUTING
|
||||||
|
} else {
|
||||||
|
metric_labels::ROUTE_LLM
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct RoutingResult {
|
pub struct RoutingResult {
|
||||||
/// Primary model to use (first in the ranked list).
|
/// Primary model to use (first in the ranked list).
|
||||||
pub model_name: String,
|
pub model_name: String,
|
||||||
|
|
@ -106,15 +120,23 @@ pub async fn router_chat_get_upstream_model(
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
let determination_elapsed = routing_start_time.elapsed();
|
||||||
|
let determination_ms = determination_elapsed.as_millis() as i64;
|
||||||
let current_span = tracing::Span::current();
|
let current_span = tracing::Span::current();
|
||||||
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
||||||
|
let route_label = route_label_for_path(request_path);
|
||||||
|
|
||||||
match routing_result {
|
match routing_result {
|
||||||
Ok(route) => match route {
|
Ok(route) => match route {
|
||||||
Some((route_name, ranked_models)) => {
|
Some((route_name, ranked_models)) => {
|
||||||
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
||||||
current_span.record("route.selected_model", model_name.as_str());
|
current_span.record("route.selected_model", model_name.as_str());
|
||||||
|
bs_metrics::record_router_decision(
|
||||||
|
route_label,
|
||||||
|
&model_name,
|
||||||
|
false,
|
||||||
|
determination_elapsed,
|
||||||
|
);
|
||||||
Ok(RoutingResult {
|
Ok(RoutingResult {
|
||||||
model_name,
|
model_name,
|
||||||
models: ranked_models,
|
models: ranked_models,
|
||||||
|
|
@ -126,6 +148,12 @@ pub async fn router_chat_get_upstream_model(
|
||||||
// This signals to llm.rs to use the original validated request model
|
// This signals to llm.rs to use the original validated request model
|
||||||
current_span.record("route.selected_model", "none");
|
current_span.record("route.selected_model", "none");
|
||||||
info!("no route determined, using default model");
|
info!("no route determined, using default model");
|
||||||
|
bs_metrics::record_router_decision(
|
||||||
|
route_label,
|
||||||
|
"none",
|
||||||
|
true,
|
||||||
|
determination_elapsed,
|
||||||
|
);
|
||||||
|
|
||||||
Ok(RoutingResult {
|
Ok(RoutingResult {
|
||||||
model_name: "none".to_string(),
|
model_name: "none".to_string(),
|
||||||
|
|
@ -136,6 +164,7 @@ pub async fn router_chat_get_upstream_model(
|
||||||
},
|
},
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
current_span.record("route.selected_model", "unknown");
|
current_span.record("route.selected_model", "unknown");
|
||||||
|
bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
|
||||||
Err(RoutingError::internal_error(format!(
|
Err(RoutingError::internal_error(format!(
|
||||||
"Failed to determine route: {}",
|
"Failed to determine route: {}",
|
||||||
err
|
err
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
pub mod agents;
|
pub mod agents;
|
||||||
|
pub mod debug;
|
||||||
pub mod function_calling;
|
pub mod function_calling;
|
||||||
pub mod llm;
|
pub mod llm;
|
||||||
pub mod models;
|
pub mod models;
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@ use tracing::{debug, info, info_span, warn, Instrument};
|
||||||
|
|
||||||
use super::extract_or_generate_traceparent;
|
use super::extract_or_generate_traceparent;
|
||||||
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
|
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::router::orchestrator::OrchestratorService;
|
use crate::router::orchestrator::OrchestratorService;
|
||||||
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
|
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
|
||||||
|
|
||||||
|
|
@ -230,6 +232,17 @@ async fn routing_decision_inner(
|
||||||
pinned: false,
|
pinned: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Distinguish "decision served" (a concrete model picked) from
|
||||||
|
// "no_candidates" (the sentinel "none" returned when nothing
|
||||||
|
// matched). The handler still responds 200 in both cases, so RED
|
||||||
|
// metrics alone can't tell them apart.
|
||||||
|
let outcome = if response.models.first().map(|m| m == "none").unwrap_or(true) {
|
||||||
|
metric_labels::ROUTING_SVC_NO_CANDIDATES
|
||||||
|
} else {
|
||||||
|
metric_labels::ROUTING_SVC_DECISION_SERVED
|
||||||
|
};
|
||||||
|
bs_metrics::record_routing_service_outcome(outcome);
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
|
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
|
||||||
total_models = response.models.len(),
|
total_models = response.models.len(),
|
||||||
|
|
@ -249,6 +262,7 @@ async fn routing_decision_inner(
|
||||||
.unwrap())
|
.unwrap())
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
bs_metrics::record_routing_service_outcome(metric_labels::ROUTING_SVC_POLICY_ERROR);
|
||||||
warn!(error = %err.message, "routing decision failed");
|
warn!(error = %err.message, "routing decision failed");
|
||||||
Ok(BrightStaffError::InternalServerError(err.message).into_response())
|
Ok(BrightStaffError::InternalServerError(err.message).into_response())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
pub mod app_state;
|
pub mod app_state;
|
||||||
pub mod handlers;
|
pub mod handlers;
|
||||||
|
pub mod metrics;
|
||||||
pub mod router;
|
pub mod router;
|
||||||
pub mod session_cache;
|
pub mod session_cache;
|
||||||
pub mod signals;
|
pub mod signals;
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,17 @@
|
||||||
|
#[cfg(feature = "jemalloc")]
|
||||||
|
#[global_allocator]
|
||||||
|
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||||
|
|
||||||
use brightstaff::app_state::AppState;
|
use brightstaff::app_state::AppState;
|
||||||
use brightstaff::handlers::agents::orchestrator::agent_chat;
|
use brightstaff::handlers::agents::orchestrator::agent_chat;
|
||||||
|
use brightstaff::handlers::debug;
|
||||||
use brightstaff::handlers::empty;
|
use brightstaff::handlers::empty;
|
||||||
use brightstaff::handlers::function_calling::function_calling_chat_handler;
|
use brightstaff::handlers::function_calling::function_calling_chat_handler;
|
||||||
use brightstaff::handlers::llm::llm_chat;
|
use brightstaff::handlers::llm::llm_chat;
|
||||||
use brightstaff::handlers::models::list_models;
|
use brightstaff::handlers::models::list_models;
|
||||||
use brightstaff::handlers::routing_service::routing_decision;
|
use brightstaff::handlers::routing_service::routing_decision;
|
||||||
|
use brightstaff::metrics as bs_metrics;
|
||||||
|
use brightstaff::metrics::labels as metric_labels;
|
||||||
use brightstaff::router::model_metrics::ModelMetricsService;
|
use brightstaff::router::model_metrics::ModelMetricsService;
|
||||||
use brightstaff::router::orchestrator::OrchestratorService;
|
use brightstaff::router::orchestrator::OrchestratorService;
|
||||||
use brightstaff::session_cache::init_session_cache;
|
use brightstaff::session_cache::init_session_cache;
|
||||||
|
|
@ -326,6 +333,8 @@ async fn init_app_state(
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.and_then(|tracing| tracing.span_attributes.clone());
|
.and_then(|tracing| tracing.span_attributes.clone());
|
||||||
|
|
||||||
|
let signals_enabled = !overrides.disable_signals.unwrap_or(false);
|
||||||
|
|
||||||
Ok(AppState {
|
Ok(AppState {
|
||||||
orchestrator_service,
|
orchestrator_service,
|
||||||
model_aliases: config.model_aliases.clone(),
|
model_aliases: config.model_aliases.clone(),
|
||||||
|
|
@ -337,6 +346,7 @@ async fn init_app_state(
|
||||||
span_attributes,
|
span_attributes,
|
||||||
http_client: reqwest::Client::new(),
|
http_client: reqwest::Client::new(),
|
||||||
filter_pipeline,
|
filter_pipeline,
|
||||||
|
signals_enabled,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -384,10 +394,79 @@ async fn init_state_storage(
|
||||||
// Request routing
|
// Request routing
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Normalized method label — limited set so we never emit a free-form string.
|
||||||
|
fn method_label(method: &Method) -> &'static str {
|
||||||
|
match *method {
|
||||||
|
Method::GET => "GET",
|
||||||
|
Method::POST => "POST",
|
||||||
|
Method::PUT => "PUT",
|
||||||
|
Method::DELETE => "DELETE",
|
||||||
|
Method::PATCH => "PATCH",
|
||||||
|
Method::HEAD => "HEAD",
|
||||||
|
Method::OPTIONS => "OPTIONS",
|
||||||
|
_ => "OTHER",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the fixed `handler` metric label from the request's path+method.
|
||||||
|
/// Returning `None` for fall-through means `route()` will hand the request to
|
||||||
|
/// the catch-all 404 branch.
|
||||||
|
fn handler_label_for(method: &Method, path: &str) -> &'static str {
|
||||||
|
if let Some(stripped) = path.strip_prefix("/agents") {
|
||||||
|
if matches!(
|
||||||
|
stripped,
|
||||||
|
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||||
|
) {
|
||||||
|
return metric_labels::HANDLER_AGENT_CHAT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(stripped) = path.strip_prefix("/routing") {
|
||||||
|
if matches!(
|
||||||
|
stripped,
|
||||||
|
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||||
|
) {
|
||||||
|
return metric_labels::HANDLER_ROUTING_DECISION;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match (method, path) {
|
||||||
|
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
|
||||||
|
metric_labels::HANDLER_LLM_CHAT
|
||||||
|
}
|
||||||
|
(&Method::POST, "/function_calling") => metric_labels::HANDLER_FUNCTION_CALLING,
|
||||||
|
(&Method::GET, "/v1/models" | "/agents/v1/models") => metric_labels::HANDLER_LIST_MODELS,
|
||||||
|
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => {
|
||||||
|
metric_labels::HANDLER_CORS_PREFLIGHT
|
||||||
|
}
|
||||||
|
_ => metric_labels::HANDLER_NOT_FOUND,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Route an incoming HTTP request to the appropriate handler.
|
/// Route an incoming HTTP request to the appropriate handler.
|
||||||
async fn route(
|
async fn route(
|
||||||
req: Request<Incoming>,
|
req: Request<Incoming>,
|
||||||
state: Arc<AppState>,
|
state: Arc<AppState>,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let handler = handler_label_for(req.method(), req.uri().path());
|
||||||
|
let method = method_label(req.method());
|
||||||
|
let started = std::time::Instant::now();
|
||||||
|
let _in_flight = bs_metrics::InFlightGuard::new(handler);
|
||||||
|
|
||||||
|
let result = dispatch(req, state).await;
|
||||||
|
|
||||||
|
let status = match &result {
|
||||||
|
Ok(resp) => resp.status().as_u16(),
|
||||||
|
// hyper::Error here means the body couldn't be produced; conventionally 500.
|
||||||
|
Err(_) => 500,
|
||||||
|
};
|
||||||
|
bs_metrics::record_http(handler, method, status, started);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inner dispatcher split out so `route()` can wrap it with metrics without
|
||||||
|
/// duplicating the match tree.
|
||||||
|
async fn dispatch(
|
||||||
|
req: Request<Incoming>,
|
||||||
|
state: Arc<AppState>,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
|
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
|
||||||
let path = req.uri().path().to_string();
|
let path = req.uri().path().to_string();
|
||||||
|
|
@ -439,6 +518,7 @@ async fn route(
|
||||||
Ok(list_models(Arc::clone(&state.llm_providers)).await)
|
Ok(list_models(Arc::clone(&state.llm_providers)).await)
|
||||||
}
|
}
|
||||||
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => cors_preflight(),
|
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => cors_preflight(),
|
||||||
|
(&Method::GET, "/debug/memstats") => debug::memstats().await,
|
||||||
_ => {
|
_ => {
|
||||||
debug!(method = %req.method(), path = %path, "no route found");
|
debug!(method = %req.method(), path = %path, "no route found");
|
||||||
let mut not_found = Response::new(empty());
|
let mut not_found = Response::new(empty());
|
||||||
|
|
@ -503,6 +583,7 @@ async fn run_server(state: Arc<AppState>) -> Result<(), Box<dyn std::error::Erro
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let config = load_config()?;
|
let config = load_config()?;
|
||||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||||
|
bs_metrics::init();
|
||||||
info!("loaded plano_config.yaml");
|
info!("loaded plano_config.yaml");
|
||||||
let state = Arc::new(init_app_state(&config).await?);
|
let state = Arc::new(init_app_state(&config).await?);
|
||||||
run_server(state).await
|
run_server(state).await
|
||||||
|
|
|
||||||
38
crates/brightstaff/src/metrics/labels.rs
Normal file
38
crates/brightstaff/src/metrics/labels.rs
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
//! Fixed label-value constants so callers never emit free-form strings
|
||||||
|
//! (which would blow up cardinality).
|
||||||
|
|
||||||
|
// Handler enum — derived from the path+method match in `route()`.
|
||||||
|
pub const HANDLER_AGENT_CHAT: &str = "agent_chat";
|
||||||
|
pub const HANDLER_ROUTING_DECISION: &str = "routing_decision";
|
||||||
|
pub const HANDLER_LLM_CHAT: &str = "llm_chat";
|
||||||
|
pub const HANDLER_FUNCTION_CALLING: &str = "function_calling";
|
||||||
|
pub const HANDLER_LIST_MODELS: &str = "list_models";
|
||||||
|
pub const HANDLER_CORS_PREFLIGHT: &str = "cors_preflight";
|
||||||
|
pub const HANDLER_NOT_FOUND: &str = "not_found";
|
||||||
|
|
||||||
|
// Router "route" class — which brightstaff endpoint prompted the decision.
|
||||||
|
pub const ROUTE_AGENT: &str = "agent";
|
||||||
|
pub const ROUTE_ROUTING: &str = "routing";
|
||||||
|
pub const ROUTE_LLM: &str = "llm";
|
||||||
|
|
||||||
|
// Token kind for brightstaff_llm_tokens_total.
|
||||||
|
pub const TOKEN_KIND_PROMPT: &str = "prompt";
|
||||||
|
pub const TOKEN_KIND_COMPLETION: &str = "completion";
|
||||||
|
|
||||||
|
// LLM error_class values (match docstring in metrics/mod.rs).
|
||||||
|
pub const LLM_ERR_NONE: &str = "none";
|
||||||
|
pub const LLM_ERR_TIMEOUT: &str = "timeout";
|
||||||
|
pub const LLM_ERR_CONNECT: &str = "connect";
|
||||||
|
pub const LLM_ERR_PARSE: &str = "parse";
|
||||||
|
pub const LLM_ERR_OTHER: &str = "other";
|
||||||
|
pub const LLM_ERR_STREAM: &str = "stream";
|
||||||
|
|
||||||
|
// Routing service outcome values.
|
||||||
|
pub const ROUTING_SVC_DECISION_SERVED: &str = "decision_served";
|
||||||
|
pub const ROUTING_SVC_NO_CANDIDATES: &str = "no_candidates";
|
||||||
|
pub const ROUTING_SVC_POLICY_ERROR: &str = "policy_error";
|
||||||
|
|
||||||
|
// Session cache outcome values.
|
||||||
|
pub const SESSION_CACHE_HIT: &str = "hit";
|
||||||
|
pub const SESSION_CACHE_MISS: &str = "miss";
|
||||||
|
pub const SESSION_CACHE_STORE: &str = "store";
|
||||||
377
crates/brightstaff/src/metrics/mod.rs
Normal file
377
crates/brightstaff/src/metrics/mod.rs
Normal file
|
|
@ -0,0 +1,377 @@
|
||||||
|
//! Prometheus metrics for brightstaff.
|
||||||
|
//!
|
||||||
|
//! Installs the `metrics` global recorder backed by
|
||||||
|
//! `metrics-exporter-prometheus` and exposes a `/metrics` HTTP endpoint on a
|
||||||
|
//! dedicated admin port (default `0.0.0.0:9092`, overridable via
|
||||||
|
//! `METRICS_BIND_ADDRESS`).
|
||||||
|
//!
|
||||||
|
//! Emitted metric families (see `describe_all` for full list):
|
||||||
|
//! - HTTP RED: `brightstaff_http_requests_total`,
|
||||||
|
//! `brightstaff_http_request_duration_seconds`,
|
||||||
|
//! `brightstaff_http_in_flight_requests`.
|
||||||
|
//! - LLM upstream: `brightstaff_llm_upstream_requests_total`,
|
||||||
|
//! `brightstaff_llm_upstream_duration_seconds`,
|
||||||
|
//! `brightstaff_llm_time_to_first_token_seconds`,
|
||||||
|
//! `brightstaff_llm_tokens_total`,
|
||||||
|
//! `brightstaff_llm_tokens_usage_missing_total`.
|
||||||
|
//! - Routing: `brightstaff_router_decisions_total`,
|
||||||
|
//! `brightstaff_router_decision_duration_seconds`,
|
||||||
|
//! `brightstaff_routing_service_requests_total`,
|
||||||
|
//! `brightstaff_session_cache_events_total`.
|
||||||
|
//! - Process: via `metrics-process`.
|
||||||
|
//! - Build: `brightstaff_build_info`.
|
||||||
|
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use metrics::{counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram};
|
||||||
|
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
pub mod labels;
|
||||||
|
|
||||||
|
/// Guard flag so tests don't re-install the global recorder.
|
||||||
|
static INIT: OnceLock<()> = OnceLock::new();
|
||||||
|
|
||||||
|
const DEFAULT_METRICS_BIND: &str = "0.0.0.0:9092";
|
||||||
|
|
||||||
|
/// HTTP request duration buckets (seconds). Capped at 60s.
|
||||||
|
const HTTP_BUCKETS: &[f64] = &[
|
||||||
|
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0,
|
||||||
|
];
|
||||||
|
|
||||||
|
/// LLM upstream / TTFT buckets (seconds). Capped at 120s because provider
|
||||||
|
/// completions routinely run that long.
|
||||||
|
const LLM_BUCKETS: &[f64] = &[0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0];
|
||||||
|
|
||||||
|
/// Router decision buckets (seconds). The orchestrator call itself is usually
|
||||||
|
/// sub-second but bucketed generously in case of upstream slowness.
|
||||||
|
const ROUTER_BUCKETS: &[f64] = &[
|
||||||
|
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0,
|
||||||
|
];
|
||||||
|
|
||||||
|
/// Install the global recorder and spawn the `/metrics` HTTP listener.
|
||||||
|
///
|
||||||
|
/// Safe to call more than once; subsequent calls are no-ops so tests that
|
||||||
|
/// construct their own recorder still work.
|
||||||
|
pub fn init() {
|
||||||
|
if INIT.get().is_some() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let bind: SocketAddr = std::env::var("METRICS_BIND_ADDRESS")
|
||||||
|
.unwrap_or_else(|_| DEFAULT_METRICS_BIND.to_string())
|
||||||
|
.parse()
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
warn!(error = %err, default = DEFAULT_METRICS_BIND, "invalid METRICS_BIND_ADDRESS, falling back to default");
|
||||||
|
DEFAULT_METRICS_BIND.parse().expect("default bind parses")
|
||||||
|
});
|
||||||
|
|
||||||
|
let builder = PrometheusBuilder::new()
|
||||||
|
.with_http_listener(bind)
|
||||||
|
.set_buckets_for_metric(
|
||||||
|
Matcher::Full("brightstaff_http_request_duration_seconds".to_string()),
|
||||||
|
HTTP_BUCKETS,
|
||||||
|
)
|
||||||
|
.and_then(|b| {
|
||||||
|
b.set_buckets_for_metric(Matcher::Prefix("brightstaff_llm_".to_string()), LLM_BUCKETS)
|
||||||
|
})
|
||||||
|
.and_then(|b| {
|
||||||
|
b.set_buckets_for_metric(
|
||||||
|
Matcher::Full("brightstaff_router_decision_duration_seconds".to_string()),
|
||||||
|
ROUTER_BUCKETS,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
let builder = match builder {
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(err) => {
|
||||||
|
warn!(error = %err, "failed to configure metrics buckets, using defaults");
|
||||||
|
PrometheusBuilder::new().with_http_listener(bind)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Err(err) = builder.install() {
|
||||||
|
warn!(error = %err, "failed to install Prometheus recorder; metrics disabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = INIT.set(());
|
||||||
|
|
||||||
|
describe_all();
|
||||||
|
emit_build_info();
|
||||||
|
|
||||||
|
// Register process-level collector (RSS, CPU, FDs).
|
||||||
|
let collector = metrics_process::Collector::default();
|
||||||
|
collector.describe();
|
||||||
|
// Prime once at startup; subsequent scrapes refresh via the exporter's
|
||||||
|
// per-scrape render, so we additionally refresh on a short interval to
|
||||||
|
// keep gauges moving between scrapes without requiring client pull.
|
||||||
|
collector.collect();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut tick = tokio::time::interval(Duration::from_secs(10));
|
||||||
|
tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
||||||
|
loop {
|
||||||
|
tick.tick().await;
|
||||||
|
collector.collect();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
info!(address = %bind, "metrics listener started");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn describe_all() {
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_http_requests_total",
|
||||||
|
"Total HTTP requests served by brightstaff, by handler and status class."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_http_request_duration_seconds",
|
||||||
|
"Wall-clock duration of HTTP requests served by brightstaff, by handler."
|
||||||
|
);
|
||||||
|
describe_gauge!(
|
||||||
|
"brightstaff_http_in_flight_requests",
|
||||||
|
"Number of HTTP requests currently being served by brightstaff, by handler."
|
||||||
|
);
|
||||||
|
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_llm_upstream_requests_total",
|
||||||
|
"LLM upstream request outcomes, by provider, model, status class and error class."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_llm_upstream_duration_seconds",
|
||||||
|
"Wall-clock duration of LLM upstream calls (stream close for streaming), by provider and model."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_llm_time_to_first_token_seconds",
|
||||||
|
"Time from request start to first streamed byte, by provider and model (streaming only)."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_llm_tokens_total",
|
||||||
|
"Tokens reported in the provider `usage` field, by provider, model and kind (prompt/completion)."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_llm_tokens_usage_missing_total",
|
||||||
|
"LLM responses that completed without a usable `usage` block (so token counts are unknown)."
|
||||||
|
);
|
||||||
|
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_router_decisions_total",
|
||||||
|
"Routing decisions made by the orchestrator, by route, selected model, and whether a fallback was used."
|
||||||
|
);
|
||||||
|
describe_histogram!(
|
||||||
|
"brightstaff_router_decision_duration_seconds",
|
||||||
|
"Time spent in the orchestrator deciding a route, by route."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_routing_service_requests_total",
|
||||||
|
"Outcomes of /routing/* decision requests: decision_served, no_candidates, policy_error."
|
||||||
|
);
|
||||||
|
describe_counter!(
|
||||||
|
"brightstaff_session_cache_events_total",
|
||||||
|
"Session affinity cache lookups and stores, by outcome."
|
||||||
|
);
|
||||||
|
|
||||||
|
describe_gauge!(
|
||||||
|
"brightstaff_build_info",
|
||||||
|
"Build metadata. Always 1; labels carry version and git SHA."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_build_info() {
|
||||||
|
let version = env!("CARGO_PKG_VERSION");
|
||||||
|
let git_sha = option_env!("GIT_SHA").unwrap_or("unknown");
|
||||||
|
gauge!(
|
||||||
|
"brightstaff_build_info",
|
||||||
|
"version" => version.to_string(),
|
||||||
|
"git_sha" => git_sha.to_string(),
|
||||||
|
)
|
||||||
|
.set(1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split a provider-qualified model id like `"openai/gpt-4o"` into
|
||||||
|
/// `(provider, model)`. Returns `("unknown", raw)` when there is no `/`.
|
||||||
|
pub fn split_provider_model(full: &str) -> (&str, &str) {
|
||||||
|
match full.split_once('/') {
|
||||||
|
Some((p, m)) => (p, m),
|
||||||
|
None => ("unknown", full),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bucket an HTTP status code into `"2xx"` / `"4xx"` / `"5xx"` / `"1xx"` / `"3xx"`.
|
||||||
|
pub fn status_class(status: u16) -> &'static str {
|
||||||
|
match status {
|
||||||
|
100..=199 => "1xx",
|
||||||
|
200..=299 => "2xx",
|
||||||
|
300..=399 => "3xx",
|
||||||
|
400..=499 => "4xx",
|
||||||
|
500..=599 => "5xx",
|
||||||
|
_ => "other",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// HTTP RED helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// RAII guard that increments the in-flight gauge on construction and
|
||||||
|
/// decrements on drop. Pair with [`HttpTimer`] in the `route()` wrapper so the
|
||||||
|
/// gauge drops even on error paths.
|
||||||
|
pub struct InFlightGuard {
|
||||||
|
handler: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InFlightGuard {
|
||||||
|
pub fn new(handler: &'static str) -> Self {
|
||||||
|
gauge!(
|
||||||
|
"brightstaff_http_in_flight_requests",
|
||||||
|
"handler" => handler,
|
||||||
|
)
|
||||||
|
.increment(1.0);
|
||||||
|
Self { handler }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for InFlightGuard {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
gauge!(
|
||||||
|
"brightstaff_http_in_flight_requests",
|
||||||
|
"handler" => self.handler,
|
||||||
|
)
|
||||||
|
.decrement(1.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the HTTP request counter + duration histogram.
|
||||||
|
pub fn record_http(handler: &'static str, method: &'static str, status: u16, started: Instant) {
|
||||||
|
let class = status_class(status);
|
||||||
|
counter!(
|
||||||
|
"brightstaff_http_requests_total",
|
||||||
|
"handler" => handler,
|
||||||
|
"method" => method,
|
||||||
|
"status_class" => class,
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_http_request_duration_seconds",
|
||||||
|
"handler" => handler,
|
||||||
|
)
|
||||||
|
.record(started.elapsed().as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// LLM upstream helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Classify an outcome of an LLM upstream call for the `error_class` label.
|
||||||
|
pub fn llm_error_class_from_reqwest(err: &reqwest::Error) -> &'static str {
|
||||||
|
if err.is_timeout() {
|
||||||
|
"timeout"
|
||||||
|
} else if err.is_connect() {
|
||||||
|
"connect"
|
||||||
|
} else if err.is_decode() {
|
||||||
|
"parse"
|
||||||
|
} else {
|
||||||
|
"other"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the outcome of an LLM upstream call. `status` is the HTTP status
|
||||||
|
/// the upstream returned (0 if the call never produced one, e.g. send failure).
|
||||||
|
/// `error_class` is `"none"` on success, or a discriminated error label.
|
||||||
|
pub fn record_llm_upstream(
|
||||||
|
provider: &str,
|
||||||
|
model: &str,
|
||||||
|
status: u16,
|
||||||
|
error_class: &str,
|
||||||
|
duration: Duration,
|
||||||
|
) {
|
||||||
|
let class = if status == 0 {
|
||||||
|
"error"
|
||||||
|
} else {
|
||||||
|
status_class(status)
|
||||||
|
};
|
||||||
|
counter!(
|
||||||
|
"brightstaff_llm_upstream_requests_total",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
"status_class" => class,
|
||||||
|
"error_class" => error_class.to_string(),
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_llm_upstream_duration_seconds",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
)
|
||||||
|
.record(duration.as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_llm_ttft(provider: &str, model: &str, ttft: Duration) {
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_llm_time_to_first_token_seconds",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
)
|
||||||
|
.record(ttft.as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_llm_tokens(provider: &str, model: &str, kind: &'static str, count: u64) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_llm_tokens_total",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
"kind" => kind,
|
||||||
|
)
|
||||||
|
.increment(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_llm_tokens_usage_missing(provider: &str, model: &str) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_llm_tokens_usage_missing_total",
|
||||||
|
"provider" => provider.to_string(),
|
||||||
|
"model" => model.to_string(),
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Router helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pub fn record_router_decision(
|
||||||
|
route: &'static str,
|
||||||
|
selected_model: &str,
|
||||||
|
fallback: bool,
|
||||||
|
duration: Duration,
|
||||||
|
) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_router_decisions_total",
|
||||||
|
"route" => route,
|
||||||
|
"selected_model" => selected_model.to_string(),
|
||||||
|
"fallback" => if fallback { "true" } else { "false" },
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
histogram!(
|
||||||
|
"brightstaff_router_decision_duration_seconds",
|
||||||
|
"route" => route,
|
||||||
|
)
|
||||||
|
.record(duration.as_secs_f64());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_routing_service_outcome(outcome: &'static str) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_routing_service_requests_total",
|
||||||
|
"outcome" => outcome,
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_session_cache_event(outcome: &'static str) {
|
||||||
|
counter!(
|
||||||
|
"brightstaff_session_cache_events_total",
|
||||||
|
"outcome" => outcome,
|
||||||
|
)
|
||||||
|
.increment(1);
|
||||||
|
}
|
||||||
|
|
@ -3,3 +3,5 @@ pub mod model_metrics;
|
||||||
pub mod orchestrator;
|
pub mod orchestrator;
|
||||||
pub mod orchestrator_model;
|
pub mod orchestrator_model;
|
||||||
pub mod orchestrator_model_v1;
|
pub mod orchestrator_model_v1;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod stress_tests;
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@ use super::http::{self, post_and_extract_content};
|
||||||
use super::model_metrics::ModelMetricsService;
|
use super::model_metrics::ModelMetricsService;
|
||||||
use super::orchestrator_model::OrchestratorModel;
|
use super::orchestrator_model::OrchestratorModel;
|
||||||
|
|
||||||
|
use crate::metrics as bs_metrics;
|
||||||
|
use crate::metrics::labels as metric_labels;
|
||||||
use crate::router::orchestrator_model_v1;
|
use crate::router::orchestrator_model_v1;
|
||||||
use crate::session_cache::SessionCache;
|
use crate::session_cache::SessionCache;
|
||||||
|
|
||||||
|
|
@ -130,7 +132,13 @@ impl OrchestratorService {
|
||||||
tenant_id: Option<&str>,
|
tenant_id: Option<&str>,
|
||||||
) -> Option<CachedRoute> {
|
) -> Option<CachedRoute> {
|
||||||
let cache = self.session_cache.as_ref()?;
|
let cache = self.session_cache.as_ref()?;
|
||||||
cache.get(&Self::session_key(tenant_id, session_id)).await
|
let result = cache.get(&Self::session_key(tenant_id, session_id)).await;
|
||||||
|
bs_metrics::record_session_cache_event(if result.is_some() {
|
||||||
|
metric_labels::SESSION_CACHE_HIT
|
||||||
|
} else {
|
||||||
|
metric_labels::SESSION_CACHE_MISS
|
||||||
|
});
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn cache_route(
|
pub async fn cache_route(
|
||||||
|
|
@ -151,6 +159,7 @@ impl OrchestratorService {
|
||||||
self.session_ttl,
|
self.session_ttl,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
bs_metrics::record_session_cache_event(metric_labels::SESSION_CACHE_STORE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
264
crates/brightstaff/src/router/stress_tests.rs
Normal file
264
crates/brightstaff/src/router/stress_tests.rs
Normal file
|
|
@ -0,0 +1,264 @@
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::router::orchestrator::OrchestratorService;
|
||||||
|
use crate::session_cache::memory::MemorySessionCache;
|
||||||
|
use common::configuration::{SelectionPolicy, SelectionPreference, TopLevelRoutingPreference};
|
||||||
|
use hermesllm::apis::openai::{Message, MessageContent, Role};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
fn make_messages(n: usize) -> Vec<Message> {
|
||||||
|
(0..n)
|
||||||
|
.map(|i| Message {
|
||||||
|
role: if i % 2 == 0 {
|
||||||
|
Role::User
|
||||||
|
} else {
|
||||||
|
Role::Assistant
|
||||||
|
},
|
||||||
|
content: Some(MessageContent::Text(format!(
|
||||||
|
"This is message number {i} with some padding text to make it realistic."
|
||||||
|
))),
|
||||||
|
name: None,
|
||||||
|
tool_calls: None,
|
||||||
|
tool_call_id: None,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_routing_prefs() -> Vec<TopLevelRoutingPreference> {
|
||||||
|
vec![
|
||||||
|
TopLevelRoutingPreference {
|
||||||
|
name: "code_generation".to_string(),
|
||||||
|
description: "Code generation and debugging tasks".to_string(),
|
||||||
|
models: vec![
|
||||||
|
"openai/gpt-4o".to_string(),
|
||||||
|
"openai/gpt-4o-mini".to_string(),
|
||||||
|
],
|
||||||
|
selection_policy: SelectionPolicy {
|
||||||
|
prefer: SelectionPreference::None,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
TopLevelRoutingPreference {
|
||||||
|
name: "summarization".to_string(),
|
||||||
|
description: "Summarizing documents and text".to_string(),
|
||||||
|
models: vec![
|
||||||
|
"anthropic/claude-3-sonnet".to_string(),
|
||||||
|
"openai/gpt-4o-mini".to_string(),
|
||||||
|
],
|
||||||
|
selection_policy: SelectionPolicy {
|
||||||
|
prefer: SelectionPreference::None,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stress test: exercise the full routing code path N times using a mock
|
||||||
|
/// HTTP server and measure jemalloc allocated bytes before/after.
|
||||||
|
///
|
||||||
|
/// This catches:
|
||||||
|
/// - Memory leaks in generate_request / parse_response
|
||||||
|
/// - Leaks in reqwest connection handling
|
||||||
|
/// - String accumulation in the orchestrator model
|
||||||
|
/// - Fragmentation (jemalloc allocated vs resident)
|
||||||
|
#[tokio::test]
|
||||||
|
async fn stress_test_routing_determine_route() {
|
||||||
|
let mut server = mockito::Server::new_async().await;
|
||||||
|
let router_url = format!("{}/v1/chat/completions", server.url());
|
||||||
|
|
||||||
|
let mock_response = serde_json::json!({
|
||||||
|
"id": "chatcmpl-mock",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": 1234567890,
|
||||||
|
"model": "plano-orchestrator",
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "{\"route\": \"code_generation\"}"
|
||||||
|
},
|
||||||
|
"finish_reason": "stop"
|
||||||
|
}],
|
||||||
|
"usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}
|
||||||
|
});
|
||||||
|
|
||||||
|
let _mock = server
|
||||||
|
.mock("POST", "/v1/chat/completions")
|
||||||
|
.with_status(200)
|
||||||
|
.with_header("content-type", "application/json")
|
||||||
|
.with_body(mock_response.to_string())
|
||||||
|
.expect_at_least(1)
|
||||||
|
.create_async()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let prefs = make_routing_prefs();
|
||||||
|
let session_cache = Arc::new(MemorySessionCache::new(1000));
|
||||||
|
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
||||||
|
router_url,
|
||||||
|
"Plano-Orchestrator".to_string(),
|
||||||
|
"plano-orchestrator".to_string(),
|
||||||
|
Some(prefs.clone()),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
session_cache,
|
||||||
|
None,
|
||||||
|
2048,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Warm up: a few requests to stabilize allocator state
|
||||||
|
for _ in 0..10 {
|
||||||
|
let msgs = make_messages(5);
|
||||||
|
let _ = orchestrator_service
|
||||||
|
.determine_route(&msgs, None, "warmup")
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Snapshot memory after warmup
|
||||||
|
let baseline = get_allocated();
|
||||||
|
|
||||||
|
let num_iterations = 2000;
|
||||||
|
|
||||||
|
for i in 0..num_iterations {
|
||||||
|
let msgs = make_messages(5 + (i % 10));
|
||||||
|
let inline = if i % 3 == 0 {
|
||||||
|
Some(make_routing_prefs())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
let _ = orchestrator_service
|
||||||
|
.determine_route(&msgs, inline, &format!("req-{i}"))
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let after = get_allocated();
|
||||||
|
|
||||||
|
let growth = after.saturating_sub(baseline);
|
||||||
|
let growth_mb = growth as f64 / (1024.0 * 1024.0);
|
||||||
|
let per_request = if num_iterations > 0 {
|
||||||
|
growth / num_iterations
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
eprintln!("=== Routing Stress Test Results ===");
|
||||||
|
eprintln!(" Iterations: {num_iterations}");
|
||||||
|
eprintln!(" Baseline alloc: {} bytes", baseline);
|
||||||
|
eprintln!(" Final alloc: {} bytes", after);
|
||||||
|
eprintln!(" Growth: {} bytes ({growth_mb:.2} MB)", growth);
|
||||||
|
eprintln!(" Per-request: {} bytes", per_request);
|
||||||
|
|
||||||
|
// Allow up to 256 bytes per request of retained growth (connection pool, etc.)
|
||||||
|
// A true leak would show thousands of bytes per request.
|
||||||
|
assert!(
|
||||||
|
per_request < 256,
|
||||||
|
"Possible memory leak: {per_request} bytes/request retained after {num_iterations} iterations"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stress test with high concurrency: many parallel determine_route calls.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn stress_test_routing_concurrent() {
|
||||||
|
let mut server = mockito::Server::new_async().await;
|
||||||
|
let router_url = format!("{}/v1/chat/completions", server.url());
|
||||||
|
|
||||||
|
let mock_response = serde_json::json!({
|
||||||
|
"id": "chatcmpl-mock",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": 1234567890,
|
||||||
|
"model": "plano-orchestrator",
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "{\"route\": \"summarization\"}"
|
||||||
|
},
|
||||||
|
"finish_reason": "stop"
|
||||||
|
}],
|
||||||
|
"usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}
|
||||||
|
});
|
||||||
|
|
||||||
|
let _mock = server
|
||||||
|
.mock("POST", "/v1/chat/completions")
|
||||||
|
.with_status(200)
|
||||||
|
.with_header("content-type", "application/json")
|
||||||
|
.with_body(mock_response.to_string())
|
||||||
|
.expect_at_least(1)
|
||||||
|
.create_async()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let prefs = make_routing_prefs();
|
||||||
|
let session_cache = Arc::new(MemorySessionCache::new(1000));
|
||||||
|
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
||||||
|
router_url,
|
||||||
|
"Plano-Orchestrator".to_string(),
|
||||||
|
"plano-orchestrator".to_string(),
|
||||||
|
Some(prefs),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
session_cache,
|
||||||
|
None,
|
||||||
|
2048,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Warm up
|
||||||
|
for _ in 0..20 {
|
||||||
|
let msgs = make_messages(3);
|
||||||
|
let _ = orchestrator_service
|
||||||
|
.determine_route(&msgs, None, "warmup")
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let baseline = get_allocated();
|
||||||
|
|
||||||
|
let concurrency = 50;
|
||||||
|
let requests_per_task = 100;
|
||||||
|
let total = concurrency * requests_per_task;
|
||||||
|
|
||||||
|
let mut handles = vec![];
|
||||||
|
for t in 0..concurrency {
|
||||||
|
let svc = Arc::clone(&orchestrator_service);
|
||||||
|
let handle = tokio::spawn(async move {
|
||||||
|
for r in 0..requests_per_task {
|
||||||
|
let msgs = make_messages(3 + (r % 8));
|
||||||
|
let _ = svc
|
||||||
|
.determine_route(&msgs, None, &format!("req-{t}-{r}"))
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
handles.push(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
for h in handles {
|
||||||
|
h.await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let after = get_allocated();
|
||||||
|
let growth = after.saturating_sub(baseline);
|
||||||
|
let per_request = growth / total;
|
||||||
|
|
||||||
|
eprintln!("=== Concurrent Routing Stress Test Results ===");
|
||||||
|
eprintln!(" Tasks: {concurrency} x {requests_per_task} = {total}");
|
||||||
|
eprintln!(" Baseline: {} bytes", baseline);
|
||||||
|
eprintln!(" Final: {} bytes", after);
|
||||||
|
eprintln!(
|
||||||
|
" Growth: {} bytes ({:.2} MB)",
|
||||||
|
growth,
|
||||||
|
growth as f64 / 1_048_576.0
|
||||||
|
);
|
||||||
|
eprintln!(" Per-request: {} bytes", per_request);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
per_request < 512,
|
||||||
|
"Possible memory leak under concurrency: {per_request} bytes/request retained after {total} requests"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "jemalloc")]
|
||||||
|
fn get_allocated() -> usize {
|
||||||
|
tikv_jemalloc_ctl::epoch::advance().unwrap();
|
||||||
|
tikv_jemalloc_ctl::stats::allocated::read().unwrap_or(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "jemalloc"))]
|
||||||
|
fn get_allocated() -> usize {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load diff
347
crates/brightstaff/src/signals/environment/exhaustion.rs
Normal file
347
crates/brightstaff/src/signals/environment/exhaustion.rs
Normal file
|
|
@ -0,0 +1,347 @@
|
||||||
|
//! Environment exhaustion detector. Direct port of
|
||||||
|
//! `signals/environment/exhaustion.py`.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use crate::signals::analyzer::ShareGptMessage;
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||||
|
|
||||||
|
pub const API_ERROR_PATTERNS: &[&str] = &[
|
||||||
|
r"500\s*(internal\s+)?server\s+error",
|
||||||
|
r"502\s*bad\s+gateway",
|
||||||
|
r"503\s*service\s+unavailable",
|
||||||
|
r"504\s*gateway\s+timeout",
|
||||||
|
r"internal\s+server\s+error",
|
||||||
|
r"service\s+unavailable",
|
||||||
|
r"server\s+error",
|
||||||
|
r"backend\s+error",
|
||||||
|
r"upstream\s+error",
|
||||||
|
r"service\s+temporarily\s+unavailable",
|
||||||
|
r"maintenance\s+mode",
|
||||||
|
r"under\s+maintenance",
|
||||||
|
r"try\s+again\s+later",
|
||||||
|
r"temporarily\s+unavailable",
|
||||||
|
r"system\s+error",
|
||||||
|
r"unexpected\s+error",
|
||||||
|
r"unhandled\s+exception",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const TIMEOUT_PATTERNS: &[&str] = &[
|
||||||
|
r"timeout",
|
||||||
|
r"timed?\s*out",
|
||||||
|
r"etimedout",
|
||||||
|
r"connection\s+timed?\s*out",
|
||||||
|
r"read\s+timed?\s*out",
|
||||||
|
r"request\s+timed?\s*out",
|
||||||
|
r"gateway\s+timeout",
|
||||||
|
r"deadline\s+exceeded",
|
||||||
|
r"took\s+too\s+long",
|
||||||
|
r"operation\s+timed?\s*out",
|
||||||
|
r"socket\s+timeout",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const RATE_LIMIT_PATTERNS: &[&str] = &[
|
||||||
|
r"rate\s+limit",
|
||||||
|
r"rate.limited",
|
||||||
|
r"(status|error|http)\s*:?\s*429",
|
||||||
|
r"429\s+(too\s+many|rate|limit)",
|
||||||
|
r"too\s+many\s+requests?",
|
||||||
|
r"quota\s+exceeded",
|
||||||
|
r"quota\s+limit",
|
||||||
|
r"throttl(ed|ing)",
|
||||||
|
r"request\s+limit",
|
||||||
|
r"api\s+limit",
|
||||||
|
r"calls?\s+per\s+(second|minute|hour|day)",
|
||||||
|
r"exceeded\s+.*\s+limit",
|
||||||
|
r"slow\s+down",
|
||||||
|
r"retry\s+after",
|
||||||
|
r"requests?\s+exceeded",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const NETWORK_PATTERNS: &[&str] = &[
|
||||||
|
r"connection\s+refused",
|
||||||
|
r"econnrefused",
|
||||||
|
r"econnreset",
|
||||||
|
r"connection\s+reset",
|
||||||
|
r"enotfound",
|
||||||
|
r"dns\s+(error|failure|lookup)",
|
||||||
|
r"host\s+not\s+found",
|
||||||
|
r"network\s+(error|failure|unreachable)",
|
||||||
|
r"no\s+route\s+to\s+host",
|
||||||
|
r"socket\s+error",
|
||||||
|
r"connection\s+failed",
|
||||||
|
r"unable\s+to\s+connect",
|
||||||
|
r"cannot\s+connect",
|
||||||
|
r"could\s+not\s+connect",
|
||||||
|
r"connect\s+error",
|
||||||
|
r"ssl\s+(error|handshake|certificate)",
|
||||||
|
r"certificate\s+(error|invalid|expired)",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const MALFORMED_PATTERNS: &[&str] = &[
|
||||||
|
r"json\s+parse\s+error",
|
||||||
|
r"invalid\s+json",
|
||||||
|
r"unexpected\s+token",
|
||||||
|
r"syntax\s+error.*json",
|
||||||
|
r"malformed\s+(response|json|data)",
|
||||||
|
r"unexpected\s+end\s+of",
|
||||||
|
r"parse\s+error",
|
||||||
|
r"parsing\s+failed",
|
||||||
|
r"invalid\s+response",
|
||||||
|
r"unexpected\s+response",
|
||||||
|
r"response\s+format",
|
||||||
|
r"missing\s+field.*response",
|
||||||
|
r"unexpected\s+schema",
|
||||||
|
r"schema\s+validation",
|
||||||
|
r"deserialization\s+error",
|
||||||
|
r"failed\s+to\s+decode",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const CONTEXT_OVERFLOW_PATTERNS: &[&str] = &[
|
||||||
|
r"context\s+(length|limit|overflow|exceeded)",
|
||||||
|
r"token\s+(limit|overflow|exceeded)",
|
||||||
|
r"max(imum)?\s+tokens?",
|
||||||
|
r"input\s+too\s+(long|large)",
|
||||||
|
r"exceeds?\s+(context|token|character|input)\s+limit",
|
||||||
|
r"message\s+too\s+(long|large)",
|
||||||
|
r"content\s+too\s+(long|large)",
|
||||||
|
r"truncat(ed|ion)\s+(due\s+to|because|for)\s+(length|size|limit)",
|
||||||
|
r"maximum\s+context",
|
||||||
|
r"prompt\s+too\s+(long|large)",
|
||||||
|
];
|
||||||
|
|
||||||
|
fn compile(patterns: &[&str]) -> Regex {
|
||||||
|
let combined = patterns
|
||||||
|
.iter()
|
||||||
|
.map(|p| format!("({})", p))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("|");
|
||||||
|
Regex::new(&format!("(?i){}", combined)).expect("exhaustion pattern regex must compile")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn api_error_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(API_ERROR_PATTERNS))
|
||||||
|
}
|
||||||
|
fn timeout_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(TIMEOUT_PATTERNS))
|
||||||
|
}
|
||||||
|
fn rate_limit_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(RATE_LIMIT_PATTERNS))
|
||||||
|
}
|
||||||
|
fn network_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(NETWORK_PATTERNS))
|
||||||
|
}
|
||||||
|
fn malformed_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(MALFORMED_PATTERNS))
|
||||||
|
}
|
||||||
|
fn context_overflow_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(CONTEXT_OVERFLOW_PATTERNS))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
|
||||||
|
let start = m.start().saturating_sub(context);
|
||||||
|
let end = (m.end() + context).min(text.len());
|
||||||
|
let start = align_char_boundary(text, start, false);
|
||||||
|
let end = align_char_boundary(text, end, true);
|
||||||
|
let mut snippet = String::new();
|
||||||
|
if start > 0 {
|
||||||
|
snippet.push_str("...");
|
||||||
|
}
|
||||||
|
snippet.push_str(&text[start..end]);
|
||||||
|
if end < text.len() {
|
||||||
|
snippet.push_str("...");
|
||||||
|
}
|
||||||
|
snippet
|
||||||
|
}
|
||||||
|
|
||||||
|
fn align_char_boundary(s: &str, mut idx: usize, forward: bool) -> usize {
|
||||||
|
if idx >= s.len() {
|
||||||
|
return s.len();
|
||||||
|
}
|
||||||
|
while !s.is_char_boundary(idx) {
|
||||||
|
if forward {
|
||||||
|
idx += 1;
|
||||||
|
} else if idx == 0 {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
idx -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idx
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn analyze_exhaustion(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||||
|
let mut group = SignalGroup::new("exhaustion");
|
||||||
|
|
||||||
|
for (i, msg) in messages.iter().enumerate() {
|
||||||
|
if msg.from != "observation" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let value = msg.value;
|
||||||
|
let lower = value.to_lowercase();
|
||||||
|
|
||||||
|
if let Some(m) = rate_limit_re().find(&lower) {
|
||||||
|
group.add_signal(emit(
|
||||||
|
SignalType::EnvironmentExhaustionRateLimit,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
0.95,
|
||||||
|
"rate_limit",
|
||||||
|
m.as_str(),
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = api_error_re().find(&lower) {
|
||||||
|
group.add_signal(emit(
|
||||||
|
SignalType::EnvironmentExhaustionApiError,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
0.9,
|
||||||
|
"api_error",
|
||||||
|
m.as_str(),
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = timeout_re().find(&lower) {
|
||||||
|
group.add_signal(emit(
|
||||||
|
SignalType::EnvironmentExhaustionTimeout,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
0.9,
|
||||||
|
"timeout",
|
||||||
|
m.as_str(),
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = network_re().find(&lower) {
|
||||||
|
group.add_signal(emit(
|
||||||
|
SignalType::EnvironmentExhaustionNetwork,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
0.9,
|
||||||
|
"network",
|
||||||
|
m.as_str(),
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = malformed_re().find(&lower) {
|
||||||
|
group.add_signal(emit(
|
||||||
|
SignalType::EnvironmentExhaustionMalformed,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
0.85,
|
||||||
|
"malformed_response",
|
||||||
|
m.as_str(),
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = context_overflow_re().find(&lower) {
|
||||||
|
group.add_signal(emit(
|
||||||
|
SignalType::EnvironmentExhaustionContextOverflow,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
0.9,
|
||||||
|
"context_overflow",
|
||||||
|
m.as_str(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit(
|
||||||
|
t: SignalType,
|
||||||
|
idx: usize,
|
||||||
|
snippet: String,
|
||||||
|
confidence: f32,
|
||||||
|
kind: &str,
|
||||||
|
matched: &str,
|
||||||
|
) -> SignalInstance {
|
||||||
|
SignalInstance::new(t, idx, snippet)
|
||||||
|
.with_confidence(confidence)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"exhaustion_type": kind,
|
||||||
|
"matched": matched,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn obs(value: &str) -> ShareGptMessage<'_> {
|
||||||
|
ShareGptMessage {
|
||||||
|
from: "observation",
|
||||||
|
value,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detects_rate_limit() {
|
||||||
|
let g = analyze_exhaustion(&[obs("HTTP 429: too many requests, retry after 30s")]);
|
||||||
|
assert!(g
|
||||||
|
.signals
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionRateLimit)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detects_api_error() {
|
||||||
|
let g = analyze_exhaustion(&[obs("503 service unavailable - try again later")]);
|
||||||
|
assert!(g
|
||||||
|
.signals
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionApiError)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detects_timeout() {
|
||||||
|
let g = analyze_exhaustion(&[obs("Connection timed out after 30 seconds")]);
|
||||||
|
assert!(g
|
||||||
|
.signals
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionTimeout)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detects_network_failure() {
|
||||||
|
let g = analyze_exhaustion(&[obs("ECONNREFUSED: connection refused by remote host")]);
|
||||||
|
assert!(g
|
||||||
|
.signals
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionNetwork)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detects_malformed_response() {
|
||||||
|
let g = analyze_exhaustion(&[obs("Invalid JSON: unexpected token at position 42")]);
|
||||||
|
assert!(g
|
||||||
|
.signals
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionMalformed)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detects_context_overflow() {
|
||||||
|
let g = analyze_exhaustion(&[obs("Maximum context length exceeded for this model")]);
|
||||||
|
assert!(g.signals.iter().any(|s| matches!(
|
||||||
|
s.signal_type,
|
||||||
|
SignalType::EnvironmentExhaustionContextOverflow
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
3
crates/brightstaff/src/signals/environment/mod.rs
Normal file
3
crates/brightstaff/src/signals/environment/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
//! Environment signals: exhaustion (external system failures and constraints).
|
||||||
|
|
||||||
|
pub mod exhaustion;
|
||||||
388
crates/brightstaff/src/signals/execution/failure.rs
Normal file
388
crates/brightstaff/src/signals/execution/failure.rs
Normal file
|
|
@ -0,0 +1,388 @@
|
||||||
|
//! Execution failure detector. Direct port of `signals/execution/failure.py`.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use crate::signals::analyzer::ShareGptMessage;
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||||
|
|
||||||
|
pub const INVALID_ARGS_PATTERNS: &[&str] = &[
|
||||||
|
r"invalid\s+argument",
|
||||||
|
r"invalid\s+parameter",
|
||||||
|
r"invalid\s+type",
|
||||||
|
r"type\s*error",
|
||||||
|
r"expected\s+\w+\s*,?\s*got\s+\w+",
|
||||||
|
r"required\s+field",
|
||||||
|
r"required\s+parameter",
|
||||||
|
r"missing\s+required",
|
||||||
|
r"missing\s+argument",
|
||||||
|
r"validation\s+failed",
|
||||||
|
r"validation\s+error",
|
||||||
|
r"invalid\s+value",
|
||||||
|
r"invalid\s+format",
|
||||||
|
r"must\s+be\s+(a|an)\s+\w+",
|
||||||
|
r"cannot\s+be\s+(null|empty|none)",
|
||||||
|
r"is\s+not\s+valid",
|
||||||
|
r"does\s+not\s+match",
|
||||||
|
r"out\s+of\s+range",
|
||||||
|
r"invalid\s+date",
|
||||||
|
r"invalid\s+json",
|
||||||
|
r"malformed\s+request",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const BAD_QUERY_PATTERNS: &[&str] = &[
|
||||||
|
r"invalid\s+query",
|
||||||
|
r"query\s+syntax\s+error",
|
||||||
|
r"malformed\s+query",
|
||||||
|
r"unknown\s+field",
|
||||||
|
r"invalid\s+field",
|
||||||
|
r"invalid\s+filter",
|
||||||
|
r"invalid\s+search",
|
||||||
|
r"unknown\s+id",
|
||||||
|
r"invalid\s+id",
|
||||||
|
r"id\s+format\s+error",
|
||||||
|
r"invalid\s+identifier",
|
||||||
|
r"query\s+failed",
|
||||||
|
r"search\s+error",
|
||||||
|
r"invalid\s+operator",
|
||||||
|
r"unsupported\s+query",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const TOOL_NOT_FOUND_PATTERNS: &[&str] = &[
|
||||||
|
r"unknown\s+function",
|
||||||
|
r"unknown\s+tool",
|
||||||
|
r"function\s+not\s+found",
|
||||||
|
r"tool\s+not\s+found",
|
||||||
|
r"no\s+such\s+function",
|
||||||
|
r"no\s+such\s+tool",
|
||||||
|
r"undefined\s+function",
|
||||||
|
r"action\s+not\s+supported",
|
||||||
|
r"invalid\s+tool",
|
||||||
|
r"invalid\s+function",
|
||||||
|
r"unrecognized\s+function",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const AUTH_MISUSE_PATTERNS: &[&str] = &[
|
||||||
|
r"\bunauthorized\b",
|
||||||
|
r"(status|error|http|code)\s*:?\s*401",
|
||||||
|
r"401\s+unauthorized",
|
||||||
|
r"403\s+forbidden",
|
||||||
|
r"permission\s+denied",
|
||||||
|
r"access\s+denied",
|
||||||
|
r"authentication\s+required",
|
||||||
|
r"invalid\s+credentials",
|
||||||
|
r"invalid\s+token",
|
||||||
|
r"token\s+expired",
|
||||||
|
r"missing\s+authorization",
|
||||||
|
r"\bforbidden\b",
|
||||||
|
r"not\s+authorized",
|
||||||
|
r"insufficient\s+permissions?",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const STATE_ERROR_PATTERNS: &[&str] = &[
|
||||||
|
r"invalid\s+state",
|
||||||
|
r"illegal\s+state",
|
||||||
|
r"must\s+call\s+\w+\s+first",
|
||||||
|
r"must\s+\w+\s+before",
|
||||||
|
r"cannot\s+\w+\s+before",
|
||||||
|
r"already\s+(exists?|created|started|finished)",
|
||||||
|
r"not\s+initialized",
|
||||||
|
r"not\s+started",
|
||||||
|
r"already\s+in\s+progress",
|
||||||
|
r"operation\s+in\s+progress",
|
||||||
|
r"sequence\s+error",
|
||||||
|
r"precondition\s+failed",
|
||||||
|
r"(status|error|http)\s*:?\s*409",
|
||||||
|
r"409\s+conflict",
|
||||||
|
r"\bconflict\b",
|
||||||
|
];
|
||||||
|
|
||||||
|
fn compile(patterns: &[&str]) -> Regex {
|
||||||
|
// Use `(?i)` flag for case-insensitive matching, matching Python's `re.IGNORECASE`.
|
||||||
|
let combined = patterns
|
||||||
|
.iter()
|
||||||
|
.map(|p| format!("({})", p))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("|");
|
||||||
|
Regex::new(&format!("(?i){}", combined)).expect("failure pattern regex must compile")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn invalid_args_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(INVALID_ARGS_PATTERNS))
|
||||||
|
}
|
||||||
|
fn bad_query_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(BAD_QUERY_PATTERNS))
|
||||||
|
}
|
||||||
|
fn tool_not_found_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(TOOL_NOT_FOUND_PATTERNS))
|
||||||
|
}
|
||||||
|
fn auth_misuse_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(AUTH_MISUSE_PATTERNS))
|
||||||
|
}
|
||||||
|
fn state_error_re() -> &'static Regex {
|
||||||
|
static R: OnceLock<Regex> = OnceLock::new();
|
||||||
|
R.get_or_init(|| compile(STATE_ERROR_PATTERNS))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pull tool name + args from a `function_call` message. Mirrors
|
||||||
|
/// `_extract_tool_info` in the reference.
|
||||||
|
pub(crate) fn extract_tool_info(value: &str) -> (String, String) {
|
||||||
|
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
|
||||||
|
if let Some(obj) = parsed.as_object() {
|
||||||
|
let name = obj
|
||||||
|
.get("name")
|
||||||
|
.or_else(|| obj.get("function"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| "unknown".to_string());
|
||||||
|
let args = match obj.get("arguments").or_else(|| obj.get("args")) {
|
||||||
|
Some(serde_json::Value::Object(o)) => {
|
||||||
|
serde_json::to_string(&serde_json::Value::Object(o.clone())).unwrap_or_default()
|
||||||
|
}
|
||||||
|
Some(other) => other
|
||||||
|
.as_str()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
return (name, args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut snippet: String = value.chars().take(200).collect();
|
||||||
|
snippet.shrink_to_fit();
|
||||||
|
("unknown".to_string(), snippet)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a context-window snippet around a regex match, with leading/trailing
|
||||||
|
/// ellipses when truncated. Mirrors `_get_snippet`.
|
||||||
|
fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
|
||||||
|
let start = m.start().saturating_sub(context);
|
||||||
|
let end = (m.end() + context).min(text.len());
|
||||||
|
// Ensure we cut on UTF-8 boundaries.
|
||||||
|
let start = align_char_boundary(text, start, false);
|
||||||
|
let end = align_char_boundary(text, end, true);
|
||||||
|
let mut snippet = String::new();
|
||||||
|
if start > 0 {
|
||||||
|
snippet.push_str("...");
|
||||||
|
}
|
||||||
|
snippet.push_str(&text[start..end]);
|
||||||
|
if end < text.len() {
|
||||||
|
snippet.push_str("...");
|
||||||
|
}
|
||||||
|
snippet
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Snap `idx` onto a valid UTF-8 char boundary of `s`.
///
/// When `idx` lands inside a multi-byte character, walk `forward` (toward the
/// end) or backward (toward the start) until a boundary is found. Indices at
/// or past the end clamp to `s.len()`.
///
/// Byte 0 is always a char boundary, so the backward walk always terminates;
/// the original's explicit `idx == 0` break branch was unreachable and has
/// been removed (behavior is identical).
fn align_char_boundary(s: &str, mut idx: usize, forward: bool) -> usize {
    if idx >= s.len() {
        return s.len();
    }
    while !s.is_char_boundary(idx) {
        if forward {
            idx += 1;
        } else {
            idx -= 1;
        }
    }
    idx
}
|
||||||
|
|
||||||
|
pub fn analyze_failure(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||||
|
let mut group = SignalGroup::new("failure");
|
||||||
|
let mut last_call: Option<(usize, String, String)> = None;
|
||||||
|
|
||||||
|
for (i, msg) in messages.iter().enumerate() {
|
||||||
|
match msg.from {
|
||||||
|
"function_call" => {
|
||||||
|
let (name, args) = extract_tool_info(msg.value);
|
||||||
|
last_call = Some((i, name, args));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
"observation" => {}
|
||||||
|
_ => continue,
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = msg.value;
|
||||||
|
let lower = value.to_lowercase();
|
||||||
|
let (call_index, tool_name) = match &last_call {
|
||||||
|
Some((idx, name, _)) => (*idx, name.clone()),
|
||||||
|
None => (i.saturating_sub(1), "unknown".to_string()),
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(m) = invalid_args_re().find(&lower) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::ExecutionFailureInvalidArgs,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
)
|
||||||
|
.with_confidence(0.9)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"call_index": call_index,
|
||||||
|
"error_type": "invalid_args",
|
||||||
|
"matched": m.as_str(),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = tool_not_found_re().find(&lower) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::ExecutionFailureToolNotFound,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
)
|
||||||
|
.with_confidence(0.95)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"call_index": call_index,
|
||||||
|
"error_type": "tool_not_found",
|
||||||
|
"matched": m.as_str(),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = auth_misuse_re().find(&lower) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::ExecutionFailureAuthMisuse,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
)
|
||||||
|
.with_confidence(0.8)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"call_index": call_index,
|
||||||
|
"error_type": "auth_misuse",
|
||||||
|
"matched": m.as_str(),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = state_error_re().find(&lower) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::ExecutionFailureStateError,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
)
|
||||||
|
.with_confidence(0.85)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"call_index": call_index,
|
||||||
|
"error_type": "state_error",
|
||||||
|
"matched": m.as_str(),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(m) = bad_query_re().find(&lower) {
|
||||||
|
let confidence = if ["error", "invalid", "failed"]
|
||||||
|
.iter()
|
||||||
|
.any(|w| lower.contains(w))
|
||||||
|
{
|
||||||
|
0.8
|
||||||
|
} else {
|
||||||
|
0.6
|
||||||
|
};
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::ExecutionFailureBadQuery,
|
||||||
|
i,
|
||||||
|
snippet_around(value, m, 50),
|
||||||
|
)
|
||||||
|
.with_confidence(confidence)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"call_index": call_index,
|
||||||
|
"error_type": "bad_query",
|
||||||
|
"matched": m.as_str(),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `function_call` message from a raw payload.
    fn call(payload: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "function_call",
            value: payload,
        }
    }

    /// Build an `observation` message from a raw payload.
    fn observation(payload: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "observation",
            value: payload,
        }
    }

    /// Run the analyzer and report whether any emitted signal satisfies `pred`.
    fn fires(msgs: &[ShareGptMessage<'_>], pred: impl Fn(&SignalType) -> bool) -> bool {
        analyze_failure(msgs).signals.iter().any(|s| pred(&s.signal_type))
    }

    #[test]
    fn detects_invalid_args() {
        let msgs = [
            call(r#"{"name":"create_user","arguments":{"age":"twelve"}}"#),
            observation("Error: validation failed - expected integer got string for field age"),
        ];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionFailureInvalidArgs)));
    }

    #[test]
    fn detects_tool_not_found() {
        let msgs = [
            call(r#"{"name":"send_thought","arguments":{}}"#),
            observation("Error: unknown function 'send_thought'"),
        ];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionFailureToolNotFound)));
    }

    #[test]
    fn detects_auth_misuse() {
        let msgs = [
            call(r#"{"name":"get_secret","arguments":{}}"#),
            observation("HTTP 401 Unauthorized"),
        ];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionFailureAuthMisuse)));
    }

    #[test]
    fn detects_state_error() {
        let msgs = [
            call(r#"{"name":"commit_tx","arguments":{}}"#),
            observation("must call begin_tx first"),
        ];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionFailureStateError)));
    }
}
|
||||||
433
crates/brightstaff/src/signals/execution/loops.rs
Normal file
433
crates/brightstaff/src/signals/execution/loops.rs
Normal file
|
|
@ -0,0 +1,433 @@
|
||||||
|
//! Execution loops detector. Direct port of `signals/execution/loops.py`.
|
||||||
|
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use crate::signals::analyzer::ShareGptMessage;
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||||
|
|
||||||
|
pub const RETRY_THRESHOLD: usize = 3;
|
||||||
|
pub const PARAMETER_DRIFT_THRESHOLD: usize = 3;
|
||||||
|
pub const OSCILLATION_CYCLES_THRESHOLD: usize = 3;
|
||||||
|
|
||||||
|
/// One parsed tool invocation extracted from a `function_call` message.
#[derive(Debug, Clone)]
pub struct ToolCall {
    /// Index of the originating message within the conversation.
    pub index: usize,
    /// Tool name ("unknown" when it could not be determined by the parser).
    pub name: String,
    /// Canonical JSON string of arguments (sorted keys when parseable).
    pub args: String,
    /// Parsed argument object when the payload carried a JSON object;
    /// enables structural comparison in `args_equal`.
    pub args_dict: Option<serde_json::Map<String, serde_json::Value>>,
}
|
||||||
|
|
||||||
|
impl ToolCall {
    /// Compare argument payloads: structurally when both sides parsed to
    /// JSON objects, otherwise by canonical string.
    pub fn args_equal(&self, other: &ToolCall) -> bool {
        if let (Some(lhs), Some(rhs)) = (&self.args_dict, &other.args_dict) {
            lhs == rhs
        } else {
            self.args == other.args
        }
    }
}
|
||||||
|
|
||||||
|
fn parse_tool_call(index: usize, msg: &ShareGptMessage<'_>) -> Option<ToolCall> {
|
||||||
|
if msg.from != "function_call" {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let value = msg.value;
|
||||||
|
|
||||||
|
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
|
||||||
|
if let Some(obj) = parsed.as_object() {
|
||||||
|
let name = obj
|
||||||
|
.get("name")
|
||||||
|
.or_else(|| obj.get("function"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| "unknown".to_string());
|
||||||
|
let raw_args = obj.get("arguments").or_else(|| obj.get("args"));
|
||||||
|
let (args_str, args_dict) = match raw_args {
|
||||||
|
Some(serde_json::Value::Object(o)) => {
|
||||||
|
let mut keys: Vec<&String> = o.keys().collect();
|
||||||
|
keys.sort();
|
||||||
|
let mut canon = serde_json::Map::new();
|
||||||
|
for k in keys {
|
||||||
|
canon.insert(k.clone(), o[k].clone());
|
||||||
|
}
|
||||||
|
(
|
||||||
|
serde_json::to_string(&serde_json::Value::Object(canon.clone()))
|
||||||
|
.unwrap_or_default(),
|
||||||
|
Some(canon),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Some(other) => (
|
||||||
|
other
|
||||||
|
.as_str()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
None => (String::new(), None),
|
||||||
|
};
|
||||||
|
return Some(ToolCall {
|
||||||
|
index,
|
||||||
|
name,
|
||||||
|
args: args_str,
|
||||||
|
args_dict,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(paren) = value.find('(') {
|
||||||
|
if paren > 0 {
|
||||||
|
let name = value[..paren].trim().to_string();
|
||||||
|
let args_part = &value[paren..];
|
||||||
|
if args_part.starts_with('(') && args_part.ends_with(')') {
|
||||||
|
let inner = args_part[1..args_part.len() - 1].trim();
|
||||||
|
if let Ok(serde_json::Value::Object(o)) =
|
||||||
|
serde_json::from_str::<serde_json::Value>(inner)
|
||||||
|
{
|
||||||
|
let mut keys: Vec<&String> = o.keys().collect();
|
||||||
|
keys.sort();
|
||||||
|
let mut canon = serde_json::Map::new();
|
||||||
|
for k in keys {
|
||||||
|
canon.insert(k.clone(), o[k].clone());
|
||||||
|
}
|
||||||
|
return Some(ToolCall {
|
||||||
|
index,
|
||||||
|
name,
|
||||||
|
args: serde_json::to_string(&serde_json::Value::Object(canon.clone()))
|
||||||
|
.unwrap_or_default(),
|
||||||
|
args_dict: Some(canon),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return Some(ToolCall {
|
||||||
|
index,
|
||||||
|
name,
|
||||||
|
args: inner.to_string(),
|
||||||
|
args_dict: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return Some(ToolCall {
|
||||||
|
index,
|
||||||
|
name,
|
||||||
|
args: args_part.to_string(),
|
||||||
|
args_dict: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(ToolCall {
|
||||||
|
index,
|
||||||
|
name: value.trim().to_string(),
|
||||||
|
args: String::new(),
|
||||||
|
args_dict: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_tool_calls(messages: &[ShareGptMessage<'_>]) -> Vec<ToolCall> {
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for (i, msg) in messages.iter().enumerate() {
|
||||||
|
if let Some(c) = parse_tool_call(i, msg) {
|
||||||
|
out.push(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
fn detect_retry(calls: &[ToolCall]) -> Vec<(usize, usize, String)> {
|
||||||
|
if calls.len() < RETRY_THRESHOLD {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
let mut patterns = Vec::new();
|
||||||
|
let mut i = 0;
|
||||||
|
while i < calls.len() {
|
||||||
|
let current = &calls[i];
|
||||||
|
let mut j = i + 1;
|
||||||
|
let mut run_length = 1;
|
||||||
|
while j < calls.len() {
|
||||||
|
if calls[j].name == current.name && calls[j].args_equal(current) {
|
||||||
|
run_length += 1;
|
||||||
|
j += 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if run_length >= RETRY_THRESHOLD {
|
||||||
|
patterns.push((calls[i].index, calls[j - 1].index, current.name.clone()));
|
||||||
|
i = j;
|
||||||
|
} else {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
patterns
|
||||||
|
}
|
||||||
|
|
||||||
|
fn detect_parameter_drift(calls: &[ToolCall]) -> Vec<(usize, usize, String, usize)> {
|
||||||
|
if calls.len() < PARAMETER_DRIFT_THRESHOLD {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
let mut patterns = Vec::new();
|
||||||
|
let mut i = 0;
|
||||||
|
while i < calls.len() {
|
||||||
|
let current_name = calls[i].name.clone();
|
||||||
|
let mut seen_args: Vec<String> = vec![calls[i].args.clone()];
|
||||||
|
let mut unique_args = 1;
|
||||||
|
let mut j = i + 1;
|
||||||
|
while j < calls.len() {
|
||||||
|
if calls[j].name != current_name {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if !seen_args.iter().any(|a| a == &calls[j].args) {
|
||||||
|
seen_args.push(calls[j].args.clone());
|
||||||
|
unique_args += 1;
|
||||||
|
}
|
||||||
|
j += 1;
|
||||||
|
}
|
||||||
|
let run_length = j - i;
|
||||||
|
if run_length >= PARAMETER_DRIFT_THRESHOLD && unique_args >= 2 {
|
||||||
|
patterns.push((
|
||||||
|
calls[i].index,
|
||||||
|
calls[j - 1].index,
|
||||||
|
current_name,
|
||||||
|
unique_args,
|
||||||
|
));
|
||||||
|
i = j;
|
||||||
|
} else {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
patterns
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Detect A→B→A→B-style oscillation cycles in the call sequence.
///
/// At each position, candidate pattern lengths 2..=5 are tried; a pattern
/// must contain at least two distinct tool names (a single repeated tool is a
/// retry, not an oscillation) and repeat back-to-back at least
/// `OSCILLATION_CYCLES_THRESHOLD` times. Returns tuples of
/// (start message index, end message index, pattern tool names, cycle count),
/// reduced to non-overlapping spans via `deduplicate_patterns`.
///
/// The index arithmetic intentionally mirrors the Python reference
/// implementation; see the inline comments.
fn detect_oscillation(calls: &[ToolCall]) -> Vec<(usize, usize, Vec<String>, usize)> {
    // An oscillation needs at least two full cycles' worth of calls.
    let min_calls = 2 * OSCILLATION_CYCLES_THRESHOLD;
    if calls.len() < min_calls {
        return Vec::new();
    }
    let mut patterns = Vec::new();
    let mut i: usize = 0;
    while i + min_calls <= calls.len() {
        // Candidate pattern lengths are capped at 5 and by remaining calls.
        let max_pat_len = (5usize).min(calls.len() - i);
        let mut found_for_i = false;
        for pat_len in 2..=max_pat_len {
            let pattern_names: Vec<String> =
                (0..pat_len).map(|k| calls[i + k].name.clone()).collect();
            // Require at least two distinct tools in the candidate pattern.
            let unique: std::collections::HashSet<&String> = pattern_names.iter().collect();
            if unique.len() < 2 {
                continue;
            }
            // Count back-to-back repetitions of the candidate pattern.
            let mut cycles = 1;
            let mut pos = i + pat_len;
            while pos + pat_len <= calls.len() {
                let mut all_match = true;
                for k in 0..pat_len {
                    if calls[pos + k].name != pattern_names[k] {
                        all_match = false;
                        break;
                    }
                }
                if all_match {
                    cycles += 1;
                    pos += pat_len;
                } else {
                    break;
                }
            }
            if cycles >= OSCILLATION_CYCLES_THRESHOLD {
                let end_idx_in_calls = i + (cycles * pat_len) - 1;
                patterns.push((
                    calls[i].index,
                    calls[end_idx_in_calls].index,
                    pattern_names,
                    cycles,
                ));
                // Mirror Python: `i = end_idx + 1 - pattern_len`. We set `i` so that
                // the next outer iteration begins after we account for overlap.
                i = end_idx_in_calls + 1 - pat_len;
                found_for_i = true;
                break;
            }
        }
        if !found_for_i {
            i += 1;
        } else {
            // Match Python's `i = end_idx + 1 - pattern_len; break` then loop.
            // We'll continue; the outer while re-checks i.
        }
    }
    if patterns.len() > 1 {
        patterns = deduplicate_patterns(patterns);
    }
    patterns
}
|
||||||
|
|
||||||
|
/// Reduce overlapping oscillation spans to a non-overlapping subset.
///
/// Sorted by earliest start; on equal starts, the longer span wins. A span is
/// kept only if it begins strictly after the previously kept span ends.
fn deduplicate_patterns(
    mut patterns: Vec<(usize, usize, Vec<String>, usize)>,
) -> Vec<(usize, usize, Vec<String>, usize)> {
    if patterns.is_empty() {
        return patterns;
    }
    patterns.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| (b.1 - b.0).cmp(&(a.1 - a.0))));
    let mut kept = Vec::new();
    // -1 sentinel so the very first span (which may start at 0) is accepted.
    let mut last_end: i64 = -1;
    for pattern in patterns {
        if (pattern.0 as i64) > last_end {
            last_end = pattern.1 as i64;
            kept.push(pattern);
        }
    }
    kept
}
|
||||||
|
|
||||||
|
/// Run the three loop detectors (retry, parameter drift, oscillation) over a
/// conversation and collect their findings into one `SignalGroup`.
///
/// Retry runs are reported first; a parameter-drift run overlapping any retry
/// span is suppressed (the retry already explains it). Oscillations are
/// reported independently. Fewer than `RETRY_THRESHOLD` tool calls short-
/// circuits with an empty group.
pub fn analyze_loops(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
    let mut group = SignalGroup::new("loops");
    let calls = extract_tool_calls(messages);
    if calls.len() < RETRY_THRESHOLD {
        return group;
    }

    let retries = detect_retry(&calls);
    for (start_idx, end_idx, tool_name) in &retries {
        // Count the tool calls whose message index falls inside the span.
        let call_count = calls
            .iter()
            .filter(|c| *start_idx <= c.index && c.index <= *end_idx)
            .count();
        group.add_signal(
            SignalInstance::new(
                SignalType::ExecutionLoopsRetry,
                *start_idx,
                format!(
                    "Tool '{}' called {} times with identical arguments",
                    tool_name, call_count
                ),
            )
            .with_confidence(0.95)
            .with_metadata(json!({
                "tool_name": tool_name,
                "start_index": start_idx,
                "end_index": end_idx,
                "call_count": call_count,
                "loop_type": "retry",
            })),
        );
    }

    let drifts = detect_parameter_drift(&calls);
    for (start_idx, end_idx, tool_name, variation_count) in &drifts {
        // Skip drift spans already covered by a retry signal.
        let overlaps_retry = retries
            .iter()
            .any(|r| !(*end_idx < r.0 || *start_idx > r.1));
        if overlaps_retry {
            continue;
        }
        let call_count = calls
            .iter()
            .filter(|c| *start_idx <= c.index && c.index <= *end_idx)
            .count();
        group.add_signal(
            SignalInstance::new(
                SignalType::ExecutionLoopsParameterDrift,
                *start_idx,
                format!(
                    "Tool '{}' called {} times with {} different argument variations",
                    tool_name, call_count, variation_count
                ),
            )
            .with_confidence(0.85)
            .with_metadata(json!({
                "tool_name": tool_name,
                "start_index": start_idx,
                "end_index": end_idx,
                "call_count": call_count,
                "variation_count": variation_count,
                "loop_type": "parameter_drift",
            })),
        );
    }

    let oscillations = detect_oscillation(&calls);
    for (start_idx, end_idx, tool_names, cycle_count) in &oscillations {
        // Human-readable "A → B → A" rendering of the repeating pattern.
        let pattern_str = tool_names.join(" \u{2192} ");
        group.add_signal(
            SignalInstance::new(
                SignalType::ExecutionLoopsOscillation,
                *start_idx,
                format!(
                    "Oscillation pattern [{}] repeated {} times",
                    pattern_str, cycle_count
                ),
            )
            .with_confidence(0.9)
            .with_metadata(json!({
                "pattern": tool_names,
                "start_index": start_idx,
                "end_index": end_idx,
                "cycle_count": cycle_count,
                "loop_type": "oscillation",
            })),
        );
    }

    group
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `function_call` message from a raw payload.
    fn call(payload: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "function_call",
            value: payload,
        }
    }

    /// Run the analyzer and report whether any emitted signal satisfies `pred`.
    fn fires(msgs: &[ShareGptMessage<'_>], pred: impl Fn(&SignalType) -> bool) -> bool {
        analyze_loops(msgs).signals.iter().any(|s| pred(&s.signal_type))
    }

    #[test]
    fn detects_retry_loop() {
        let payload = r#"{"name":"check_status","arguments":{"id":"abc"}}"#;
        let msgs = [call(payload), call(payload), call(payload), call(payload)];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionLoopsRetry)));
    }

    #[test]
    fn detects_parameter_drift() {
        let msgs = [
            call(r#"{"name":"search","arguments":{"q":"a"}}"#),
            call(r#"{"name":"search","arguments":{"q":"ab"}}"#),
            call(r#"{"name":"search","arguments":{"q":"abc"}}"#),
            call(r#"{"name":"search","arguments":{"q":"abcd"}}"#),
        ];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionLoopsParameterDrift)));
    }

    #[test]
    fn detects_oscillation() {
        let a = r#"{"name":"toolA","arguments":{}}"#;
        let b = r#"{"name":"toolB","arguments":{}}"#;
        let msgs = [call(a), call(b), call(a), call(b), call(a), call(b)];
        assert!(fires(&msgs, |t| matches!(t, SignalType::ExecutionLoopsOscillation)));
    }

    #[test]
    fn no_signals_when_few_calls() {
        let msgs = [call(r#"{"name":"only_once","arguments":{}}"#)];
        assert!(analyze_loops(&msgs).signals.is_empty());
    }
}
|
||||||
5
crates/brightstaff/src/signals/execution/mod.rs
Normal file
5
crates/brightstaff/src/signals/execution/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
//! Execution signals: failure (agent-caused tool errors) and loops
|
||||||
|
//! (repetitive tool-call behavior).
|
||||||
|
|
||||||
|
pub mod failure;
|
||||||
|
pub mod loops;
|
||||||
193
crates/brightstaff/src/signals/interaction/constants.rs
Normal file
193
crates/brightstaff/src/signals/interaction/constants.rs
Normal file
|
|
@ -0,0 +1,193 @@
|
||||||
|
//! Shared constants for the interaction layer detectors.
|
||||||
|
//!
|
||||||
|
//! Direct port of `signals/interaction/constants.py`.
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
/// Lowercase lead-in tokens that signal a positive / agreeable user turn.
/// Consumed via `starts_with_prefix` against the lowercased, trimmed message.
pub const POSITIVE_PREFIXES: &[&str] = &[
    "yes", "yeah", "yep", "yup", "sure", "ok", "okay", "great", "awesome",
    "perfect", "thanks", "thank", "wonderful", "excellent", "amazing", "nice",
    "good", "cool", "absolutely", "definitely", "please",
];
|
||||||
|
|
||||||
|
/// Lowercase lead-ins indicating the user is confirming a prior statement.
/// Apostrophe-less variants ("thats") cover informal typing.
pub const CONFIRMATION_PREFIXES: &[&str] = &[
    "yes", "yeah", "yep", "yup", "correct", "right",
    "that's correct", "thats correct",
    "that's right", "thats right",
    "that is correct", "that is right",
];
|
||||||
|
|
||||||
|
/// English stopwords excluded from content-word comparisons.
/// Backing data for `stopwords()`; kept private — use the set accessor.
const STOPWORD_LIST: &[&str] = &[
    "a", "about", "above", "after", "again", "against", "all", "am", "an",
    "and", "any", "are", "as", "at", "be", "because", "been", "before",
    "being", "below", "between", "both", "but", "by", "can", "could", "did",
    "do", "does", "doing", "down", "during", "each", "few", "for", "from",
    "further", "had", "has", "have", "having", "he", "her", "here", "hers",
    "herself", "him", "himself", "his", "how", "i", "if", "in", "into", "is",
    "it", "its", "itself", "just", "me", "more", "most", "my", "myself",
    "no", "nor", "not", "now", "of", "off", "on", "once", "only", "or",
    "other", "our", "ours", "ourselves", "out", "over", "own", "same", "she",
    "should", "so", "some", "such", "than", "that", "the", "their", "theirs",
    "them", "themselves", "then", "there", "these", "they", "this", "those",
    "through", "to", "too", "under", "until", "up", "very", "was", "we",
    "were", "what", "when", "where", "which", "while", "who", "whom", "why",
    "with", "would", "you", "your", "yours", "yourself", "yourselves",
];
|
||||||
|
|
||||||
|
pub fn stopwords() -> &'static HashSet<&'static str> {
|
||||||
|
static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
|
||||||
|
SET.get_or_init(|| STOPWORD_LIST.iter().copied().collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if `text` (case-insensitive, trimmed) starts with any of the
/// given prefixes treated as **whole tokens or token sequences**, matching
/// the documented intent that `"please"` shouldn't fire on `"pleased"`.
///
/// Fix vs. the original: the body was a bare `starts_with`, so `"pleased"`
/// *did* match the prefix `"please"` despite the doc comment. We now also
/// require the character after the prefix to be absent or non-alphanumeric.
/// (Prefix byte length is a valid slice boundary because `starts_with`
/// guarantees the prefix matched byte-for-byte.)
pub fn starts_with_prefix(text: &str, prefixes: &[&str]) -> bool {
    let lowered = text.to_lowercase();
    let trimmed = lowered.trim_start();
    prefixes.iter().any(|prefix| {
        trimmed.starts_with(prefix)
            && trimmed[prefix.len()..]
                .chars()
                .next()
                .map_or(true, |c| !c.is_alphanumeric())
    })
}
|
||||||
445
crates/brightstaff/src/signals/interaction/disengagement.rs
Normal file
445
crates/brightstaff/src/signals/interaction/disengagement.rs
Normal file
|
|
@ -0,0 +1,445 @@
|
||||||
|
//! Disengagement signals: escalation, quit, negative stance.
|
||||||
|
//!
|
||||||
|
//! Direct port of `signals/interaction/disengagement.py`.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||||
|
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||||
|
|
||||||
|
/// Lowercase phrases indicating the user wants escalation to a human/agent.
/// Each target (human, person, supervisor, ...) is crossed with seven verb
/// templates (speak to / talk to / connect me to / connect me with /
/// transfer me to / get me / chat with).
const ESCALATION_PATTERN_TEXTS: &[&str] = &[
    // Human requests
    "speak to a human", "talk to a human", "connect me to a human",
    "connect me with a human", "transfer me to a human", "get me a human",
    "chat with a human",
    // Person requests
    "speak to a person", "talk to a person", "connect me to a person",
    "connect me with a person", "transfer me to a person", "get me a person",
    "chat with a person",
    // Real person requests
    "speak to a real person", "talk to a real person",
    "connect me to a real person", "connect me with a real person",
    "transfer me to a real person", "get me a real person",
    "chat with a real person",
    // Actual person requests
    "speak to an actual person", "talk to an actual person",
    "connect me to an actual person", "connect me with an actual person",
    "transfer me to an actual person", "get me an actual person",
    "chat with an actual person",
    // Supervisor requests
    "speak to a supervisor", "talk to a supervisor",
    "connect me to a supervisor", "connect me with a supervisor",
    "transfer me to a supervisor", "get me a supervisor",
    "chat with a supervisor",
    // Manager requests
    "speak to a manager", "talk to a manager", "connect me to a manager",
    "connect me with a manager", "transfer me to a manager",
    "get me a manager", "chat with a manager",
    // Customer service requests
    "speak to customer service", "talk to customer service",
    "connect me to customer service", "connect me with customer service",
    "transfer me to customer service", "get me customer service",
    "chat with customer service",
    // Customer support requests
    "speak to customer support", "talk to customer support",
    "connect me to customer support", "connect me with customer support",
    "transfer me to customer support", "get me customer support",
    "chat with customer support",
    // Support requests
    "speak to support", "talk to support", "connect me to support",
    "connect me with support", "transfer me to support", "get me support",
    "chat with support",
    // Tech support requests
    "speak to tech support", "talk to tech support",
    "connect me to tech support", "connect me with tech support",
    "transfer me to tech support", "get me tech support",
    "chat with tech support",
    // Help desk requests
    "speak to help desk", "talk to help desk", "connect me to help desk",
    "connect me with help desk", "transfer me to help desk",
    "get me help desk", "chat with help desk",
    // Explicit escalation
    "escalate this",
];
|
||||||
|
|
||||||
|
/// Lowercase phrases indicating the user is abandoning the interaction.
/// Apostrophe-less variants ("im giving up") cover informal typing.
const QUIT_PATTERN_TEXTS: &[&str] = &[
    "i give up", "i'm giving up", "im giving up", "i'm going to quit",
    "i quit", "forget it", "forget this", "screw it", "screw this",
    "don't bother trying", "don't bother with this", "don't bother with it",
    "don't even bother", "why bother", "not worth it", "this is hopeless",
    "going elsewhere", "try somewhere else", "look elsewhere",
];
|
||||||
|
|
||||||
|
const NEGATIVE_STANCE_PATTERN_TEXTS: &[&str] = &[
|
||||||
|
"this is useless",
|
||||||
|
"not helpful",
|
||||||
|
"doesn't help",
|
||||||
|
"not helping",
|
||||||
|
"you're not helping",
|
||||||
|
"youre not helping",
|
||||||
|
"this doesn't work",
|
||||||
|
"this doesnt work",
|
||||||
|
"this isn't working",
|
||||||
|
"this isnt working",
|
||||||
|
"still doesn't work",
|
||||||
|
"still doesnt work",
|
||||||
|
"still not working",
|
||||||
|
"still isn't working",
|
||||||
|
"still isnt working",
|
||||||
|
"waste of time",
|
||||||
|
"wasting my time",
|
||||||
|
"this is ridiculous",
|
||||||
|
"this is absurd",
|
||||||
|
"this is insane",
|
||||||
|
"this is stupid",
|
||||||
|
"this is dumb",
|
||||||
|
"this sucks",
|
||||||
|
"this is frustrating",
|
||||||
|
"not good enough",
|
||||||
|
"why can't you",
|
||||||
|
"why cant you",
|
||||||
|
"same issue",
|
||||||
|
"did that already",
|
||||||
|
"done that already",
|
||||||
|
"tried that already",
|
||||||
|
"already tried that",
|
||||||
|
"i've done that",
|
||||||
|
"ive done that",
|
||||||
|
"i've tried that",
|
||||||
|
"ive tried that",
|
||||||
|
"i'm disappointed",
|
||||||
|
"im disappointed",
|
||||||
|
"disappointed with you",
|
||||||
|
"disappointed in you",
|
||||||
|
"useless bot",
|
||||||
|
"dumb bot",
|
||||||
|
"stupid bot",
|
||||||
|
];
|
||||||
|
|
||||||
|
const AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS: &[&str] = &[
|
||||||
|
"this is bullshit",
|
||||||
|
"what bullshit",
|
||||||
|
"such bullshit",
|
||||||
|
"total bullshit",
|
||||||
|
"complete bullshit",
|
||||||
|
"this is crap",
|
||||||
|
"what crap",
|
||||||
|
"this is shit",
|
||||||
|
"what the hell is wrong with you",
|
||||||
|
"what the fuck is wrong with you",
|
||||||
|
"you're fucking useless",
|
||||||
|
"youre fucking useless",
|
||||||
|
"you are fucking useless",
|
||||||
|
"fucking useless",
|
||||||
|
"this bot is shit",
|
||||||
|
"this bot is crap",
|
||||||
|
"damn bot",
|
||||||
|
"fucking bot",
|
||||||
|
"stupid fucking",
|
||||||
|
"are you fucking kidding",
|
||||||
|
"wtf is wrong with you",
|
||||||
|
"wtf is this",
|
||||||
|
"ffs just",
|
||||||
|
"for fucks sake",
|
||||||
|
"for fuck's sake",
|
||||||
|
"what the f**k",
|
||||||
|
"what the f*ck",
|
||||||
|
"what the f***",
|
||||||
|
"that's bullsh*t",
|
||||||
|
"thats bullsh*t",
|
||||||
|
"that's bull***t",
|
||||||
|
"thats bull***t",
|
||||||
|
"that's bs",
|
||||||
|
"thats bs",
|
||||||
|
"this is bullsh*t",
|
||||||
|
"this is bull***t",
|
||||||
|
"this is bs",
|
||||||
|
];
|
||||||
|
|
||||||
|
/// Escalation ("get me a human/support") patterns, normalized once per process.
fn escalation_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(ESCALATION_PATTERN_TEXTS))
}

/// Quit/abandonment patterns, normalized once per process.
fn quit_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(QUIT_PATTERN_TEXTS))
}

/// Generic complaint / negative-stance patterns, normalized once per process.
fn negative_stance_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(NEGATIVE_STANCE_PATTERN_TEXTS))
}

/// Agent-directed profanity patterns, normalized once per process.
fn profanity_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS))
}

/// Two or more consecutive question marks ("??", "???", ...).
fn re_consecutive_q() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| Regex::new(r"\?{2,}").unwrap())
}

/// Two or more consecutive exclamation marks ("!!", "!!!", ...).
/// NOTE(review): "e" here means "exclamation" -- consider renaming for clarity.
fn re_consecutive_e() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| Regex::new(r"!{2,}").unwrap())
}

/// Three or more mixed "?"/"!" characters in a row ("?!?", "!!?", ...).
fn re_mixed_punct() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| Regex::new(r"[?!]{3,}").unwrap())
}
|
||||||
|
|
||||||
|
/// Scan human turns for disengagement cues.
///
/// Per human message this emits, independently of each other:
/// - a negative-stance signal for all-caps shouting (>= 10 alphabetic chars,
///   >= 80% uppercase);
/// - a negative-stance signal for excessive "?"/"!" punctuation, unless the
///   message starts with a positive prefix (e.g. enthusiastic agreement);
/// - at most one escalation-pattern signal;
/// - at most one quit-pattern signal;
/// - at most one profanity signal, and -- only when neither escalation nor
///   profanity fired for this turn -- at most one generic complaint signal.
///
/// `char_ngram_threshold` / `token_cosine_threshold` are passed through to
/// `NormalizedMessage::matches_normalized_pattern` for fuzzy matching.
pub fn analyze_disengagement(
    normalized_messages: &[(usize, &str, NormalizedMessage)],
    char_ngram_threshold: f32,
    token_cosine_threshold: f32,
) -> SignalGroup {
    let mut group = SignalGroup::new("disengagement");

    for (idx, role, norm_msg) in normalized_messages {
        // Only user ("human") turns can express disengagement.
        if *role != "human" {
            continue;
        }

        let text = &norm_msg.raw;

        // All-caps shouting check.
        let alpha_chars: String = text.chars().filter(|c| c.is_alphabetic()).collect();
        if alpha_chars.chars().count() >= 10 {
            let upper_count = alpha_chars.chars().filter(|c| c.is_uppercase()).count();
            let upper_ratio = upper_count as f32 / alpha_chars.chars().count() as f32;
            if upper_ratio >= 0.8 {
                // Evidence snippet is capped at 50 chars of the raw text.
                let snippet: String = text.chars().take(50).collect();
                group.add_signal(
                    SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
                        .with_metadata(json!({
                            "indicator_type": "all_caps",
                            "upper_ratio": upper_ratio,
                        })),
                );
            }
        }

        // Excessive consecutive punctuation. A positive opener ("Yes!!")
        // suppresses this indicator -- excitement, not frustration.
        let starts_with_positive = starts_with_prefix(text, POSITIVE_PREFIXES);
        let cq = re_consecutive_q().find_iter(text).count();
        let ce = re_consecutive_e().find_iter(text).count();
        let mixed = re_mixed_punct().find_iter(text).count();
        if !starts_with_positive && (cq >= 1 || ce >= 1 || mixed >= 1) {
            let snippet: String = text.chars().take(50).collect();
            group.add_signal(
                SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
                    .with_metadata(json!({
                        "indicator_type": "excessive_punctuation",
                        "consecutive_questions": cq,
                        "consecutive_exclamations": ce,
                        "mixed_punctuation": mixed,
                    })),
            );
        }

        // Escalation patterns. First matching pattern wins for this turn.
        let mut found_escalation = false;
        for pattern in escalation_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::DisengagementEscalation,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "escalation"})),
                );
                found_escalation = true;
                break;
            }
        }

        // Quit patterns (independent of escalation).
        for pattern in quit_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(SignalType::DisengagementQuit, *idx, pattern.raw.clone())
                        .with_metadata(json!({"pattern_type": "quit"})),
                );
                break;
            }
        }

        // Profanity (more specific) before generic negative stance.
        let mut found_profanity = false;
        for pattern in profanity_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::DisengagementNegativeStance,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({
                        "indicator_type": "profanity",
                        "pattern": pattern.raw,
                    })),
                );
                found_profanity = true;
                break;
            }
        }

        // Generic complaints only when no stronger indicator already fired,
        // to avoid double-reporting the same frustration.
        if !found_escalation && !found_profanity {
            for pattern in negative_stance_patterns() {
                if norm_msg.matches_normalized_pattern(
                    pattern,
                    char_ngram_threshold,
                    token_cosine_threshold,
                ) {
                    group.add_signal(
                        SignalInstance::new(
                            SignalType::DisengagementNegativeStance,
                            *idx,
                            pattern.raw.clone(),
                        )
                        .with_metadata(json!({
                            "indicator_type": "complaint",
                            "pattern": pattern.raw,
                        })),
                    );
                    break;
                }
            }
        }
    }

    group
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: build a NormalizedMessage with a generous truncation cap.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    #[test]
    fn detects_human_escalation_request() {
        let msgs = vec![(
            0usize,
            "human",
            nm("This is taking forever, get me a human"),
        )];
        let g = analyze_disengagement(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementEscalation)));
    }

    #[test]
    fn detects_quit_intent() {
        let msgs = vec![(0usize, "human", nm("Forget it, I give up"))];
        let g = analyze_disengagement(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementQuit)));
    }

    #[test]
    fn detects_negative_stance_complaint() {
        let msgs = vec![(0usize, "human", nm("This is useless"))];
        let g = analyze_disengagement(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
    }

    #[test]
    fn detects_excessive_punctuation_as_negative_stance() {
        let msgs = vec![(0usize, "human", nm("WHY isn't this working???"))];
        let g = analyze_disengagement(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
    }

    // Positive openers ("Yes!!") must suppress the punctuation indicator.
    #[test]
    fn positive_excitement_is_not_disengagement() {
        let msgs = vec![(0usize, "human", nm("Yes!! That's perfect!!!"))];
        let g = analyze_disengagement(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .all(|s| !matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
    }
}
|
||||||
338
crates/brightstaff/src/signals/interaction/misalignment.rs
Normal file
338
crates/brightstaff/src/signals/interaction/misalignment.rs
Normal file
|
|
@ -0,0 +1,338 @@
|
||||||
|
//! Misalignment signals: corrections, rephrases, clarifications.
|
||||||
|
//!
|
||||||
|
//! Direct port of `signals/interaction/misalignment.py`.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use super::constants::{stopwords, CONFIRMATION_PREFIXES};
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||||
|
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||||
|
|
||||||
|
/// Phrases where the user corrects the assistant's understanding
/// ("no, i meant ..."). Apostrophe-less variants included for normalization.
const CORRECTION_PATTERN_TEXTS: &[&str] = &[
    "no, i meant",
    "no i meant",
    "no, i said",
    "no i said",
    "no, i asked",
    "no i asked",
    "nah, i meant",
    "nope, i meant",
    "not what i said",
    "not what i asked",
    "that's not what i said",
    "that's not what i asked",
    "that's not what i meant",
    "thats not what i said",
    "thats not what i asked",
    "thats not what i meant",
    "that's not what you",
    "no that's not what i",
    "no, that's not what i",
    "you're not quite right",
    "youre not quite right",
    "you're not exactly right",
    "youre not exactly right",
    "you're wrong about",
    "youre wrong about",
    "i just said",
    "i already said",
    "i already told you",
];

/// Explicit rephrase markers -- the user restates their request.
const REPHRASE_PATTERN_TEXTS: &[&str] = &[
    "let me rephrase",
    "let me explain again",
    "what i'm trying to say",
    "what i'm saying is",
    "in other words",
];

/// Phrases where the user signals they did not understand the assistant.
const CLARIFICATION_PATTERN_TEXTS: &[&str] = &[
    "i don't understand",
    "don't understand",
    "not understanding",
    "can't understand",
    "don't get it",
    "don't follow",
    "i'm confused",
    "so confused",
    "makes no sense",
    "doesn't make sense",
    "not making sense",
    "what do you mean",
    "what does that mean",
    "what are you saying",
    "i'm lost",
    "totally lost",
    "lost me",
    "no clue what you",
    "no idea what you",
    "no clue what that",
    "no idea what that",
    "come again",
    "say that again",
    "repeat that",
    "trouble following",
    "hard to follow",
    "can't follow",
];
|
||||||
|
|
||||||
|
/// Correction patterns, normalized once per process.
fn correction_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CORRECTION_PATTERN_TEXTS))
}

/// Rephrase-marker patterns, normalized once per process.
fn rephrase_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(REPHRASE_PATTERN_TEXTS))
}

/// Clarification-request patterns, normalized once per process.
fn clarification_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CLARIFICATION_PATTERN_TEXTS))
}
|
||||||
|
|
||||||
|
fn is_confirmation_message(text: &str) -> bool {
|
||||||
|
let lowered = text.to_lowercase();
|
||||||
|
let trimmed = lowered.trim();
|
||||||
|
CONFIRMATION_PREFIXES.iter().any(|p| trimmed.starts_with(p))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Detect whether two user messages appear to be rephrases of each other.
|
||||||
|
pub fn is_similar_rephrase(
|
||||||
|
norm_msg1: &NormalizedMessage,
|
||||||
|
norm_msg2: &NormalizedMessage,
|
||||||
|
overlap_threshold: f32,
|
||||||
|
min_meaningful_tokens: usize,
|
||||||
|
max_new_content_ratio: f32,
|
||||||
|
) -> bool {
|
||||||
|
if norm_msg1.tokens.len() < 3 || norm_msg2.tokens.len() < 3 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if is_confirmation_message(&norm_msg1.raw) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let stops = stopwords();
|
||||||
|
let tokens1: std::collections::HashSet<&str> = norm_msg1
|
||||||
|
.tokens
|
||||||
|
.iter()
|
||||||
|
.filter(|t| !stops.contains(t.as_str()))
|
||||||
|
.map(|s| s.as_str())
|
||||||
|
.collect();
|
||||||
|
let tokens2: std::collections::HashSet<&str> = norm_msg2
|
||||||
|
.tokens
|
||||||
|
.iter()
|
||||||
|
.filter(|t| !stops.contains(t.as_str()))
|
||||||
|
.map(|s| s.as_str())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if tokens1.len() < min_meaningful_tokens || tokens2.len() < min_meaningful_tokens {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_tokens: std::collections::HashSet<&&str> = tokens1.difference(&tokens2).collect();
|
||||||
|
let new_content_ratio = if tokens1.is_empty() {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
new_tokens.len() as f32 / tokens1.len() as f32
|
||||||
|
};
|
||||||
|
if new_content_ratio > max_new_content_ratio {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let intersection = tokens1.intersection(&tokens2).count();
|
||||||
|
let min_size = tokens1.len().min(tokens2.len());
|
||||||
|
if min_size == 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let overlap_ratio = intersection as f32 / min_size as f32;
|
||||||
|
overlap_ratio >= overlap_threshold
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Analyze user messages for misalignment signals.
///
/// For each human turn, pattern families are checked in priority order --
/// correction, rephrase marker, clarification -- and at most one pattern
/// signal is emitted per turn. When no pattern fires, the turn is compared
/// against the previous human turn for a semantic rephrase (fixed thresholds:
/// 0.75 overlap, 4 meaningful tokens, 0.5 new-content ratio) within a window
/// of 3 message indices.
pub fn analyze_misalignment(
    normalized_messages: &[(usize, &str, NormalizedMessage)],
    char_ngram_threshold: f32,
    token_cosine_threshold: f32,
) -> SignalGroup {
    let mut group = SignalGroup::new("misalignment");

    // Most recent human turn seen so far, used for rephrase comparison.
    let mut prev_user_idx: Option<usize> = None;
    let mut prev_user_msg: Option<&NormalizedMessage> = None;

    for (idx, role, norm_msg) in normalized_messages {
        if *role != "human" {
            continue;
        }

        let mut found_in_turn = false;

        // 1) Explicit corrections ("no, i meant ...").
        for pattern in correction_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentCorrection,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "correction"})),
                );
                found_in_turn = true;
                break;
            }
        }

        if found_in_turn {
            prev_user_idx = Some(*idx);
            prev_user_msg = Some(norm_msg);
            continue;
        }

        // 2) Explicit rephrase markers ("let me rephrase ...").
        for pattern in rephrase_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentRephrase,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "rephrase"})),
                );
                found_in_turn = true;
                break;
            }
        }

        if found_in_turn {
            prev_user_idx = Some(*idx);
            prev_user_msg = Some(norm_msg);
            continue;
        }

        // 3) Clarification requests ("i don't understand ...").
        for pattern in clarification_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentClarification,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "clarification"})),
                );
                found_in_turn = true;
                break;
            }
        }

        if found_in_turn {
            prev_user_idx = Some(*idx);
            prev_user_msg = Some(norm_msg);
            continue;
        }

        // Semantic rephrase vs the previous user message (recent only).
        if let (Some(prev_idx), Some(prev_msg)) = (prev_user_idx, prev_user_msg) {
            let turns_between = idx.saturating_sub(prev_idx);
            if turns_between <= 3 && is_similar_rephrase(norm_msg, prev_msg, 0.75, 4, 0.5) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentRephrase,
                        *idx,
                        "[similar rephrase detected]",
                    )
                    .with_confidence(0.8)
                    .with_metadata(json!({
                        "pattern_type": "semantic_rephrase",
                        "compared_to": prev_idx,
                    })),
                );
            }
        }

        prev_user_idx = Some(*idx);
        prev_user_msg = Some(norm_msg);
    }

    group
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: build a NormalizedMessage with a generous truncation cap.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    // Helper: build an indexed (idx, role, message) transcript from pairs.
    fn make(items: &[(&'static str, &str)]) -> Vec<(usize, &'static str, NormalizedMessage)> {
        items
            .iter()
            .enumerate()
            .map(|(i, (role, text))| (i, *role, nm(text)))
            .collect()
    }

    #[test]
    fn detects_explicit_correction() {
        let msgs = make(&[
            ("human", "Show me my orders"),
            ("gpt", "Sure, here are your invoices"),
            ("human", "No, I meant my recent orders"),
        ]);
        let g = analyze_misalignment(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentCorrection)));
    }

    #[test]
    fn detects_rephrase_marker() {
        let msgs = make(&[
            ("human", "Show me X"),
            ("gpt", "Sure"),
            ("human", "Let me rephrase: I want X grouped by date"),
        ]);
        let g = analyze_misalignment(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentRephrase)));
    }

    #[test]
    fn detects_clarification_request() {
        let msgs = make(&[
            ("human", "Run the report"),
            ("gpt", "Foobar quux baz."),
            ("human", "I don't understand what you mean"),
        ]);
        let g = analyze_misalignment(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentClarification)));
    }

    // Messages opening with a confirmation prefix must never count as
    // rephrases, even with high token overlap.
    #[test]
    fn confirmation_is_not_a_rephrase() {
        let m1 = nm("Yes, that's correct, please proceed with the order");
        let m2 = nm("please proceed with the order for the same product");
        assert!(!is_similar_rephrase(&m1, &m2, 0.75, 4, 0.5));
    }
}
|
||||||
10
crates/brightstaff/src/signals/interaction/mod.rs
Normal file
10
crates/brightstaff/src/signals/interaction/mod.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
//! Interaction signals: misalignment, stagnation, disengagement, satisfaction.
//!
//! These signals capture how the dialogue itself unfolds (semantic alignment,
//! progress, engagement, closure) independent of tool execution outcomes.

/// Shared prefixes, stopwords, and pattern-matching helpers.
pub mod constants;
/// Escalation, quit, negative-stance, and profanity detectors.
pub mod disengagement;
/// Correction, rephrase, and clarification detectors.
pub mod misalignment;
/// Gratitude, confirmation, and success detectors.
pub mod satisfaction;
/// Turn-count dragging and repetition detectors.
pub mod stagnation;
|
||||||
177
crates/brightstaff/src/signals/interaction/satisfaction.rs
Normal file
177
crates/brightstaff/src/signals/interaction/satisfaction.rs
Normal file
|
|
@ -0,0 +1,177 @@
|
||||||
|
//! Satisfaction signals: gratitude, confirmation, success.
|
||||||
|
//!
|
||||||
|
//! Direct port of `signals/interaction/satisfaction.py`.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||||
|
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||||
|
|
||||||
|
/// Phrases expressing thanks or praise toward the assistant.
const GRATITUDE_PATTERN_TEXTS: &[&str] = &[
    "that's helpful",
    "that helps",
    "this helps",
    "appreciate it",
    "appreciate that",
    "that's perfect",
    "exactly what i needed",
    "just what i needed",
    "you're the best",
    "you rock",
    "you're awesome",
    "you're amazing",
    "you're great",
];

/// Phrases confirming the assistant's answer is acceptable.
const CONFIRMATION_PATTERN_TEXTS: &[&str] = &[
    "that works",
    "this works",
    "that's great",
    "that's amazing",
    "this is great",
    "that's awesome",
    "love it",
    "love this",
    "love that",
];

/// Phrases reporting that a suggested fix or action succeeded.
const SUCCESS_PATTERN_TEXTS: &[&str] = &[
    "it worked",
    "that worked",
    "this worked",
    "it's working",
    "that's working",
    "this is working",
];
|
||||||
|
|
||||||
|
/// Gratitude patterns, normalized once per process.
fn gratitude_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(GRATITUDE_PATTERN_TEXTS))
}

/// Confirmation patterns, normalized once per process.
fn confirmation_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CONFIRMATION_PATTERN_TEXTS))
}

/// Success patterns, normalized once per process.
fn success_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(SUCCESS_PATTERN_TEXTS))
}
|
||||||
|
|
||||||
|
pub fn analyze_satisfaction(
|
||||||
|
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||||
|
char_ngram_threshold: f32,
|
||||||
|
token_cosine_threshold: f32,
|
||||||
|
) -> SignalGroup {
|
||||||
|
let mut group = SignalGroup::new("satisfaction");
|
||||||
|
|
||||||
|
for (idx, role, norm_msg) in normalized_messages {
|
||||||
|
if *role != "human" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut found = false;
|
||||||
|
|
||||||
|
for pattern in gratitude_patterns() {
|
||||||
|
if norm_msg.matches_normalized_pattern(
|
||||||
|
pattern,
|
||||||
|
char_ngram_threshold,
|
||||||
|
token_cosine_threshold,
|
||||||
|
) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::SatisfactionGratitude,
|
||||||
|
*idx,
|
||||||
|
pattern.raw.clone(),
|
||||||
|
)
|
||||||
|
.with_metadata(json!({"pattern_type": "gratitude"})),
|
||||||
|
);
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if found {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for pattern in confirmation_patterns() {
|
||||||
|
if norm_msg.matches_normalized_pattern(
|
||||||
|
pattern,
|
||||||
|
char_ngram_threshold,
|
||||||
|
token_cosine_threshold,
|
||||||
|
) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::SatisfactionConfirmation,
|
||||||
|
*idx,
|
||||||
|
pattern.raw.clone(),
|
||||||
|
)
|
||||||
|
.with_metadata(json!({"pattern_type": "confirmation"})),
|
||||||
|
);
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if found {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for pattern in success_patterns() {
|
||||||
|
if norm_msg.matches_normalized_pattern(
|
||||||
|
pattern,
|
||||||
|
char_ngram_threshold,
|
||||||
|
token_cosine_threshold,
|
||||||
|
) {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(SignalType::SatisfactionSuccess, *idx, pattern.raw.clone())
|
||||||
|
.with_metadata(json!({"pattern_type": "success"})),
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: build a NormalizedMessage with a generous truncation cap.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    #[test]
    fn detects_gratitude() {
        let msgs = vec![(0usize, "human", nm("That's perfect, appreciate it!"))];
        let g = analyze_satisfaction(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::SatisfactionGratitude)));
    }

    #[test]
    fn detects_confirmation() {
        let msgs = vec![(0usize, "human", nm("That works for me, thanks"))];
        let g = analyze_satisfaction(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::SatisfactionConfirmation)));
    }

    #[test]
    fn detects_success() {
        let msgs = vec![(0usize, "human", nm("Great, it worked!"))];
        let g = analyze_satisfaction(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::SatisfactionSuccess)));
    }
}
|
||||||
241
crates/brightstaff/src/signals/interaction/stagnation.rs
Normal file
241
crates/brightstaff/src/signals/interaction/stagnation.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
||||||
|
//! Stagnation signals: dragging (turn-count efficiency) and repetition.
|
||||||
|
//!
|
||||||
|
//! Direct port of `signals/interaction/stagnation.py`.
|
||||||
|
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType, TurnMetrics};
|
||||||
|
use crate::signals::text_processing::NormalizedMessage;
|
||||||
|
|
||||||
|
/// Adapter row used by stagnation::dragging detector. Mirrors the ShareGPT
/// `{"from": role, "value": text}` shape used in the Python reference.
pub struct ShareGptMsg<'a> {
    /// Speaker role: "human" and "gpt" are counted; other values are ignored.
    pub from: &'a str,
}
|
||||||
|
|
||||||
|
pub fn analyze_dragging(
|
||||||
|
messages: &[ShareGptMsg<'_>],
|
||||||
|
baseline_turns: usize,
|
||||||
|
efficiency_threshold: f32,
|
||||||
|
) -> (SignalGroup, TurnMetrics) {
|
||||||
|
let mut group = SignalGroup::new("stagnation");
|
||||||
|
|
||||||
|
let mut user_turns: usize = 0;
|
||||||
|
let mut assistant_turns: usize = 0;
|
||||||
|
for m in messages {
|
||||||
|
match m.from {
|
||||||
|
"human" => user_turns += 1,
|
||||||
|
"gpt" => assistant_turns += 1,
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_turns = user_turns;
|
||||||
|
let efficiency_score: f32 = if total_turns == 0 || total_turns <= baseline_turns {
|
||||||
|
1.0
|
||||||
|
} else {
|
||||||
|
let excess = (total_turns - baseline_turns) as f32;
|
||||||
|
1.0 / (1.0 + excess * 0.25)
|
||||||
|
};
|
||||||
|
|
||||||
|
let is_dragging = efficiency_score < efficiency_threshold;
|
||||||
|
let metrics = TurnMetrics {
|
||||||
|
total_turns,
|
||||||
|
user_turns,
|
||||||
|
assistant_turns,
|
||||||
|
is_dragging,
|
||||||
|
efficiency_score,
|
||||||
|
};
|
||||||
|
|
||||||
|
if is_dragging {
|
||||||
|
let last_idx = messages.len().saturating_sub(1);
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::StagnationDragging,
|
||||||
|
last_idx,
|
||||||
|
format!(
|
||||||
|
"Conversation dragging: {} turns (efficiency: {:.2})",
|
||||||
|
total_turns, efficiency_score
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.with_confidence(1.0 - efficiency_score)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"total_turns": total_turns,
|
||||||
|
"efficiency_score": efficiency_score,
|
||||||
|
"baseline_turns": baseline_turns,
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
(group, metrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn analyze_repetition(
|
||||||
|
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||||
|
lookback: usize,
|
||||||
|
exact_threshold: f32,
|
||||||
|
near_duplicate_threshold: f32,
|
||||||
|
) -> SignalGroup {
|
||||||
|
let mut group = SignalGroup::new("stagnation");
|
||||||
|
|
||||||
|
// We keep references into `normalized_messages`. Since `normalized_messages`
|
||||||
|
// is borrowed for the whole function, this avoids cloning.
|
||||||
|
let mut prev_human: Vec<(usize, &NormalizedMessage)> = Vec::new();
|
||||||
|
let mut prev_gpt: Vec<(usize, &NormalizedMessage)> = Vec::new();
|
||||||
|
|
||||||
|
for (idx, role, norm_msg) in normalized_messages {
|
||||||
|
if *role != "human" && *role != "gpt" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip human positive-prefix messages; they're naturally repetitive.
|
||||||
|
if *role == "human" && starts_with_prefix(&norm_msg.raw, POSITIVE_PREFIXES) {
|
||||||
|
prev_human.push((*idx, norm_msg));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if norm_msg.tokens.len() < 5 {
|
||||||
|
if *role == "human" {
|
||||||
|
prev_human.push((*idx, norm_msg));
|
||||||
|
} else {
|
||||||
|
prev_gpt.push((*idx, norm_msg));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let prev = if *role == "human" {
|
||||||
|
&prev_human
|
||||||
|
} else {
|
||||||
|
&prev_gpt
|
||||||
|
};
|
||||||
|
let start = prev.len().saturating_sub(lookback);
|
||||||
|
let mut matched = false;
|
||||||
|
for (prev_idx, prev_msg) in &prev[start..] {
|
||||||
|
if prev_msg.tokens.len() < 5 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let similarity = norm_msg.ngram_similarity_with_message(prev_msg);
|
||||||
|
if similarity >= exact_threshold {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::StagnationRepetition,
|
||||||
|
*idx,
|
||||||
|
format!("Exact repetition with message {}", prev_idx),
|
||||||
|
)
|
||||||
|
.with_confidence(similarity)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"repetition_type": "exact",
|
||||||
|
"compared_to": prev_idx,
|
||||||
|
"similarity": similarity,
|
||||||
|
"role": role,
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
matched = true;
|
||||||
|
break;
|
||||||
|
} else if similarity >= near_duplicate_threshold {
|
||||||
|
group.add_signal(
|
||||||
|
SignalInstance::new(
|
||||||
|
SignalType::StagnationRepetition,
|
||||||
|
*idx,
|
||||||
|
format!("Near-duplicate with message {}", prev_idx),
|
||||||
|
)
|
||||||
|
.with_confidence(similarity)
|
||||||
|
.with_metadata(json!({
|
||||||
|
"repetition_type": "near_duplicate",
|
||||||
|
"compared_to": prev_idx,
|
||||||
|
"similarity": similarity,
|
||||||
|
"role": role,
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
matched = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let _ = matched;
|
||||||
|
|
||||||
|
if *role == "human" {
|
||||||
|
prev_human.push((*idx, norm_msg));
|
||||||
|
} else {
|
||||||
|
prev_gpt.push((*idx, norm_msg));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Combined stagnation analyzer: dragging + repetition.
|
||||||
|
pub fn analyze_stagnation(
|
||||||
|
messages: &[ShareGptMsg<'_>],
|
||||||
|
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||||
|
baseline_turns: usize,
|
||||||
|
) -> (SignalGroup, TurnMetrics) {
|
||||||
|
let (dragging_group, metrics) = analyze_dragging(messages, baseline_turns, 0.5);
|
||||||
|
let repetition_group = analyze_repetition(normalized_messages, 2, 0.95, 0.85);
|
||||||
|
|
||||||
|
let mut combined = SignalGroup::new("stagnation");
|
||||||
|
for s in dragging_group.signals.iter().cloned() {
|
||||||
|
combined.add_signal(s);
|
||||||
|
}
|
||||||
|
for s in repetition_group.signals.iter().cloned() {
|
||||||
|
combined.add_signal(s);
|
||||||
|
}
|
||||||
|
(combined, metrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: normalize `s` with the analyzer's standard token budget.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    // 15 user turns against a baseline of 5 must trip the dragging detector
    // and emit a StagnationDragging signal.
    #[test]
    fn dragging_after_many_user_turns() {
        let msgs: Vec<_> = (0..15)
            .flat_map(|_| [ShareGptMsg { from: "human" }, ShareGptMsg { from: "gpt" }])
            .collect();
        let (g, m) = analyze_dragging(&msgs, 5, 0.5);
        assert!(m.is_dragging);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::StagnationDragging)));
    }

    // At or below the baseline turn count, no dragging metric and no signals.
    #[test]
    fn no_dragging_below_baseline() {
        let msgs = vec![
            ShareGptMsg { from: "human" },
            ShareGptMsg { from: "gpt" },
            ShareGptMsg { from: "human" },
            ShareGptMsg { from: "gpt" },
        ];
        let (g, m) = analyze_dragging(&msgs, 5, 0.5);
        assert!(!m.is_dragging);
        assert!(g.signals.is_empty());
    }

    // A verbatim re-send of an earlier human message inside the lookback
    // window must produce a StagnationRepetition signal.
    #[test]
    fn detects_exact_repetition_in_user_messages() {
        let n = vec![
            (
                0usize,
                "human",
                nm("This widget is broken and needs repair right now"),
            ),
            (1, "gpt", nm("Sorry to hear that. Let me look into it.")),
            (
                2,
                "human",
                nm("This widget is broken and needs repair right now"),
            ),
        ];
        let g = analyze_repetition(&n, 2, 0.95, 0.85);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::StagnationRepetition)));
    }
}
|
||||||
|
|
@ -1,3 +1,26 @@
|
||||||
mod analyzer;
|
//! Plano signals: behavioral quality indicators for agent interactions.
|
||||||
|
//!
|
||||||
|
//! This is a Rust port of the paper-aligned Python reference implementation at
|
||||||
|
//! `https://github.com/katanemo/signals` (or `/Users/shashmi/repos/signals`).
|
||||||
|
//!
|
||||||
|
//! Three layers of signals are detected from a conversation transcript:
|
||||||
|
//!
|
||||||
|
//! - **Interaction**: misalignment, stagnation, disengagement, satisfaction
|
||||||
|
//! - **Execution**: failure, loops
|
||||||
|
//! - **Environment**: exhaustion
|
||||||
|
//!
|
||||||
|
//! See `SignalType` for the full hierarchy.
|
||||||
|
|
||||||
pub use analyzer::*;
|
pub mod analyzer;
|
||||||
|
pub mod environment;
|
||||||
|
pub mod execution;
|
||||||
|
pub mod interaction;
|
||||||
|
pub mod otel;
|
||||||
|
pub mod schemas;
|
||||||
|
pub mod text_processing;
|
||||||
|
|
||||||
|
pub use analyzer::{SignalAnalyzer, FLAG_MARKER};
|
||||||
|
pub use schemas::{
|
||||||
|
EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
|
||||||
|
SignalInstance, SignalLayer, SignalReport, SignalType, TurnMetrics,
|
||||||
|
};
|
||||||
|
|
|
||||||
241
crates/brightstaff/src/signals/otel.rs
Normal file
241
crates/brightstaff/src/signals/otel.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
||||||
|
//! Helpers for emitting `SignalReport` data to OpenTelemetry spans.
|
||||||
|
//!
|
||||||
|
//! Two sets of attributes are emitted:
|
||||||
|
//!
|
||||||
|
//! - **Legacy** keys under `signals.*` (e.g. `signals.frustration.count`),
|
||||||
|
//! computed from the new layered counts. Preserved for one release for
|
||||||
|
//! backward compatibility with existing dashboards.
|
||||||
|
//! - **New** layered keys (e.g. `signals.interaction.misalignment.count`),
|
||||||
|
//! one set of `count`/`severity` attributes per category, plus per-instance
|
||||||
|
//! span events named `signal.<dotted_signal_type>`.
|
||||||
|
|
||||||
|
use opentelemetry::trace::SpanRef;
|
||||||
|
use opentelemetry::KeyValue;
|
||||||
|
|
||||||
|
use crate::signals::schemas::{SignalGroup, SignalReport, SignalType};
|
||||||
|
|
||||||
|
/// Emit both legacy and layered OTel attributes/events for a `SignalReport`.
///
/// Returns `true` if any "concerning" signal was found, mirroring the previous
/// behavior used to flag the span operation name.
pub fn emit_signals_to_span(span: &SpanRef<'_>, report: &SignalReport) -> bool {
    // Session-level attributes first, then the layered `signals.<layer>.*`
    // keys, then the derived legacy `signals.*` aliases, and finally one span
    // event per detected signal instance.
    emit_overall(span, report);
    emit_layered_attributes(span, report);
    emit_legacy_attributes(span, report);
    emit_signal_events(span, report);

    is_concerning(report)
}
|
||||||
|
|
||||||
|
fn emit_overall(span: &SpanRef<'_>, report: &SignalReport) {
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
"signals.quality",
|
||||||
|
report.overall_quality.as_str().to_string(),
|
||||||
|
));
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
"signals.quality_score",
|
||||||
|
report.quality_score as f64,
|
||||||
|
));
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
"signals.turn_count",
|
||||||
|
report.turn_metrics.total_turns as i64,
|
||||||
|
));
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
"signals.efficiency_score",
|
||||||
|
report.turn_metrics.efficiency_score as f64,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_group(span: &SpanRef<'_>, prefix: &str, group: &SignalGroup) {
|
||||||
|
if group.count == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
format!("{}.count", prefix),
|
||||||
|
group.count as i64,
|
||||||
|
));
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
format!("{}.severity", prefix),
|
||||||
|
group.severity as i64,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_layered_attributes(span: &SpanRef<'_>, report: &SignalReport) {
|
||||||
|
emit_group(
|
||||||
|
span,
|
||||||
|
"signals.interaction.misalignment",
|
||||||
|
&report.interaction.misalignment,
|
||||||
|
);
|
||||||
|
emit_group(
|
||||||
|
span,
|
||||||
|
"signals.interaction.stagnation",
|
||||||
|
&report.interaction.stagnation,
|
||||||
|
);
|
||||||
|
emit_group(
|
||||||
|
span,
|
||||||
|
"signals.interaction.disengagement",
|
||||||
|
&report.interaction.disengagement,
|
||||||
|
);
|
||||||
|
emit_group(
|
||||||
|
span,
|
||||||
|
"signals.interaction.satisfaction",
|
||||||
|
&report.interaction.satisfaction,
|
||||||
|
);
|
||||||
|
emit_group(span, "signals.execution.failure", &report.execution.failure);
|
||||||
|
emit_group(span, "signals.execution.loops", &report.execution.loops);
|
||||||
|
emit_group(
|
||||||
|
span,
|
||||||
|
"signals.environment.exhaustion",
|
||||||
|
&report.environment.exhaustion,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Number of detected instances of signal type `t` across the whole report.
fn count_of(report: &SignalReport, t: SignalType) -> usize {
    report
        .iter_signals()
        .fold(0, |total, sig| total + usize::from(sig.signal_type == t))
}
|
||||||
|
|
||||||
|
/// Emit the legacy attribute keys consumed by existing dashboards. These are
|
||||||
|
/// derived from the new `SignalReport` so no detector contract is broken.
|
||||||
|
fn emit_legacy_attributes(span: &SpanRef<'_>, report: &SignalReport) {
|
||||||
|
use crate::tracing::signals as legacy;
|
||||||
|
|
||||||
|
// signals.follow_up.repair.{count,ratio} - misalignment proxies repairs.
|
||||||
|
let repair_count = report.interaction.misalignment.count;
|
||||||
|
let user_turns = report.turn_metrics.user_turns.max(1) as f32;
|
||||||
|
if repair_count > 0 {
|
||||||
|
span.set_attribute(KeyValue::new(legacy::REPAIR_COUNT, repair_count as i64));
|
||||||
|
let ratio = repair_count as f32 / user_turns;
|
||||||
|
span.set_attribute(KeyValue::new(legacy::REPAIR_RATIO, format!("{:.3}", ratio)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// signals.frustration.{count,severity} - disengagement.negative_stance is
|
||||||
|
// the closest legacy analog of "frustration".
|
||||||
|
let frustration_count = count_of(report, SignalType::DisengagementNegativeStance);
|
||||||
|
if frustration_count > 0 {
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
legacy::FRUSTRATION_COUNT,
|
||||||
|
frustration_count as i64,
|
||||||
|
));
|
||||||
|
let severity = match frustration_count {
|
||||||
|
0 => 0,
|
||||||
|
1..=2 => 1,
|
||||||
|
3..=4 => 2,
|
||||||
|
_ => 3,
|
||||||
|
};
|
||||||
|
span.set_attribute(KeyValue::new(legacy::FRUSTRATION_SEVERITY, severity as i64));
|
||||||
|
}
|
||||||
|
|
||||||
|
// signals.repetition.count - stagnation (repetition + dragging).
|
||||||
|
if report.interaction.stagnation.count > 0 {
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
legacy::REPETITION_COUNT,
|
||||||
|
report.interaction.stagnation.count as i64,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// signals.escalation.requested - any escalation/quit signal.
|
||||||
|
let escalated = report.interaction.disengagement.signals.iter().any(|s| {
|
||||||
|
matches!(
|
||||||
|
s.signal_type,
|
||||||
|
SignalType::DisengagementEscalation | SignalType::DisengagementQuit
|
||||||
|
)
|
||||||
|
});
|
||||||
|
if escalated {
|
||||||
|
span.set_attribute(KeyValue::new(legacy::ESCALATION_REQUESTED, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
// signals.positive_feedback.count - satisfaction signals.
|
||||||
|
if report.interaction.satisfaction.count > 0 {
|
||||||
|
span.set_attribute(KeyValue::new(
|
||||||
|
legacy::POSITIVE_FEEDBACK_COUNT,
|
||||||
|
report.interaction.satisfaction.count as i64,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_signal_events(span: &SpanRef<'_>, report: &SignalReport) {
|
||||||
|
for sig in report.iter_signals() {
|
||||||
|
let event_name = format!("signal.{}", sig.signal_type.as_str());
|
||||||
|
let mut attrs: Vec<KeyValue> = vec![
|
||||||
|
KeyValue::new("signal.type", sig.signal_type.as_str().to_string()),
|
||||||
|
KeyValue::new("signal.message_index", sig.message_index as i64),
|
||||||
|
KeyValue::new("signal.confidence", sig.confidence as f64),
|
||||||
|
];
|
||||||
|
if !sig.snippet.is_empty() {
|
||||||
|
attrs.push(KeyValue::new("signal.snippet", sig.snippet.clone()));
|
||||||
|
}
|
||||||
|
if !sig.metadata.is_null() {
|
||||||
|
attrs.push(KeyValue::new("signal.metadata", sig.metadata.to_string()));
|
||||||
|
}
|
||||||
|
span.add_event(event_name, attrs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_concerning(report: &SignalReport) -> bool {
|
||||||
|
use crate::signals::schemas::InteractionQuality;
|
||||||
|
if matches!(
|
||||||
|
report.overall_quality,
|
||||||
|
InteractionQuality::Poor | InteractionQuality::Severe
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if report.interaction.disengagement.count > 0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if report.interaction.stagnation.count > 2 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if report.execution.failure.count > 0 || report.execution.loops.count > 0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::schemas::{
        EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
        SignalInstance, SignalReport, SignalType, TurnMetrics,
    };

    /// Minimal report fixture: one escalation signal in the disengagement
    /// group and a "severe" overall quality.
    fn report_with_escalation() -> SignalReport {
        let mut diseng = SignalGroup::new("disengagement");
        diseng.add_signal(SignalInstance::new(
            SignalType::DisengagementEscalation,
            3,
            "get me a human",
        ));
        SignalReport {
            interaction: InteractionSignals {
                disengagement: diseng,
                ..InteractionSignals::default()
            },
            execution: ExecutionSignals::default(),
            environment: EnvironmentSignals::default(),
            overall_quality: InteractionQuality::Severe,
            quality_score: 0.0,
            turn_metrics: TurnMetrics {
                total_turns: 3,
                user_turns: 2,
                assistant_turns: 1,
                is_dragging: false,
                efficiency_score: 1.0,
            },
            summary: String::new(),
        }
    }

    // Any disengagement signal (and severe quality) must flag the span.
    #[test]
    fn is_concerning_flags_disengagement() {
        let r = report_with_escalation();
        assert!(is_concerning(&r));
    }

    // count_of counts only the requested signal type.
    #[test]
    fn count_of_returns_per_type_count() {
        let r = report_with_escalation();
        assert_eq!(count_of(&r, SignalType::DisengagementEscalation), 1);
        assert_eq!(count_of(&r, SignalType::DisengagementNegativeStance), 0);
    }
}
|
||||||
431
crates/brightstaff/src/signals/schemas.rs
Normal file
431
crates/brightstaff/src/signals/schemas.rs
Normal file
|
|
@ -0,0 +1,431 @@
|
||||||
|
//! Data shapes for the signal analyzer.
|
||||||
|
//!
|
||||||
|
//! Mirrors `signals/schemas.py` from the reference implementation. Where the
|
||||||
|
//! Python library exposes a `Dict[str, SignalGroup]` partitioned by category,
|
||||||
|
//! the Rust port uses strongly-typed sub-structs (`InteractionSignals`,
|
||||||
|
//! `ExecutionSignals`, `EnvironmentSignals`) for the same partitioning.
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// Hierarchical signal type. The 25 leaf variants mirror the paper taxonomy
/// and the Python reference's `SignalType` string enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SignalType {
    // Interaction > Misalignment
    MisalignmentCorrection,
    MisalignmentRephrase,
    MisalignmentClarification,

    // Interaction > Stagnation
    StagnationDragging,
    StagnationRepetition,

    // Interaction > Disengagement
    DisengagementEscalation,
    DisengagementQuit,
    DisengagementNegativeStance,

    // Interaction > Satisfaction
    SatisfactionGratitude,
    SatisfactionConfirmation,
    SatisfactionSuccess,

    // Execution > Failure
    ExecutionFailureInvalidArgs,
    ExecutionFailureBadQuery,
    ExecutionFailureToolNotFound,
    ExecutionFailureAuthMisuse,
    ExecutionFailureStateError,

    // Execution > Loops
    ExecutionLoopsRetry,
    ExecutionLoopsParameterDrift,
    ExecutionLoopsOscillation,

    // Environment > Exhaustion
    EnvironmentExhaustionApiError,
    EnvironmentExhaustionTimeout,
    EnvironmentExhaustionRateLimit,
    EnvironmentExhaustionNetwork,
    EnvironmentExhaustionMalformed,
    EnvironmentExhaustionContextOverflow,
}

impl SignalType {
    /// Dotted hierarchical string identifier, e.g.
    /// `"interaction.misalignment.correction"`. Matches the Python reference's
    /// `SignalType` enum *value* strings byte-for-byte.
    pub fn as_str(&self) -> &'static str {
        match self {
            SignalType::MisalignmentCorrection => "interaction.misalignment.correction",
            SignalType::MisalignmentRephrase => "interaction.misalignment.rephrase",
            SignalType::MisalignmentClarification => "interaction.misalignment.clarification",
            SignalType::StagnationDragging => "interaction.stagnation.dragging",
            SignalType::StagnationRepetition => "interaction.stagnation.repetition",
            SignalType::DisengagementEscalation => "interaction.disengagement.escalation",
            SignalType::DisengagementQuit => "interaction.disengagement.quit",
            SignalType::DisengagementNegativeStance => "interaction.disengagement.negative_stance",
            SignalType::SatisfactionGratitude => "interaction.satisfaction.gratitude",
            SignalType::SatisfactionConfirmation => "interaction.satisfaction.confirmation",
            SignalType::SatisfactionSuccess => "interaction.satisfaction.success",
            SignalType::ExecutionFailureInvalidArgs => "execution.failure.invalid_args",
            SignalType::ExecutionFailureBadQuery => "execution.failure.bad_query",
            SignalType::ExecutionFailureToolNotFound => "execution.failure.tool_not_found",
            SignalType::ExecutionFailureAuthMisuse => "execution.failure.auth_misuse",
            SignalType::ExecutionFailureStateError => "execution.failure.state_error",
            SignalType::ExecutionLoopsRetry => "execution.loops.retry",
            SignalType::ExecutionLoopsParameterDrift => "execution.loops.parameter_drift",
            SignalType::ExecutionLoopsOscillation => "execution.loops.oscillation",
            SignalType::EnvironmentExhaustionApiError => "environment.exhaustion.api_error",
            SignalType::EnvironmentExhaustionTimeout => "environment.exhaustion.timeout",
            SignalType::EnvironmentExhaustionRateLimit => "environment.exhaustion.rate_limit",
            SignalType::EnvironmentExhaustionNetwork => "environment.exhaustion.network",
            // The last two dotted names differ from the variant names; they
            // are kept as-is for wire compatibility with the reference.
            SignalType::EnvironmentExhaustionMalformed => {
                "environment.exhaustion.malformed_response"
            }
            SignalType::EnvironmentExhaustionContextOverflow => {
                "environment.exhaustion.context_overflow"
            }
        }
    }

    /// Top-level layer (interaction / execution / environment) this signal
    /// type belongs to.
    pub fn layer(&self) -> SignalLayer {
        match self {
            SignalType::MisalignmentCorrection
            | SignalType::MisalignmentRephrase
            | SignalType::MisalignmentClarification
            | SignalType::StagnationDragging
            | SignalType::StagnationRepetition
            | SignalType::DisengagementEscalation
            | SignalType::DisengagementQuit
            | SignalType::DisengagementNegativeStance
            | SignalType::SatisfactionGratitude
            | SignalType::SatisfactionConfirmation
            | SignalType::SatisfactionSuccess => SignalLayer::Interaction,
            SignalType::ExecutionFailureInvalidArgs
            | SignalType::ExecutionFailureBadQuery
            | SignalType::ExecutionFailureToolNotFound
            | SignalType::ExecutionFailureAuthMisuse
            | SignalType::ExecutionFailureStateError
            | SignalType::ExecutionLoopsRetry
            | SignalType::ExecutionLoopsParameterDrift
            | SignalType::ExecutionLoopsOscillation => SignalLayer::Execution,
            SignalType::EnvironmentExhaustionApiError
            | SignalType::EnvironmentExhaustionTimeout
            | SignalType::EnvironmentExhaustionRateLimit
            | SignalType::EnvironmentExhaustionNetwork
            | SignalType::EnvironmentExhaustionMalformed
            | SignalType::EnvironmentExhaustionContextOverflow => SignalLayer::Environment,
        }
    }

    /// Category name within the layer (e.g. `"misalignment"`, `"failure"`).
    pub fn category(&self) -> &'static str {
        // Strip the layer prefix and take everything before the next dot.
        let s = self.as_str();
        let after_layer = s.split_once('.').map(|(_, rest)| rest).unwrap_or(s);
        after_layer
            .split_once('.')
            .map(|(c, _)| c)
            .unwrap_or(after_layer)
    }
}
|
||||||
|
|
||||||
|
/// Top-level signal layer in the three-layer taxonomy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SignalLayer {
    Interaction,
    Execution,
    Environment,
}

impl SignalLayer {
    /// Lowercase layer name used as the first segment of the dotted
    /// signal-type identifiers.
    pub fn as_str(&self) -> &'static str {
        match self {
            SignalLayer::Interaction => "interaction",
            SignalLayer::Execution => "execution",
            SignalLayer::Environment => "environment",
        }
    }
}
|
||||||
|
|
||||||
|
/// Overall quality assessment for an agent interaction session.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum InteractionQuality {
    Excellent,
    Good,
    Neutral,
    Poor,
    Severe,
}

impl InteractionQuality {
    /// Lowercase string form, as emitted in the `signals.quality` attribute.
    pub fn as_str(&self) -> &'static str {
        match self {
            InteractionQuality::Excellent => "excellent",
            InteractionQuality::Good => "good",
            InteractionQuality::Neutral => "neutral",
            InteractionQuality::Poor => "poor",
            InteractionQuality::Severe => "severe",
        }
    }
}
|
||||||
|
|
||||||
|
/// A single detected signal instance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalInstance {
    pub signal_type: SignalType,
    /// Absolute index into the original conversation `Vec<Message>`.
    pub message_index: usize,
    /// Short excerpt or description of the triggering text.
    pub snippet: String,
    /// Detector confidence; defaults to 1.0 from `new`.
    pub confidence: f32,
    /// Free-form metadata payload mirroring the Python `Dict[str, Any]`.
    /// Stored as a JSON object so we can faithfully reproduce the reference's
    /// flexible per-detector metadata.
    #[serde(default)]
    pub metadata: serde_json::Value,
}

impl SignalInstance {
    /// Create an instance with full confidence and an empty metadata object.
    pub fn new(signal_type: SignalType, message_index: usize, snippet: impl Into<String>) -> Self {
        Self {
            signal_type,
            message_index,
            snippet: snippet.into(),
            confidence: 1.0,
            metadata: serde_json::Value::Object(serde_json::Map::new()),
        }
    }

    /// Builder-style override of the default confidence.
    pub fn with_confidence(mut self, c: f32) -> Self {
        self.confidence = c;
        self
    }

    /// Builder-style override of the default (empty) metadata object.
    pub fn with_metadata(mut self, m: serde_json::Value) -> Self {
        self.metadata = m;
        self
    }
}
|
||||||
|
|
||||||
|
/// Aggregated signals for a specific category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalGroup {
    pub category: String,
    /// Cached `signals.len()`; kept in sync by `add_signal`.
    pub count: usize,
    pub signals: Vec<SignalInstance>,
    /// Severity level (0-3: none, mild, moderate, severe).
    pub severity: u8,
}

impl SignalGroup {
    /// Create an empty group for `category` with zero count and severity.
    pub fn new(category: impl Into<String>) -> Self {
        Self {
            category: category.into(),
            count: 0,
            signals: Vec::new(),
            severity: 0,
        }
    }

    /// Append a signal and refresh the cached count and severity.
    pub fn add_signal(&mut self, signal: SignalInstance) {
        self.signals.push(signal);
        self.count = self.signals.len();
        self.update_severity();
    }

    // Bucket the count into severity: 0 none, 1-2 mild, 3-4 moderate,
    // 5+ severe.
    fn update_severity(&mut self) {
        self.severity = match self.count {
            0 => 0,
            1..=2 => 1,
            3..=4 => 2,
            _ => 3,
        };
    }
}
|
||||||
|
|
||||||
|
/// Turn count and efficiency metrics, used by stagnation.dragging.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TurnMetrics {
    pub total_turns: usize,
    pub user_turns: usize,
    pub assistant_turns: usize,
    /// True when the efficiency score fell below the dragging threshold.
    pub is_dragging: bool,
    /// 1.0 at or below the baseline turn count, decaying with excess turns.
    pub efficiency_score: f32,
}
|
||||||
|
|
||||||
|
/// Interaction-layer signal groups, one per category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InteractionSignals {
    pub misalignment: SignalGroup,
    pub stagnation: SignalGroup,
    pub disengagement: SignalGroup,
    pub satisfaction: SignalGroup,
}

impl Default for InteractionSignals {
    /// Empty groups for every interaction category.
    fn default() -> Self {
        Self {
            misalignment: SignalGroup::new("misalignment"),
            stagnation: SignalGroup::new("stagnation"),
            disengagement: SignalGroup::new("disengagement"),
            satisfaction: SignalGroup::new("satisfaction"),
        }
    }
}

impl InteractionSignals {
    /// Ratio of misalignment instances to user turns. Used as a quality
    /// scoring input and as a threshold for the "high misalignment rate"
    /// summary callout. Mirrors `misalignment.count / max(user_turns, 1)`
    /// from the Python reference's `_assess_quality` and `_generate_summary`.
    pub fn misalignment_ratio(&self, user_turns: usize) -> f32 {
        let denom = user_turns.max(1) as f32;
        self.misalignment.count as f32 / denom
    }
}
|
||||||
|
|
||||||
|
/// Execution-layer signal groups, one per category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionSignals {
    pub failure: SignalGroup,
    pub loops: SignalGroup,
}

impl Default for ExecutionSignals {
    /// Empty groups for every execution category.
    fn default() -> Self {
        Self {
            failure: SignalGroup::new("failure"),
            loops: SignalGroup::new("loops"),
        }
    }
}
|
||||||
|
|
||||||
|
/// Environment-layer signal groups.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentSignals {
    pub exhaustion: SignalGroup,
}

impl Default for EnvironmentSignals {
    /// Empty exhaustion group.
    fn default() -> Self {
        Self {
            exhaustion: SignalGroup::new("exhaustion"),
        }
    }
}
|
||||||
|
|
||||||
|
/// Complete signal analysis report for a conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalReport {
    pub interaction: InteractionSignals,
    pub execution: ExecutionSignals,
    pub environment: EnvironmentSignals,
    pub overall_quality: InteractionQuality,
    /// Numeric quality score; defaults to 50.0 (neutral midpoint).
    pub quality_score: f32,
    pub turn_metrics: TurnMetrics,
    /// Human-readable summary text.
    pub summary: String,
}

impl Default for SignalReport {
    /// Empty report: no signals, neutral quality at score 50.
    fn default() -> Self {
        Self {
            interaction: InteractionSignals::default(),
            execution: ExecutionSignals::default(),
            environment: EnvironmentSignals::default(),
            overall_quality: InteractionQuality::Neutral,
            quality_score: 50.0,
            turn_metrics: TurnMetrics::default(),
            summary: String::new(),
        }
    }
}

impl SignalReport {
    /// Iterate over every `SignalInstance` across all layers and groups.
    /// Order is fixed: interaction categories, then execution, then
    /// environment.
    pub fn iter_signals(&self) -> impl Iterator<Item = &SignalInstance> {
        self.interaction
            .misalignment
            .signals
            .iter()
            .chain(self.interaction.stagnation.signals.iter())
            .chain(self.interaction.disengagement.signals.iter())
            .chain(self.interaction.satisfaction.signals.iter())
            .chain(self.execution.failure.signals.iter())
            .chain(self.execution.loops.signals.iter())
            .chain(self.environment.exhaustion.signals.iter())
    }

    /// True if at least one instance of `t` was detected anywhere.
    pub fn has_signal_type(&self, t: SignalType) -> bool {
        self.iter_signals().any(|s| s.signal_type == t)
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // The dotted string forms are a wire contract with the Python reference;
    // spot-check one leaf per layer plus the renamed "malformed" variant.
    #[test]
    fn signal_type_strings_match_paper_taxonomy() {
        assert_eq!(
            SignalType::MisalignmentCorrection.as_str(),
            "interaction.misalignment.correction"
        );
        assert_eq!(
            SignalType::ExecutionFailureInvalidArgs.as_str(),
            "execution.failure.invalid_args"
        );
        assert_eq!(
            SignalType::EnvironmentExhaustionMalformed.as_str(),
            "environment.exhaustion.malformed_response"
        );
    }

    // layer() and category() must agree with the dotted identifier segments.
    #[test]
    fn signal_type_layer_and_category() {
        assert_eq!(
            SignalType::MisalignmentRephrase.layer(),
            SignalLayer::Interaction
        );
        assert_eq!(SignalType::MisalignmentRephrase.category(), "misalignment");
        assert_eq!(
            SignalType::ExecutionLoopsRetry.layer(),
            SignalLayer::Execution
        );
        assert_eq!(SignalType::ExecutionLoopsRetry.category(), "loops");
        assert_eq!(
            SignalType::EnvironmentExhaustionTimeout.layer(),
            SignalLayer::Environment
        );
        assert_eq!(
            SignalType::EnvironmentExhaustionTimeout.category(),
            "exhaustion"
        );
    }

    // Severity buckets (0 / 1-2 / 3-4 / 5+) must match the Python reference.
    #[test]
    fn signal_group_severity_buckets_match_python() {
        let mut g = SignalGroup::new("misalignment");
        assert_eq!(g.severity, 0);
        for n in 1..=2 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 1);
        for n in 3..=4 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 2);
        for n in 5..=6 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 3);
    }
}
|
||||||
401
crates/brightstaff/src/signals/text_processing.rs
Normal file
401
crates/brightstaff/src/signals/text_processing.rs
Normal file
|
|
@ -0,0 +1,401 @@
|
||||||
|
//! Text normalization and similarity primitives.
|
||||||
|
//!
|
||||||
|
//! Direct Rust port of `signals/text_processing.py` from the reference. The
|
||||||
|
//! shapes (`NormalizedMessage`, `NormalizedPattern`) and similarity formulas
|
||||||
|
//! match the Python implementation exactly so that pattern matching produces
|
||||||
|
//! the same results on the same inputs.
|
||||||
|
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
/// Size of character n-grams used for fuzzy similarity (3 = trigrams).
|
||||||
|
pub const NGRAM_SIZE: usize = 3;
|
||||||
|
|
||||||
|
const PUNCT_TRIM: &[char] = &[
|
||||||
|
'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=',
|
||||||
|
'>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
|
||||||
|
];
|
||||||
|
|
||||||
|
/// Pre-processed message with normalized text and tokens for efficient matching.
#[derive(Debug, Clone, Default)]
pub struct NormalizedMessage {
    // Possibly-truncated original text (head + tail when over the limit).
    pub raw: String,
    // Normalized tokens in original order (boundary punctuation trimmed).
    pub tokens: Vec<String>,
    // Unique tokens, for O(1) membership tests.
    pub token_set: HashSet<String>,
    // Adjacent-token bigrams joined with a single space ("a b").
    pub bigram_set: HashSet<String>,
    // Character n-grams over the joined token text.
    pub char_ngram_set: HashSet<String>,
    // Per-token occurrence counts, used by cosine similarity.
    pub token_frequency: HashMap<String, usize>,
}
|
||||||
|
|
||||||
|
impl NormalizedMessage {
    /// Create a normalized message from raw text. Mirrors
    /// `NormalizedMessage.from_text` in the reference, including the
    /// head-20%/tail-80% truncation strategy when text exceeds `max_length`.
    pub fn from_text(text: &str, max_length: usize) -> Self {
        // Work in characters, not bytes, to match Python string semantics.
        let char_count = text.chars().count();

        let raw: String = if char_count <= max_length {
            text.to_string()
        } else {
            // Keep 20% of the budget from the head, the rest from the tail.
            let head_len = max_length / 5;
            // Reserve one char for the joining space.
            let tail_len = max_length.saturating_sub(head_len + 1);
            let head: String = text.chars().take(head_len).collect();
            let tail: String = text
                .chars()
                .skip(char_count.saturating_sub(tail_len))
                .collect();
            format!("{} {}", head, tail)
        };

        // Normalize unicode punctuation to ASCII equivalents.
        let normalized_unicode = raw
            .replace(['\u{2019}', '\u{2018}'], "'")
            .replace(['\u{201c}', '\u{201d}'], "\"")
            .replace(['\u{2013}', '\u{2014}'], "-");

        // Lowercase + collapse whitespace (matches Python's `" ".join(s.split())`).
        let normalized: String = normalized_unicode
            .to_lowercase()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ");

        // Tokenize: trim boundary punctuation, keep internal punctuation.
        let mut tokens: Vec<String> = Vec::new();
        for word in normalized.split_whitespace() {
            let stripped: String = word.trim_matches(PUNCT_TRIM).to_string();
            if !stripped.is_empty() {
                tokens.push(stripped);
            }
        }

        let token_set: HashSet<String> = tokens.iter().cloned().collect();

        // Adjacent-token bigrams, joined with a single space.
        let mut bigram_set: HashSet<String> = HashSet::new();
        for i in 0..tokens.len().saturating_sub(1) {
            bigram_set.insert(format!("{} {}", tokens[i], tokens[i + 1]));
        }

        // Character n-grams are computed over the cleaned, re-joined tokens.
        let tokens_text = tokens.join(" ");
        let char_ngram_set = char_ngrams(&tokens_text, NGRAM_SIZE);

        // Token frequency table for cosine similarity.
        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for t in &tokens {
            *token_frequency.entry(t.clone()).or_insert(0) += 1;
        }

        Self {
            raw,
            tokens,
            token_set,
            bigram_set,
            char_ngram_set,
            token_frequency,
        }
    }

    /// True if `token` appears exactly among the normalized tokens.
    pub fn contains_token(&self, token: &str) -> bool {
        self.token_set.contains(token)
    }

    /// True if `phrase`'s whitespace-separated tokens appear as a
    /// consecutive run in this message's token stream.
    pub fn contains_phrase(&self, phrase: &str) -> bool {
        let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect();
        if phrase_tokens.is_empty() {
            return false;
        }
        if phrase_tokens.len() == 1 {
            return self.contains_token(phrase_tokens[0]);
        }
        if phrase_tokens.len() > self.tokens.len() {
            return false;
        }
        // Slide a window of length n over the message tokens; the guard
        // above makes `self.tokens.len() - n` safe.
        let n = phrase_tokens.len();
        for i in 0..=self.tokens.len() - n {
            if self.tokens[i..i + n]
                .iter()
                .zip(phrase_tokens.iter())
                .all(|(a, b)| a == b)
            {
                return true;
            }
        }
        false
    }

    /// Character n-gram (Jaccard) similarity vs another normalized message.
    pub fn ngram_similarity_with_message(&self, other: &NormalizedMessage) -> f32 {
        jaccard(&self.char_ngram_set, &other.char_ngram_set)
    }

    /// Character n-gram (Jaccard) similarity vs a raw pattern string.
    pub fn ngram_similarity_with_pattern(&self, pattern: &str) -> f32 {
        // Pattern text is normalized on every call; pre-normalize with
        // `NormalizedPattern` if the pattern is reused.
        let normalized = strip_non_word_chars(&pattern.to_lowercase());
        let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
        jaccard(&self.char_ngram_set, &pattern_ngrams)
    }

    /// Fraction of pattern's ngrams contained in this message's ngram set.
    pub fn char_ngram_containment(&self, pattern: &str) -> f32 {
        let normalized = strip_non_word_chars(&pattern.to_lowercase());
        let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
        if pattern_ngrams.is_empty() {
            return 0.0;
        }
        let contained = pattern_ngrams
            .iter()
            .filter(|ng| self.char_ngram_set.contains(*ng))
            .count();
        contained as f32 / pattern_ngrams.len() as f32
    }

    /// Token-frequency cosine similarity vs a raw pattern string.
    pub fn token_cosine_similarity(&self, pattern: &str) -> f32 {
        // Tokenize the pattern the same way message tokens are produced.
        let mut pattern_freq: HashMap<String, usize> = HashMap::new();
        for word in pattern.to_lowercase().split_whitespace() {
            let stripped = word.trim_matches(PUNCT_TRIM);
            if !stripped.is_empty() {
                *pattern_freq.entry(stripped.to_string()).or_insert(0) += 1;
            }
        }
        cosine_freq(&self.token_frequency, &pattern_freq)
    }

    /// Layered match against a pre-normalized pattern. Mirrors
    /// `matches_normalized_pattern` from the reference: exact phrase ->
    /// char-ngram Jaccard -> token cosine. Returns true on the first
    /// layer whose threshold is met.
    pub fn matches_normalized_pattern(
        &self,
        pattern: &NormalizedPattern,
        char_ngram_threshold: f32,
        token_cosine_threshold: f32,
    ) -> bool {
        // Layer 0: exact phrase match using pre-tokenized message.
        let plen = pattern.tokens.len();
        let slen = self.tokens.len();
        if plen > 0 && plen <= slen {
            for i in 0..=slen - plen {
                if self.tokens[i..i + plen] == pattern.tokens[..] {
                    return true;
                }
            }
        }

        // Layer 1: character n-gram Jaccard similarity.
        if !self.char_ngram_set.is_empty() && !pattern.char_ngram_set.is_empty() {
            let inter = self
                .char_ngram_set
                .intersection(&pattern.char_ngram_set)
                .count();
            let union = self.char_ngram_set.union(&pattern.char_ngram_set).count();
            if union > 0 {
                let sim = inter as f32 / union as f32;
                if sim >= char_ngram_threshold {
                    return true;
                }
            }
        }

        // Layer 2: token frequency cosine similarity.
        if !self.token_frequency.is_empty() && !pattern.token_frequency.is_empty() {
            let sim = cosine_freq(&self.token_frequency, &pattern.token_frequency);
            if sim >= token_cosine_threshold {
                return true;
            }
        }

        false
    }
}
|
||||||
|
|
||||||
|
/// Pre-processed pattern with normalized text and pre-computed n-grams/tokens.
#[derive(Debug, Clone, Default)]
pub struct NormalizedPattern {
    // Original pattern text, unmodified.
    pub raw: String,
    // Boundary-punctuation-trimmed tokens (internal punctuation kept).
    pub tokens: Vec<String>,
    // Character n-grams over the fully punctuation-stripped text.
    pub char_ngram_set: HashSet<String>,
    // Token counts over the fully punctuation-stripped text.
    pub token_frequency: HashMap<String, usize>,
}
|
||||||
|
|
||||||
|
impl NormalizedPattern {
    /// Pre-process a raw pattern string. Applies the same
    /// unicode-punctuation and whitespace normalization as
    /// `NormalizedMessage::from_text` so patterns and messages compare on
    /// equal footing.
    pub fn from_text(pattern: &str) -> Self {
        let normalized = pattern
            .to_lowercase()
            .replace(['\u{2019}', '\u{2018}'], "'")
            .replace(['\u{201c}', '\u{201d}'], "\"")
            .replace(['\u{2013}', '\u{2014}'], "-");
        let normalized: String = normalized.split_whitespace().collect::<Vec<_>>().join(" ");

        // Tokenize the same way as NormalizedMessage (trim boundary punctuation,
        // keep internal punctuation).
        let mut tokens: Vec<String> = Vec::new();
        for word in normalized.split_whitespace() {
            let stripped = word.trim_matches(PUNCT_TRIM);
            if !stripped.is_empty() {
                tokens.push(stripped.to_string());
            }
        }

        // For ngrams + cosine, strip ALL punctuation (matches Python's
        // `re.sub(r"[^\w\s]", "", normalized)`).
        let normalized_for_ngrams = strip_non_word_chars(&normalized);
        let char_ngram_set = char_ngrams(&normalized_for_ngrams, NGRAM_SIZE);

        // Frequency table built from the punctuation-free token stream.
        let tokens_no_punct: Vec<&str> = normalized_for_ngrams.split_whitespace().collect();
        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for t in &tokens_no_punct {
            *token_frequency.entry((*t).to_string()).or_insert(0) += 1;
        }

        Self {
            raw: pattern.to_string(),
            tokens,
            char_ngram_set,
            token_frequency,
        }
    }
}
|
||||||
|
|
||||||
|
/// Convenience: normalize a list of raw pattern strings into `NormalizedPattern`s.
|
||||||
|
pub fn normalize_patterns(patterns: &[&str]) -> Vec<NormalizedPattern> {
|
||||||
|
patterns
|
||||||
|
.iter()
|
||||||
|
.map(|p| NormalizedPattern::from_text(p))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Similarity primitives
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Build the set of character `n`-grams of `s`.
///
/// Iterates by Unicode character (not byte) to mirror the Python
/// reference, which indexes by character position.
fn char_ngrams(s: &str, n: usize) -> HashSet<String> {
    let chars: Vec<char> = s.chars().collect();
    if chars.len() < n {
        return HashSet::new();
    }
    (0..=chars.len() - n)
        .map(|start| chars[start..start + n].iter().collect::<String>())
        .collect()
}
|
||||||
|
|
||||||
|
/// Jaccard similarity (|A ∩ B| / |A ∪ B|) of two string sets.
///
/// Edge cases mirror the Python reference: two empty sets are defined as
/// identical (1.0); exactly one empty set yields 0.0.
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let inter = a.intersection(b).count();
    // Both sets are non-empty here, so the union has at least one element;
    // the former `union == 0` guard was unreachable and has been removed.
    let union = a.union(b).count();
    inter as f32 / union as f32
}
|
||||||
|
|
||||||
|
/// Cosine similarity between two token-frequency maps.
///
/// Edge cases mirror the Python reference: two empty maps compare as 1.0,
/// exactly one empty map as 0.0. Accumulates in f64 for precision before
/// narrowing to f32.
fn cosine_freq(a: &HashMap<String, usize>, b: &HashMap<String, usize>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    // Dot product over the tokens of `b`; tokens missing from `a` count 0.
    let dot: f64 = b
        .iter()
        .map(|(token, &freq2)| (a.get(token).copied().unwrap_or(0) * freq2) as f64)
        .sum();
    let norm_a = a.values().map(|&f| (f * f) as f64).sum::<f64>().sqrt();
    let norm_b = b.values().map(|&f| (f * f) as f64).sum::<f64>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        (dot / (norm_a * norm_b)) as f32
    }
}
|
||||||
|
|
||||||
|
/// Python equivalent: `re.sub(r"[^\w\s]", "", text)` followed by whitespace
/// collapse. Python's `\w` is `[A-Za-z0-9_]` plus unicode word characters; we
/// use Rust's `char::is_alphanumeric()` plus `_` for an equivalent definition.
fn strip_non_word_chars(text: &str) -> String {
    // Drop everything except word characters and whitespace...
    let kept: String = text
        .chars()
        .filter(|&c| c.is_alphanumeric() || c == '_' || c.is_whitespace())
        .collect();
    // ...then collapse runs of whitespace to single spaces.
    kept.split_whitespace().collect::<Vec<_>>().join(" ")
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize_lowercases_and_strips_punctuation() {
        let msg = NormalizedMessage::from_text("Hello, World!", 2000);
        assert_eq!(msg.tokens, vec!["hello".to_string(), "world".to_string()]);
    }

    #[test]
    fn normalizes_smart_quotes() {
        let msg = NormalizedMessage::from_text("don\u{2019}t", 2000);
        assert!(msg.tokens.contains(&"don't".to_string()));
    }

    #[test]
    fn truncates_long_text_with_head_tail() {
        let input = "a".repeat(3000);
        let msg = NormalizedMessage::from_text(&input, 2000);
        // Head + joining space + tail must fit the limit (+1 for the space).
        assert!(msg.raw.chars().count() <= 2001);
        assert!(msg.raw.starts_with("aa"));
        assert!(msg.raw.ends_with("aa"));
    }

    #[test]
    fn contains_phrase_matches_consecutive_tokens() {
        let msg = NormalizedMessage::from_text("I think this is great work", 2000);
        assert!(msg.contains_phrase("this is great"));
        assert!(!msg.contains_phrase("great this"));
    }

    #[test]
    fn matches_pattern_via_exact_phrase() {
        let msg = NormalizedMessage::from_text("No, I meant the second one", 2000);
        let pat = NormalizedPattern::from_text("no i meant");
        assert!(msg.matches_normalized_pattern(&pat, 0.65, 0.6));
    }

    #[test]
    fn matches_pattern_via_char_ngram_fuzziness() {
        // Typo in "meant" -> "ment" defeats the exact-phrase layer, so the
        // match has to come from the char n-gram Jaccard fallback.
        let msg = NormalizedMessage::from_text("No I ment", 2000);
        let pat = NormalizedPattern::from_text("no i meant");
        assert!(msg.matches_normalized_pattern(&pat, 0.4, 0.6));
    }

    #[test]
    fn jaccard_identical_sets_is_one() {
        let grams: HashSet<String> = ["abc", "bcd"].iter().map(|g| g.to_string()).collect();
        assert!((jaccard(&grams, &grams) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_freq_orthogonal_is_zero() {
        let mut left: HashMap<String, usize> = HashMap::new();
        left.insert("hello".to_string(), 1);
        let mut right: HashMap<String, usize> = HashMap::new();
        right.insert("world".to_string(), 1);
        assert_eq!(cosine_freq(&left, &right), 0.0);
    }
}
|
||||||
|
|
@ -20,8 +20,11 @@ const STREAM_BUFFER_SIZE: usize = 16;
|
||||||
/// Most chat responses are well under this; pathological ones are dropped without
|
/// Most chat responses are well under this; pathological ones are dropped without
|
||||||
/// affecting pass-through streaming to the client.
|
/// affecting pass-through streaming to the client.
|
||||||
const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
|
const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
|
||||||
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
|
use crate::metrics as bs_metrics;
|
||||||
use crate::tracing::{llm, set_service_name, signals as signal_constants};
|
use crate::metrics::labels as metric_labels;
|
||||||
|
use crate::signals::otel::emit_signals_to_span;
|
||||||
|
use crate::signals::{SignalAnalyzer, FLAG_MARKER};
|
||||||
|
use crate::tracing::{llm, set_service_name};
|
||||||
use hermesllm::apis::openai::Message;
|
use hermesllm::apis::openai::Message;
|
||||||
|
|
||||||
/// Parsed usage + resolved-model details from a provider response.
|
/// Parsed usage + resolved-model details from a provider response.
|
||||||
|
|
@ -172,6 +175,18 @@ impl StreamProcessor for Box<dyn StreamProcessor> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Optional Prometheus-metric context for an LLM upstream call. When present,
|
||||||
|
/// [`ObservableStreamProcessor`] emits `brightstaff_llm_*` metrics at
|
||||||
|
/// first-byte / complete / error callbacks.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct LlmMetricsCtx {
|
||||||
|
pub provider: String,
|
||||||
|
pub model: String,
|
||||||
|
/// HTTP status of the upstream response. Used to pick `status_class` and
|
||||||
|
/// `error_class` on `on_complete`.
|
||||||
|
pub upstream_status: u16,
|
||||||
|
}
|
||||||
|
|
||||||
/// A processor that tracks streaming metrics
|
/// A processor that tracks streaming metrics
|
||||||
pub struct ObservableStreamProcessor {
|
pub struct ObservableStreamProcessor {
|
||||||
service_name: String,
|
service_name: String,
|
||||||
|
|
@ -185,6 +200,8 @@ pub struct ObservableStreamProcessor {
|
||||||
/// on `on_complete`. Capped at `USAGE_BUFFER_MAX`; excess chunks are dropped
|
/// on `on_complete`. Capped at `USAGE_BUFFER_MAX`; excess chunks are dropped
|
||||||
/// from the buffer (they still pass through to the client).
|
/// from the buffer (they still pass through to the client).
|
||||||
response_buffer: Vec<u8>,
|
response_buffer: Vec<u8>,
|
||||||
|
llm_metrics: Option<LlmMetricsCtx>,
|
||||||
|
metrics_recorded: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ObservableStreamProcessor {
|
impl ObservableStreamProcessor {
|
||||||
|
|
@ -219,8 +236,17 @@ impl ObservableStreamProcessor {
|
||||||
time_to_first_token: None,
|
time_to_first_token: None,
|
||||||
messages,
|
messages,
|
||||||
response_buffer: Vec::new(),
|
response_buffer: Vec::new(),
|
||||||
|
llm_metrics: None,
|
||||||
|
metrics_recorded: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attach LLM upstream metric context so the processor emits
|
||||||
|
/// `brightstaff_llm_*` metrics on first-byte / complete / error.
|
||||||
|
pub fn with_llm_metrics(mut self, ctx: LlmMetricsCtx) -> Self {
|
||||||
|
self.llm_metrics = Some(ctx);
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StreamProcessor for ObservableStreamProcessor {
|
impl StreamProcessor for ObservableStreamProcessor {
|
||||||
|
|
@ -240,7 +266,11 @@ impl StreamProcessor for ObservableStreamProcessor {
|
||||||
fn on_first_bytes(&mut self) {
|
fn on_first_bytes(&mut self) {
|
||||||
// Record time to first token (only for streaming)
|
// Record time to first token (only for streaming)
|
||||||
if self.time_to_first_token.is_none() {
|
if self.time_to_first_token.is_none() {
|
||||||
self.time_to_first_token = Some(self.start_time.elapsed().as_millis());
|
let elapsed = self.start_time.elapsed();
|
||||||
|
self.time_to_first_token = Some(elapsed.as_millis());
|
||||||
|
if let Some(ref ctx) = self.llm_metrics {
|
||||||
|
bs_metrics::record_llm_ttft(&ctx.provider, &ctx.model, elapsed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -299,81 +329,56 @@ impl StreamProcessor for ObservableStreamProcessor {
|
||||||
otel_span.set_attribute(KeyValue::new(llm::MODEL_NAME, resolved));
|
otel_span.set_attribute(KeyValue::new(llm::MODEL_NAME, resolved));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit LLM upstream prometheus metrics (duration + tokens) if wired.
|
||||||
|
// The upstream responded (we have a status), so status_class alone
|
||||||
|
// carries the non-2xx signal — error_class stays "none".
|
||||||
|
if let Some(ref ctx) = self.llm_metrics {
|
||||||
|
bs_metrics::record_llm_upstream(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
ctx.upstream_status,
|
||||||
|
metric_labels::LLM_ERR_NONE,
|
||||||
|
self.start_time.elapsed(),
|
||||||
|
);
|
||||||
|
if let Some(v) = usage.prompt_tokens {
|
||||||
|
bs_metrics::record_llm_tokens(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
metric_labels::TOKEN_KIND_PROMPT,
|
||||||
|
v.max(0) as u64,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if let Some(v) = usage.completion_tokens {
|
||||||
|
bs_metrics::record_llm_tokens(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
metric_labels::TOKEN_KIND_COMPLETION,
|
||||||
|
v.max(0) as u64,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if usage.prompt_tokens.is_none() && usage.completion_tokens.is_none() {
|
||||||
|
bs_metrics::record_llm_tokens_usage_missing(&ctx.provider, &ctx.model);
|
||||||
|
}
|
||||||
|
self.metrics_recorded = true;
|
||||||
|
}
|
||||||
// Release the buffered bytes early; nothing downstream needs them.
|
// Release the buffered bytes early; nothing downstream needs them.
|
||||||
self.response_buffer.clear();
|
self.response_buffer.clear();
|
||||||
self.response_buffer.shrink_to_fit();
|
self.response_buffer.shrink_to_fit();
|
||||||
|
|
||||||
// Analyze signals if messages are available and record as span attributes
|
// Analyze signals if messages are available and record as span
|
||||||
|
// attributes + per-signal events. We dual-emit legacy aggregate keys
|
||||||
|
// and the new layered taxonomy so existing dashboards keep working
|
||||||
|
// while new consumers can opt into the richer hierarchy.
|
||||||
if let Some(ref messages) = self.messages {
|
if let Some(ref messages) = self.messages {
|
||||||
let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
|
let analyzer = SignalAnalyzer::default();
|
||||||
let report = analyzer.analyze(messages);
|
let report = analyzer.analyze_openai(messages);
|
||||||
|
|
||||||
// Get the current OTel span to set signal attributes
|
|
||||||
let span = tracing::Span::current();
|
let span = tracing::Span::current();
|
||||||
let otel_context = span.context();
|
let otel_context = span.context();
|
||||||
let otel_span = otel_context.span();
|
let otel_span = otel_context.span();
|
||||||
|
|
||||||
// Add overall quality
|
let should_flag = emit_signals_to_span(&otel_span, &report);
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::QUALITY,
|
|
||||||
format!("{:?}", report.overall_quality),
|
|
||||||
));
|
|
||||||
|
|
||||||
// Add repair/follow-up metrics if concerning
|
|
||||||
if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
|
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::REPAIR_COUNT,
|
|
||||||
report.follow_up.repair_count as i64,
|
|
||||||
));
|
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::REPAIR_RATIO,
|
|
||||||
format!("{:.3}", report.follow_up.repair_ratio),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add frustration metrics
|
|
||||||
if report.frustration.has_frustration {
|
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::FRUSTRATION_COUNT,
|
|
||||||
report.frustration.frustration_count as i64,
|
|
||||||
));
|
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::FRUSTRATION_SEVERITY,
|
|
||||||
report.frustration.severity as i64,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add repetition metrics
|
|
||||||
if report.repetition.has_looping {
|
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::REPETITION_COUNT,
|
|
||||||
report.repetition.repetition_count as i64,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add escalation metrics
|
|
||||||
if report.escalation.escalation_requested {
|
|
||||||
otel_span
|
|
||||||
.set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add positive feedback metrics
|
|
||||||
if report.positive_feedback.has_positive_feedback {
|
|
||||||
otel_span.set_attribute(KeyValue::new(
|
|
||||||
signal_constants::POSITIVE_FEEDBACK_COUNT,
|
|
||||||
report.positive_feedback.positive_count as i64,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flag the span name if any concerning signal is detected
|
|
||||||
let should_flag = report.frustration.has_frustration
|
|
||||||
|| report.repetition.has_looping
|
|
||||||
|| report.escalation.escalation_requested
|
|
||||||
|| matches!(
|
|
||||||
report.overall_quality,
|
|
||||||
InteractionQuality::Poor | InteractionQuality::Severe
|
|
||||||
);
|
|
||||||
|
|
||||||
if should_flag {
|
if should_flag {
|
||||||
otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
|
otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
|
||||||
}
|
}
|
||||||
|
|
@ -396,6 +401,18 @@ impl StreamProcessor for ObservableStreamProcessor {
|
||||||
duration_ms = self.start_time.elapsed().as_millis(),
|
duration_ms = self.start_time.elapsed().as_millis(),
|
||||||
"stream error"
|
"stream error"
|
||||||
);
|
);
|
||||||
|
if let Some(ref ctx) = self.llm_metrics {
|
||||||
|
if !self.metrics_recorded {
|
||||||
|
bs_metrics::record_llm_upstream(
|
||||||
|
&ctx.provider,
|
||||||
|
&ctx.model,
|
||||||
|
ctx.upstream_status,
|
||||||
|
metric_labels::LLM_ERR_STREAM,
|
||||||
|
self.start_time.elapsed(),
|
||||||
|
);
|
||||||
|
self.metrics_recorded = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -234,6 +234,7 @@ pub struct Overrides {
|
||||||
pub llm_routing_model: Option<String>,
|
pub llm_routing_model: Option<String>,
|
||||||
pub agent_orchestration_model: Option<String>,
|
pub agent_orchestration_model: Option<String>,
|
||||||
pub orchestrator_model_context_length: Option<usize>,
|
pub orchestrator_model_context_length: Option<usize>,
|
||||||
|
pub disable_signals: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||||
|
|
@ -755,4 +756,29 @@ mod test {
|
||||||
assert!(model_ids.contains(&"openai-gpt4".to_string()));
|
assert!(model_ids.contains(&"openai-gpt4".to_string()));
|
||||||
assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
|
assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_overrides_disable_signals_default_none() {
|
||||||
|
let overrides = super::Overrides::default();
|
||||||
|
assert_eq!(overrides.disable_signals, None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_overrides_disable_signals_deserialize() {
|
||||||
|
let yaml = r#"
|
||||||
|
disable_signals: true
|
||||||
|
"#;
|
||||||
|
let overrides: super::Overrides = serde_yaml::from_str(yaml).unwrap();
|
||||||
|
assert_eq!(overrides.disable_signals, Some(true));
|
||||||
|
|
||||||
|
let yaml_false = r#"
|
||||||
|
disable_signals: false
|
||||||
|
"#;
|
||||||
|
let overrides: super::Overrides = serde_yaml::from_str(yaml_false).unwrap();
|
||||||
|
assert_eq!(overrides.disable_signals, Some(false));
|
||||||
|
|
||||||
|
let yaml_missing = "{}";
|
||||||
|
let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
|
||||||
|
assert_eq!(overrides.disable_signals, None);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -346,12 +346,10 @@ impl TryFrom<(SseEvent, &SupportedAPIsFromClient, &SupportedUpstreamAPIs)> for S
|
||||||
(
|
(
|
||||||
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
||||||
SupportedUpstreamAPIs::AnthropicMessagesAPI(_),
|
SupportedUpstreamAPIs::AnthropicMessagesAPI(_),
|
||||||
) => {
|
) if transformed_event.is_event_only() && transformed_event.event.is_some() => {
|
||||||
// OpenAI clients don't expect separate event: lines
|
// OpenAI clients don't expect separate event: lines
|
||||||
// Suppress upstream Anthropic event-only lines
|
// Suppress upstream Anthropic event-only lines
|
||||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
transformed_event.sse_transformed_lines = "\n".to_string();
|
||||||
transformed_event.sse_transformed_lines = "\n".to_string();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Other cross-API combinations can be handled here as needed
|
// Other cross-API combinations can be handled here as needed
|
||||||
|
|
@ -371,12 +369,10 @@ impl TryFrom<(SseEvent, &SupportedAPIsFromClient, &SupportedUpstreamAPIs)> for S
|
||||||
| (
|
| (
|
||||||
SupportedAPIsFromClient::OpenAIResponsesAPI(_),
|
SupportedAPIsFromClient::OpenAIResponsesAPI(_),
|
||||||
SupportedUpstreamAPIs::OpenAIResponsesAPI(_),
|
SupportedUpstreamAPIs::OpenAIResponsesAPI(_),
|
||||||
) => {
|
) if transformed_event.is_event_only() && transformed_event.event.is_some() => {
|
||||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
// Mark as should-skip by clearing sse_transformed_lines
|
||||||
// Mark as should-skip by clearing sse_transformed_lines
|
// The event line is already included when the data line is transformed
|
||||||
// The event line is already included when the data line is transformed
|
transformed_event.sse_transformed_lines = String::new();
|
||||||
transformed_event.sse_transformed_lines = String::new();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Other passthrough combinations (OpenAI ChatCompletions, etc.) don't have this issue
|
// Other passthrough combinations (OpenAI ChatCompletions, etc.) don't have this issue
|
||||||
|
|
|
||||||
|
|
@ -188,14 +188,13 @@ pub fn convert_openai_message_to_anthropic_content(
|
||||||
|
|
||||||
// Handle regular content
|
// Handle regular content
|
||||||
match &message.content {
|
match &message.content {
|
||||||
Some(MessageContent::Text(text)) => {
|
Some(MessageContent::Text(text)) if !text.is_empty() => {
|
||||||
if !text.is_empty() {
|
blocks.push(MessagesContentBlock::Text {
|
||||||
blocks.push(MessagesContentBlock::Text {
|
text: text.clone(),
|
||||||
text: text.clone(),
|
cache_control: None,
|
||||||
cache_control: None,
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Some(MessageContent::Text(_)) => {}
|
||||||
Some(MessageContent::Parts(parts)) => {
|
Some(MessageContent::Parts(parts)) => {
|
||||||
for part in parts {
|
for part in parts {
|
||||||
match part {
|
match part {
|
||||||
|
|
|
||||||
|
|
@ -354,10 +354,10 @@ impl TryFrom<MessagesMessage> for BedrockMessage {
|
||||||
MessagesMessageContent::Blocks(blocks) => {
|
MessagesMessageContent::Blocks(blocks) => {
|
||||||
for block in blocks {
|
for block in blocks {
|
||||||
match block {
|
match block {
|
||||||
crate::apis::anthropic::MessagesContentBlock::Text { text, .. } => {
|
crate::apis::anthropic::MessagesContentBlock::Text { text, .. }
|
||||||
if !text.is_empty() {
|
if !text.is_empty() =>
|
||||||
content_blocks.push(ContentBlock::Text { text });
|
{
|
||||||
}
|
content_blocks.push(ContentBlock::Text { text });
|
||||||
}
|
}
|
||||||
crate::apis::anthropic::MessagesContentBlock::ToolUse {
|
crate::apis::anthropic::MessagesContentBlock::ToolUse {
|
||||||
id,
|
id,
|
||||||
|
|
|
||||||
|
|
@ -317,11 +317,10 @@ impl TryFrom<Message> for BedrockMessage {
|
||||||
Role::User => {
|
Role::User => {
|
||||||
// Convert user message content to content blocks
|
// Convert user message content to content blocks
|
||||||
match message.content {
|
match message.content {
|
||||||
Some(MessageContent::Text(text)) => {
|
Some(MessageContent::Text(text)) if !text.is_empty() => {
|
||||||
if !text.is_empty() {
|
content_blocks.push(ContentBlock::Text { text });
|
||||||
content_blocks.push(ContentBlock::Text { text });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Some(MessageContent::Text(_)) => {}
|
||||||
Some(MessageContent::Parts(parts)) => {
|
Some(MessageContent::Parts(parts)) => {
|
||||||
// Convert OpenAI content parts to Bedrock ContentBlocks
|
// Convert OpenAI content parts to Bedrock ContentBlocks
|
||||||
for part in parts {
|
for part in parts {
|
||||||
|
|
|
||||||
|
|
@ -75,3 +75,54 @@ are some sample configuration files for both, respectively.
|
||||||
isDefault: true
|
isDefault: true
|
||||||
access: proxy
|
access: proxy
|
||||||
editable: true
|
editable: true
|
||||||
|
|
||||||
|
Brightstaff metrics
|
||||||
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
In addition to Envoy's stats on ``:9901``, the brightstaff dataplane
|
||||||
|
process exposes its own Prometheus endpoint on ``0.0.0.0:9092`` (override
|
||||||
|
with ``METRICS_BIND_ADDRESS``). It publishes:
|
||||||
|
|
||||||
|
* HTTP RED — ``brightstaff_http_requests_total``,
|
||||||
|
``brightstaff_http_request_duration_seconds``,
|
||||||
|
``brightstaff_http_in_flight_requests`` (labels: ``handler``, ``method``,
|
||||||
|
``status_class``).
|
||||||
|
* LLM upstream — ``brightstaff_llm_upstream_requests_total``,
|
||||||
|
``brightstaff_llm_upstream_duration_seconds``,
|
||||||
|
``brightstaff_llm_time_to_first_token_seconds``,
|
||||||
|
``brightstaff_llm_tokens_total`` (labels: ``provider``, ``model``,
|
||||||
|
``error_class``, ``kind``).
|
||||||
|
* Routing — ``brightstaff_router_decisions_total``,
|
||||||
|
``brightstaff_router_decision_duration_seconds``,
|
||||||
|
``brightstaff_routing_service_requests_total``,
|
||||||
|
``brightstaff_session_cache_events_total``.
|
||||||
|
* Process & build — ``process_resident_memory_bytes``,
|
||||||
|
``process_cpu_seconds_total``, ``brightstaff_build_info``.
|
||||||
|
|
||||||
|
A self-contained Prometheus + Grafana stack is shipped under
|
||||||
|
``config/grafana/``. With Plano already running on the host, bring it up
|
||||||
|
with one command:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd config/grafana
|
||||||
|
docker compose up -d
|
||||||
|
open http://localhost:3000 # admin / admin (anonymous viewer also enabled)
|
||||||
|
|
||||||
|
Grafana auto-loads the Prometheus datasource and the brightstaff
|
||||||
|
dashboard (look under the *Plano* folder). Prometheus scrapes the host's
|
||||||
|
``:9092`` and ``:9901`` via ``host.docker.internal``.
|
||||||
|
|
||||||
|
Files:
|
||||||
|
|
||||||
|
* ``config/grafana/docker-compose.yaml`` — one-command Prom + Grafana
|
||||||
|
stack with provisioning.
|
||||||
|
* ``config/grafana/prometheus_scrape.yaml`` — complete Prometheus config
|
||||||
|
with ``envoy`` and ``brightstaff`` scrape jobs (mounted by the
|
||||||
|
compose).
|
||||||
|
* ``config/grafana/brightstaff_dashboard.json`` — 19-panel dashboard
|
||||||
|
across HTTP RED, LLM upstream, Routing service, and Process & Envoy
|
||||||
|
link rows. Auto-provisioned by the compose; can also be imported by
|
||||||
|
hand via *Dashboards → New → Import*.
|
||||||
|
* ``config/grafana/provisioning/`` — Grafana provisioning files for the
|
||||||
|
datasource and dashboard provider.
|
||||||
|
|
|
||||||
|
|
@ -173,6 +173,9 @@ overrides:
|
||||||
llm_routing_model: Plano-Orchestrator
|
llm_routing_model: Plano-Orchestrator
|
||||||
# Model used for agent orchestration (must be listed in model_providers)
|
# Model used for agent orchestration (must be listed in model_providers)
|
||||||
agent_orchestration_model: Plano-Orchestrator
|
agent_orchestration_model: Plano-Orchestrator
|
||||||
|
# Disable agentic signal analysis (frustration, repetition, escalation, etc.)
|
||||||
|
# on LLM responses to save CPU. Default: false.
|
||||||
|
disable_signals: false
|
||||||
|
|
||||||
# Model affinity — pin routing decisions for agentic loops
|
# Model affinity — pin routing decisions for agentic loops
|
||||||
routing:
|
routing:
|
||||||
|
|
|
||||||
|
|
@ -170,6 +170,7 @@ model_providers:
|
||||||
provider_interface: plano
|
provider_interface: plano
|
||||||
overrides:
|
overrides:
|
||||||
agent_orchestration_model: Plano-Orchestrator
|
agent_orchestration_model: Plano-Orchestrator
|
||||||
|
disable_signals: false
|
||||||
llm_routing_model: Plano-Orchestrator
|
llm_routing_model: Plano-Orchestrator
|
||||||
optimize_context_window: true
|
optimize_context_window: true
|
||||||
prompt_target_intent_matching_threshold: 0.7
|
prompt_target_intent_matching_threshold: 0.7
|
||||||
|
|
|
||||||
4
tests/parity/signals/.gitignore
vendored
Normal file
4
tests/parity/signals/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
out/
|
||||||
|
.venv/
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
98
tests/parity/signals/README.md
Normal file
98
tests/parity/signals/README.md
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
# Signals Parity Harness
|
||||||
|
|
||||||
|
Validates that `crates/brightstaff/src/signals/` (Rust port) produces the same
|
||||||
|
`SignalReport` as the Python reference at <https://github.com/katanemo/signals>
|
||||||
|
on a fixed sample of `lmsys/lmsys-chat-1m` conversations.
|
||||||
|
|
||||||
|
This harness is **not** part of normal CI. It downloads several GB and is run
|
||||||
|
on demand to gate releases of the signals subsystem (or to investigate
|
||||||
|
regressions reported in production).
|
||||||
|
|
||||||
|
## What gets compared
|
||||||
|
|
||||||
|
For each conversation, both analyzers emit a `SignalReport`. The comparator
|
||||||
|
classifies any divergence into three tiers:
|
||||||
|
|
||||||
|
| Tier | Field | Action on divergence |
|
||||||
|
|------|------------------------------------------------|----------------------|
|
||||||
|
| A | set of `SignalType` present, per-type counts, `overall_quality` | Fail the run |
|
||||||
|
| B | per-instance `message_index`, instance counts per type | Log + collect, do not fail |
|
||||||
|
| C | metadata, snippet text, summary | Information only |
|
||||||
|
|
||||||
|
Quality buckets are compared by string (`excellent` / `good` / ...).
|
||||||
|
|
||||||
|
## What this harness does *not* cover
|
||||||
|
|
||||||
|
`lmsys-chat-1m` is plain user/assistant chat. It exercises the **interaction**
|
||||||
|
layer well (misalignment, stagnation, disengagement, satisfaction) but does
|
||||||
|
**not** exercise:
|
||||||
|
|
||||||
|
- `execution.failure.*`
|
||||||
|
- `execution.loops.*`
|
||||||
|
- `environment.exhaustion.*`
|
||||||
|
|
||||||
|
Those signals require `function_call` / `observation` ShareGPT roles. They are
|
||||||
|
covered by the Rust unit tests and the Python repo's own test fixtures, both
|
||||||
|
of which run on every PR. A synthetic tool-trace dataset for full coverage is
|
||||||
|
deferred to a follow-up.
|
||||||
|
|
||||||
|
## One-time setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Build the Rust replay binary.
|
||||||
|
cd ../../../crates && cargo build --release -p brightstaff --bin signals_replay
|
||||||
|
|
||||||
|
# 2. Set up the Python environment for the harness driver.
|
||||||
|
cd ../tests/parity/signals
|
||||||
|
python3 -m venv .venv && source .venv/bin/activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 3. Install the Python signals reference.
|
||||||
|
# Either point at a local checkout:
|
||||||
|
pip install -e /path/to/signals
|
||||||
|
# or pull from git:
|
||||||
|
pip install 'signals @ git+https://github.com/katanemo/signals@<sha>'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
python run_parity.py \
|
||||||
|
--num-samples 2000 \
|
||||||
|
--seed 42 \
|
||||||
|
--dataset-revision <hf-dataset-revision-sha> \
|
||||||
|
--rust-binary ../../../crates/target/release/signals_replay \
|
||||||
|
--output-dir out/
|
||||||
|
|
||||||
|
python compare.py --output-dir out/
|
||||||
|
```
|
||||||
|
|
||||||
|
`run_parity.py` will:
|
||||||
|
|
||||||
|
1. Download `lmsys/lmsys-chat-1m` (cached in `~/.cache/huggingface`).
|
||||||
|
2. Pick `--num-samples` rows under `--seed`.
|
||||||
|
3. Convert each to ShareGPT, write `out/conversations.jsonl`.
|
||||||
|
4. Run the Rust binary as a subprocess → `out/rust_reports.jsonl`.
|
||||||
|
5. Run the Python analyzer in-process → `out/python_reports.jsonl`.
|
||||||
|
|
||||||
|
`compare.py` reads both report files and writes:
|
||||||
|
|
||||||
|
- `out/diffs.jsonl` — one record per mismatched conversation, with tier + structural diff
|
||||||
|
- `out/metrics.json` — agreement %, per-`SignalType` confusion matrix, quality-bucket confusion matrix
|
||||||
|
- `out/summary.md` — human-readable PR-ready report
|
||||||
|
|
||||||
|
Exit code is non-zero iff any Tier-A divergence is observed.
|
||||||
|
|
||||||
|
## Reproducibility
|
||||||
|
|
||||||
|
Every run pins:
|
||||||
|
|
||||||
|
- `dataset_revision` — the HF dataset commit
|
||||||
|
- `seed` — RNG seed for sampling
|
||||||
|
- `signals_python_version` — `pip show signals` version
|
||||||
|
- `plano_git_sha` — `git rev-parse HEAD` of this repo
|
||||||
|
- `signals_replay_binary_sha256` — the hash of the Rust bin
|
||||||
|
|
||||||
|
All are stamped into `metrics.json`.
|
||||||
103
tests/parity/signals/_smoke_test.py
Normal file
103
tests/parity/signals/_smoke_test.py
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Local smoke test for the parity harness — runs both runners on a tiny
|
||||||
|
hand-picked set of conversations without touching the lmsys dataset.
|
||||||
|
|
||||||
|
Run from this directory:
|
||||||
|
python _smoke_test.py --rust-binary <path>
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from signals.analyzer import SignalAnalyzer
|
||||||
|
|
||||||
|
SAMPLES = [
|
||||||
|
{
|
||||||
|
"id": "smoke-gratitude",
|
||||||
|
"messages": [
|
||||||
|
{"from": "human", "value": "What is the weather in Istanbul?"},
|
||||||
|
{"from": "gpt", "value": "Istanbul is 14C and partly cloudy."},
|
||||||
|
{"from": "human", "value": "That worked, exactly what I needed. Thanks!"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "smoke-escalation",
|
||||||
|
"messages": [
|
||||||
|
{"from": "human", "value": "This isn't helpful at all"},
|
||||||
|
{"from": "gpt", "value": "I'm sorry, can you tell me more?"},
|
||||||
|
{"from": "human", "value": "Get me a human, this is useless"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "smoke-correction",
|
||||||
|
"messages": [
|
||||||
|
{"from": "human", "value": "Book me a flight to NYC for tomorrow"},
|
||||||
|
{"from": "gpt", "value": "Sure, here are flights to NYC for Friday."},
|
||||||
|
{
|
||||||
|
"from": "human",
|
||||||
|
"value": "No, I meant flights for Saturday, not tomorrow",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "smoke-clean",
|
||||||
|
"messages": [
|
||||||
|
{"from": "human", "value": "Hi"},
|
||||||
|
{"from": "gpt", "value": "Hello, how can I help?"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "smoke-rephrase",
|
||||||
|
"messages": [
|
||||||
|
{"from": "human", "value": "Can you summarize the news please"},
|
||||||
|
{"from": "gpt", "value": "Sure, here is a summary."},
|
||||||
|
{"from": "human", "value": "Could you please summarize the news"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
p = argparse.ArgumentParser()
|
||||||
|
p.add_argument("--rust-binary", required=True, type=Path)
|
||||||
|
args = p.parse_args()
|
||||||
|
|
||||||
|
out_dir = Path("out_smoke")
|
||||||
|
out_dir.mkdir(exist_ok=True)
|
||||||
|
conv_path = out_dir / "conversations.jsonl"
|
||||||
|
rust_path = out_dir / "rust_reports.jsonl"
|
||||||
|
py_path = out_dir / "python_reports.jsonl"
|
||||||
|
|
||||||
|
with conv_path.open("w") as f:
|
||||||
|
for s in SAMPLES:
|
||||||
|
f.write(json.dumps(s) + "\n")
|
||||||
|
|
||||||
|
with conv_path.open("rb") as fin, rust_path.open("wb") as fout:
|
||||||
|
proc = subprocess.run(
|
||||||
|
[str(args.rust_binary)], stdin=fin, stdout=fout, stderr=subprocess.PIPE
|
||||||
|
)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
sys.stderr.write(proc.stderr.decode("utf-8", errors="replace"))
|
||||||
|
return 2
|
||||||
|
|
||||||
|
analyzer = SignalAnalyzer()
|
||||||
|
with conv_path.open() as fin, py_path.open("w") as fout:
|
||||||
|
for line in fin:
|
||||||
|
obj = json.loads(line)
|
||||||
|
r = analyzer.analyze(obj["messages"])
|
||||||
|
fout.write(json.dumps({"id": obj["id"], "report": r.to_dict()}) + "\n")
|
||||||
|
|
||||||
|
rc = subprocess.call(
|
||||||
|
[sys.executable, "compare.py", "--output-dir", str(out_dir)],
|
||||||
|
)
|
||||||
|
return rc
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
333
tests/parity/signals/compare.py
Normal file
333
tests/parity/signals/compare.py
Normal file
|
|
@ -0,0 +1,333 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Diff Rust vs Python signal reports produced by run_parity.py.
|
||||||
|
|
||||||
|
See README.md for the tier definitions. Exits non-zero iff any Tier-A
|
||||||
|
divergence is found.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
|
CATEGORIES_BY_LAYER = {
|
||||||
|
"interaction_signals": [
|
||||||
|
"misalignment",
|
||||||
|
"stagnation",
|
||||||
|
"disengagement",
|
||||||
|
"satisfaction",
|
||||||
|
],
|
||||||
|
"execution_signals": ["failure", "loops"],
|
||||||
|
"environment_signals": ["exhaustion"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
|
||||||
|
p = argparse.ArgumentParser(description=__doc__)
|
||||||
|
p.add_argument("--output-dir", type=Path, default=Path("out"))
|
||||||
|
return p.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def load_jsonl(path: Path) -> Dict[str, Dict[str, Any]]:
|
||||||
|
"""Load a JSONL file keyed by `id`. Lines with errors are still indexed."""
|
||||||
|
out: Dict[str, Dict[str, Any]] = {}
|
||||||
|
with path.open() as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
obj = json.loads(line)
|
||||||
|
out[str(obj.get("id"))] = obj
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def per_type_counts(report: Dict[str, Any]) -> Dict[str, int]:
|
||||||
|
"""Return {signal_type: count} across all groups in a report dict."""
|
||||||
|
counts: Counter[str] = Counter()
|
||||||
|
for layer in CATEGORIES_BY_LAYER:
|
||||||
|
groups = report.get(layer, {}) or {}
|
||||||
|
for category in CATEGORIES_BY_LAYER[layer]:
|
||||||
|
group = groups.get(category)
|
||||||
|
if not group:
|
||||||
|
continue
|
||||||
|
for sig in group.get("signals", []) or []:
|
||||||
|
counts[sig["signal_type"]] += 1
|
||||||
|
return dict(counts)
|
||||||
|
|
||||||
|
|
||||||
|
def per_type_indices(report: Dict[str, Any]) -> Dict[str, List[int]]:
|
||||||
|
out: Dict[str, List[int]] = defaultdict(list)
|
||||||
|
for layer in CATEGORIES_BY_LAYER:
|
||||||
|
groups = report.get(layer, {}) or {}
|
||||||
|
for category in CATEGORIES_BY_LAYER[layer]:
|
||||||
|
group = groups.get(category)
|
||||||
|
if not group:
|
||||||
|
continue
|
||||||
|
for sig in group.get("signals", []) or []:
|
||||||
|
out[sig["signal_type"]].append(sig.get("message_index"))
|
||||||
|
for k in out:
|
||||||
|
out[k].sort(key=lambda x: (x is None, x))
|
||||||
|
return dict(out)
|
||||||
|
|
||||||
|
|
||||||
|
def diff_counts(a: Dict[str, int], b: Dict[str, int]) -> List[Tuple[str, int, int]]:
|
||||||
|
"""Return [(signal_type, a_count, b_count)] for entries that differ."""
|
||||||
|
keys = set(a) | set(b)
|
||||||
|
out = []
|
||||||
|
for k in sorted(keys):
|
||||||
|
ac = a.get(k, 0)
|
||||||
|
bc = b.get(k, 0)
|
||||||
|
if ac != bc:
|
||||||
|
out.append((k, ac, bc))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def diff_indices(
|
||||||
|
a: Dict[str, List[int]], b: Dict[str, List[int]]
|
||||||
|
) -> List[Tuple[str, List[int], List[int]]]:
|
||||||
|
keys = set(a) | set(b)
|
||||||
|
out = []
|
||||||
|
for k in sorted(keys):
|
||||||
|
ai = a.get(k, [])
|
||||||
|
bi = b.get(k, [])
|
||||||
|
if ai != bi:
|
||||||
|
out.append((k, ai, bi))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def compare_one(
|
||||||
|
convo_id: str, py: Dict[str, Any], rust: Dict[str, Any]
|
||||||
|
) -> Dict[str, Any] | None:
|
||||||
|
"""Compare a single conversation. Return diff record, or None if identical."""
|
||||||
|
if "error" in py or "error" in rust:
|
||||||
|
return {
|
||||||
|
"id": convo_id,
|
||||||
|
"tier": "A",
|
||||||
|
"kind": "error_in_runner",
|
||||||
|
"python_error": py.get("error"),
|
||||||
|
"rust_error": rust.get("error"),
|
||||||
|
}
|
||||||
|
py_report = py["report"]
|
||||||
|
rust_report = rust["report"]
|
||||||
|
|
||||||
|
py_counts = per_type_counts(py_report)
|
||||||
|
rust_counts = per_type_counts(rust_report)
|
||||||
|
count_diff = diff_counts(py_counts, rust_counts)
|
||||||
|
|
||||||
|
py_quality = py_report.get("overall_quality")
|
||||||
|
rust_quality = rust_report.get("overall_quality")
|
||||||
|
quality_mismatch = py_quality != rust_quality
|
||||||
|
|
||||||
|
if count_diff or quality_mismatch:
|
||||||
|
return {
|
||||||
|
"id": convo_id,
|
||||||
|
"tier": "A",
|
||||||
|
"kind": "signal_or_quality_mismatch",
|
||||||
|
"quality": {"python": py_quality, "rust": rust_quality},
|
||||||
|
"count_diff": [
|
||||||
|
{"signal_type": st, "python": pc, "rust": rc}
|
||||||
|
for (st, pc, rc) in count_diff
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
py_idx = per_type_indices(py_report)
|
||||||
|
rust_idx = per_type_indices(rust_report)
|
||||||
|
idx_diff = diff_indices(py_idx, rust_idx)
|
||||||
|
if idx_diff:
|
||||||
|
return {
|
||||||
|
"id": convo_id,
|
||||||
|
"tier": "B",
|
||||||
|
"kind": "instance_index_mismatch",
|
||||||
|
"diff": [
|
||||||
|
{"signal_type": st, "python_indices": pi, "rust_indices": ri}
|
||||||
|
for (st, pi, ri) in idx_diff
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def confusion_matrix(
|
||||||
|
pairs: List[Tuple[str, str]], labels: List[str]
|
||||||
|
) -> Dict[str, Dict[str, int]]:
|
||||||
|
cm: Dict[str, Dict[str, int]] = {a: {b: 0 for b in labels} for a in labels}
|
||||||
|
for py, rust in pairs:
|
||||||
|
if py not in cm:
|
||||||
|
cm[py] = {b: 0 for b in labels}
|
||||||
|
if rust not in cm[py]:
|
||||||
|
cm[py][rust] = 0
|
||||||
|
cm[py][rust] += 1
|
||||||
|
return cm
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
args = parse_args()
|
||||||
|
out_dir = args.output_dir
|
||||||
|
|
||||||
|
py_reports = load_jsonl(out_dir / "python_reports.jsonl")
|
||||||
|
rust_reports = load_jsonl(out_dir / "rust_reports.jsonl")
|
||||||
|
|
||||||
|
common_ids = sorted(set(py_reports) & set(rust_reports))
|
||||||
|
only_py = sorted(set(py_reports) - set(rust_reports))
|
||||||
|
only_rust = sorted(set(rust_reports) - set(py_reports))
|
||||||
|
|
||||||
|
diffs: List[Dict[str, Any]] = []
|
||||||
|
quality_pairs: List[Tuple[str, str]] = []
|
||||||
|
per_type_total = Counter()
|
||||||
|
per_type_disagree = Counter()
|
||||||
|
|
||||||
|
tier_a = 0
|
||||||
|
tier_b = 0
|
||||||
|
for cid in common_ids:
|
||||||
|
d = compare_one(cid, py_reports[cid], rust_reports[cid])
|
||||||
|
if d is None:
|
||||||
|
quality_pairs.append(
|
||||||
|
(
|
||||||
|
py_reports[cid]["report"]["overall_quality"],
|
||||||
|
rust_reports[cid]["report"]["overall_quality"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for st, _ in per_type_counts(py_reports[cid]["report"]).items():
|
||||||
|
per_type_total[st] += 1
|
||||||
|
else:
|
||||||
|
diffs.append(d)
|
||||||
|
if d["tier"] == "A":
|
||||||
|
tier_a += 1
|
||||||
|
elif d["tier"] == "B":
|
||||||
|
tier_b += 1
|
||||||
|
if "report" in py_reports[cid] and "report" in rust_reports[cid]:
|
||||||
|
quality_pairs.append(
|
||||||
|
(
|
||||||
|
py_reports[cid]["report"].get("overall_quality", "?"),
|
||||||
|
rust_reports[cid]["report"].get("overall_quality", "?"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for cd in d.get("count_diff", []) or []:
|
||||||
|
per_type_disagree[cd["signal_type"]] += 1
|
||||||
|
per_type_total[cd["signal_type"]] += 1
|
||||||
|
|
||||||
|
n_total = len(common_ids)
|
||||||
|
n_match = n_total - len(diffs)
|
||||||
|
agreement = (n_match / n_total) if n_total else 0.0
|
||||||
|
|
||||||
|
quality_labels = ["excellent", "good", "neutral", "poor", "severe"]
|
||||||
|
cm = confusion_matrix(quality_pairs, quality_labels)
|
||||||
|
|
||||||
|
metrics = {
|
||||||
|
"n_python_reports": len(py_reports),
|
||||||
|
"n_rust_reports": len(rust_reports),
|
||||||
|
"n_common": n_total,
|
||||||
|
"n_only_python": len(only_py),
|
||||||
|
"n_only_rust": len(only_rust),
|
||||||
|
"n_full_match": n_match,
|
||||||
|
"agreement_pct": round(100.0 * agreement, 4),
|
||||||
|
"tier_a_divergences": tier_a,
|
||||||
|
"tier_b_divergences": tier_b,
|
||||||
|
"quality_confusion_matrix": cm,
|
||||||
|
"per_signal_type_total": dict(per_type_total),
|
||||||
|
"per_signal_type_disagree": dict(per_type_disagree),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Pull in run metadata if present.
|
||||||
|
rm_path = out_dir / "run_metadata.json"
|
||||||
|
if rm_path.exists():
|
||||||
|
metrics["run_metadata"] = json.loads(rm_path.read_text())
|
||||||
|
|
||||||
|
(out_dir / "metrics.json").write_text(json.dumps(metrics, indent=2))
|
||||||
|
with (out_dir / "diffs.jsonl").open("w") as f:
|
||||||
|
for d in diffs:
|
||||||
|
f.write(json.dumps(d, ensure_ascii=False))
|
||||||
|
f.write("\n")
|
||||||
|
|
||||||
|
write_summary_md(out_dir / "summary.md", metrics, diffs[:20])
|
||||||
|
|
||||||
|
print(
|
||||||
|
json.dumps(
|
||||||
|
{k: v for k, v in metrics.items() if k != "quality_confusion_matrix"},
|
||||||
|
indent=2,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
print(f"\ndiffs: {out_dir / 'diffs.jsonl'} metrics: {out_dir / 'metrics.json'}")
|
||||||
|
print(f"summary: {out_dir / 'summary.md'}")
|
||||||
|
|
||||||
|
if tier_a > 0:
|
||||||
|
print(f"\nFAIL: {tier_a} Tier-A divergence(s) detected.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def write_summary_md(
|
||||||
|
path: Path, metrics: Dict[str, Any], sample_diffs: List[Dict[str, Any]]
|
||||||
|
) -> None:
|
||||||
|
lines: List[str] = []
|
||||||
|
lines.append("# Signals Parity Report")
|
||||||
|
lines.append("")
|
||||||
|
rm = metrics.get("run_metadata", {})
|
||||||
|
if rm:
|
||||||
|
lines.append("## Run metadata")
|
||||||
|
lines.append("")
|
||||||
|
for k in (
|
||||||
|
"dataset_name",
|
||||||
|
"dataset_revision",
|
||||||
|
"seed",
|
||||||
|
"num_samples_actual",
|
||||||
|
"plano_git_sha",
|
||||||
|
"signals_python_version",
|
||||||
|
"rust_binary_sha256",
|
||||||
|
):
|
||||||
|
if k in rm:
|
||||||
|
lines.append(f"- **{k}**: `{rm[k]}`")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
lines.append("## Summary")
|
||||||
|
lines.append("")
|
||||||
|
lines.append(f"- Conversations compared: **{metrics['n_common']}**")
|
||||||
|
lines.append(f"- Full matches: **{metrics['n_full_match']}**")
|
||||||
|
lines.append(f"- Agreement: **{metrics['agreement_pct']}%**")
|
||||||
|
lines.append(f"- Tier-A divergences: **{metrics['tier_a_divergences']}**")
|
||||||
|
lines.append(f"- Tier-B divergences: **{metrics['tier_b_divergences']}**")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
lines.append("## Per-signal-type disagreement")
|
||||||
|
lines.append("")
|
||||||
|
lines.append("| Signal type | Total reports | Disagreements |")
|
||||||
|
lines.append("|---|---:|---:|")
|
||||||
|
totals = metrics["per_signal_type_total"]
|
||||||
|
disagrees = metrics["per_signal_type_disagree"]
|
||||||
|
for k in sorted(set(totals) | set(disagrees)):
|
||||||
|
lines.append(f"| `{k}` | {totals.get(k, 0)} | {disagrees.get(k, 0)} |")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
lines.append("## Quality bucket confusion matrix (rows = python, cols = rust)")
|
||||||
|
lines.append("")
|
||||||
|
cm = metrics["quality_confusion_matrix"]
|
||||||
|
labels = list(cm.keys())
|
||||||
|
lines.append("| | " + " | ".join(labels) + " |")
|
||||||
|
lines.append("|---|" + "|".join(["---:"] * len(labels)) + "|")
|
||||||
|
for r in labels:
|
||||||
|
lines.append(
|
||||||
|
f"| {r} | " + " | ".join(str(cm[r].get(c, 0)) for c in labels) + " |"
|
||||||
|
)
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
if sample_diffs:
|
||||||
|
lines.append("## Sample divergences (first 20)")
|
||||||
|
lines.append("")
|
||||||
|
for d in sample_diffs:
|
||||||
|
lines.append(f"### `{d['id']}` — tier {d['tier']} — {d['kind']}")
|
||||||
|
lines.append("")
|
||||||
|
lines.append("```json")
|
||||||
|
lines.append(json.dumps(d, indent=2))
|
||||||
|
lines.append("```")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
path.write_text("\n".join(lines))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
3
tests/parity/signals/requirements.txt
Normal file
3
tests/parity/signals/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
huggingface_hub>=0.25
|
||||||
|
pyarrow>=15
|
||||||
|
tqdm>=4.65
|
||||||
332
tests/parity/signals/run_parity.py
Normal file
332
tests/parity/signals/run_parity.py
Normal file
|
|
@ -0,0 +1,332 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Parity harness driver.
|
||||||
|
|
||||||
|
Samples conversations from `lmsys/lmsys-chat-1m`, runs both the Python
|
||||||
|
reference analyzer (in-process) and the Rust port (subprocess), writes both
|
||||||
|
reports to disk for `compare.py` to diff.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python run_parity.py \\
|
||||||
|
--num-samples 2000 \\
|
||||||
|
--seed 42 \\
|
||||||
|
--dataset-revision <hf-revision-sha> \\
|
||||||
|
--rust-binary ../../../crates/target/release/signals_replay \\
|
||||||
|
--output-dir out/
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterator, List
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pyarrow.parquet as pq
|
||||||
|
from huggingface_hub import hf_hub_download, list_repo_files
|
||||||
|
except ImportError:
|
||||||
|
print(
|
||||||
|
"error: install dependencies first: pip install -r requirements.txt",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from signals.analyzer import SignalAnalyzer
|
||||||
|
except ImportError:
|
||||||
|
print(
|
||||||
|
"error: the python `signals` package is not installed. "
|
||||||
|
"install it from your local checkout: pip install -e /path/to/signals",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from tqdm import tqdm
|
||||||
|
except ImportError:
|
||||||
|
|
||||||
|
def tqdm(it, **_kwargs): # type: ignore[no-redef]
|
||||||
|
return it
|
||||||
|
|
||||||
|
|
||||||
|
DATASET_NAME = "lmsys/lmsys-chat-1m"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
|
||||||
|
p = argparse.ArgumentParser(description=__doc__)
|
||||||
|
p.add_argument("--num-samples", type=int, default=2000)
|
||||||
|
p.add_argument("--seed", type=int, default=42)
|
||||||
|
p.add_argument(
|
||||||
|
"--dataset-revision",
|
||||||
|
default=None,
|
||||||
|
help="HF dataset revision to pin (default: latest, NOT recommended for reproducibility)",
|
||||||
|
)
|
||||||
|
p.add_argument(
|
||||||
|
"--rust-binary",
|
||||||
|
type=Path,
|
||||||
|
required=True,
|
||||||
|
help="path to the `signals_replay` binary built from crates/brightstaff",
|
||||||
|
)
|
||||||
|
p.add_argument(
|
||||||
|
"--output-dir",
|
||||||
|
type=Path,
|
||||||
|
default=Path("out"),
|
||||||
|
help="directory to write the conversations + both runners' outputs",
|
||||||
|
)
|
||||||
|
p.add_argument(
|
||||||
|
"--max-conv-messages",
|
||||||
|
type=int,
|
||||||
|
default=200,
|
||||||
|
help="drop conversations with more than this many messages (the analyzer "
|
||||||
|
"truncates to last 100 anyway; this is a sanity cap on input parsing)",
|
||||||
|
)
|
||||||
|
return p.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def lmsys_to_sharegpt(conversation: List[Dict[str, str]]) -> List[Dict[str, str]]:
|
||||||
|
"""Convert lmsys-chat-1m's `[{role, content}]` to ShareGPT's `[{from, value}]`.
|
||||||
|
|
||||||
|
lmsys uses `user` / `assistant` (no tools, no system role in `conversation`).
|
||||||
|
"""
|
||||||
|
out = []
|
||||||
|
for m in conversation:
|
||||||
|
role = m.get("role", "")
|
||||||
|
content = m.get("content", "")
|
||||||
|
if not isinstance(content, str):
|
||||||
|
content = str(content) if content is not None else ""
|
||||||
|
if role == "user":
|
||||||
|
from_ = "human"
|
||||||
|
elif role == "assistant":
|
||||||
|
from_ = "gpt"
|
||||||
|
else:
|
||||||
|
# lmsys is human/assistant only; skip anything else defensively.
|
||||||
|
continue
|
||||||
|
out.append({"from": from_, "value": content})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _list_parquet_files(revision: str | None) -> List[str]:
    """Return the sorted list of parquet shard paths in the dataset repo."""
    repo_files = list_repo_files(DATASET_NAME, repo_type="dataset", revision=revision)
    parquet_only = [name for name in repo_files if name.endswith(".parquet")]
    return sorted(parquet_only)
|
||||||
|
|
||||||
|
|
||||||
|
def _download_shards(paths: List[str], revision: str | None) -> List[Path]:
    """Download each parquet shard into the HF cache and return the local paths."""
    downloaded: List[Path] = []
    for shard_name in tqdm(paths, desc="downloading shards", unit="shard"):
        cached = hf_hub_download(
            DATASET_NAME,
            filename=shard_name,
            repo_type="dataset",
            revision=revision,
        )
        downloaded.append(Path(cached))
    return downloaded
|
||||||
|
|
||||||
|
|
||||||
|
def sample_conversations(
    *,
    num_samples: int,
    seed: int,
    revision: str | None,
    max_conv_messages: int,
) -> Iterator[Dict[str, Any]]:
    """Yield `num_samples` conversations sampled uniformly across the dataset.

    We bypass the `datasets` loader (which has a Python 3.14 pickle issue)
    and read the parquet shards directly via pyarrow.
    """
    suffix = ' @ ' + revision if revision else ' (no revision pinned!)'
    print(f"listing {DATASET_NAME}{suffix}", file=sys.stderr)
    shard_paths = _list_parquet_files(revision)
    if not shard_paths:
        raise SystemExit(f"no parquet shards found for {DATASET_NAME}")
    local_paths = _download_shards(shard_paths, revision)

    # Row counts come straight from parquet metadata; no shard data is read yet.
    shard_row_counts = [
        pq.ParquetFile(str(path)).metadata.num_rows for path in local_paths
    ]
    total_rows = sum(shard_row_counts)
    print(
        f"dataset has {total_rows:,} rows across {len(local_paths)} shards",
        file=sys.stderr,
    )

    rng = random.Random(seed)
    global_indices = sorted(rng.sample(range(total_rows), num_samples))

    # Prefix offsets: shard_offsets[i] is the global index of shard i's first row.
    shard_offsets: List[int] = []
    running = 0
    for count in shard_row_counts:
        shard_offsets.append(running)
        running += count

    # Bucket the sampled global indices by owning shard (as local row numbers).
    by_shard: Dict[int, List[int]] = {}
    for gi in global_indices:
        for si, off in enumerate(shard_offsets):
            if gi < off + shard_row_counts[si]:
                by_shard.setdefault(si, []).append(gi - off)
                break

    yielded = 0
    for si in sorted(by_shard):
        shard = pq.ParquetFile(str(local_paths[si]))
        conv_col = shard.read(columns=["conversation"]).column("conversation")
        for local_idx in by_shard[si]:
            raw = conv_col[local_idx].as_py()
            if not raw:
                continue
            conversation = raw if isinstance(raw, list) else raw.get("conversation", [])
            if len(conversation) > max_conv_messages:
                continue
            messages = lmsys_to_sharegpt(conversation)
            if not messages:
                continue
            yield {
                "id": f"lmsys-{shard_offsets[si] + local_idx}",
                "messages": messages,
            }
            yielded += 1
    print(f"yielded {yielded} conversations after filtering", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def write_conversations(out_path: Path, samples: Iterator[Dict[str, Any]]) -> int:
    """Write each sample as one JSON line to *out_path*; return the count written."""
    count = 0
    with out_path.open("w") as sink:
        for sample in tqdm(samples, desc="sampling", unit="convo"):
            sink.write(json.dumps(sample, ensure_ascii=False) + "\n")
            count += 1
    return count
|
||||||
|
|
||||||
|
|
||||||
|
def run_rust(rust_binary: Path, conv_path: Path, out_path: Path) -> None:
    """Pipe *conv_path* through the rust binary, writing its stdout to *out_path*.

    Relays the binary's stderr and exits the program on a nonzero return code.
    """
    print(f"running rust analyzer: {rust_binary}", file=sys.stderr)
    started = time.monotonic()
    with conv_path.open("rb") as stdin_f, out_path.open("wb") as stdout_f:
        result = subprocess.run(
            [str(rust_binary)],
            stdin=stdin_f,
            stdout=stdout_f,
            stderr=subprocess.PIPE,
            check=False,
        )
        if result.returncode != 0:
            sys.stderr.write(result.stderr.decode("utf-8", errors="replace"))
            raise SystemExit(f"rust runner exited {result.returncode}")
    elapsed = time.monotonic() - started
    print(f"  rust runner: {elapsed:.1f}s", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def run_python(conv_path: Path, out_path: Path) -> None:
    """Run the Python `SignalAnalyzer` over every conversation in *conv_path*.

    Writes one JSON line per input conversation to *out_path*: either
    `{"id", "report"}` on success or `{"id", "error"}` on failure, so
    compare.py can line the two runners' outputs up by id.
    """
    print("running python analyzer...", file=sys.stderr)
    t0 = time.monotonic()
    analyzer = SignalAnalyzer()
    with conv_path.open() as fin, out_path.open("w") as fout:
        for line in tqdm(fin, desc="python", unit="convo"):
            line = line.strip()
            if not line:
                continue
            # Pre-bind obj so the except handler never hits a NameError when
            # json.loads itself is what raised (previously `obj.get("id")`
            # crashed the whole run on an unparseable line).
            obj = None
            try:
                obj = json.loads(line)
                report = analyzer.analyze(obj["messages"])
                fout.write(
                    json.dumps(
                        {"id": obj["id"], "report": report.to_dict()},
                        ensure_ascii=False,
                    )
                )
            except Exception as e:
                conv_id = obj.get("id") if isinstance(obj, dict) else None
                fout.write(json.dumps({"id": conv_id, "error": str(e)}))
            fout.write("\n")
    elapsed = time.monotonic() - t0
    print(f"  python runner: {elapsed:.1f}s", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def stamp_metadata(args: argparse.Namespace, output_dir: Path, n_samples: int) -> None:
    """Write the input metadata so compare.py can include it in the report."""
    binary_sha = hashlib.sha256(args.rust_binary.read_bytes()).hexdigest()

    # Best-effort: record the repo HEAD; "unknown" when not in a git checkout.
    try:
        rev_parse = subprocess.check_output(
            ["git", "rev-parse", "HEAD"], cwd=Path(__file__).parent
        )
        plano_sha = rev_parse.decode().strip()
    except Exception:
        plano_sha = "unknown"

    # Best-effort: read the installed `signals` package version via pip.
    try:
        pip_show = subprocess.check_output(
            [sys.executable, "-m", "pip", "show", "signals"]
        ).decode()
        signals_version = "unknown"
        for info_line in pip_show.splitlines():
            if info_line.startswith("Version"):
                signals_version = info_line.split(":", 1)[1].strip()
                break
    except Exception:
        signals_version = "unknown"

    meta = {
        "dataset_name": DATASET_NAME,
        "dataset_revision": args.dataset_revision,
        "seed": args.seed,
        "num_samples_requested": args.num_samples,
        "num_samples_actual": n_samples,
        "rust_binary": str(args.rust_binary.resolve()),
        "rust_binary_sha256": binary_sha,
        "plano_git_sha": plano_sha,
        "signals_python_version": signals_version,
        "max_conv_messages": args.max_conv_messages,
    }
    (output_dir / "run_metadata.json").write_text(json.dumps(meta, indent=2))
    print(f"wrote {output_dir / 'run_metadata.json'}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Sample conversations, run both analyzers, and stamp run metadata."""
    args = parse_args()
    args.output_dir.mkdir(parents=True, exist_ok=True)
    if not args.rust_binary.exists():
        raise SystemExit(f"rust binary not found at {args.rust_binary}")

    conv_path = args.output_dir / "conversations.jsonl"
    rust_path = args.output_dir / "rust_reports.jsonl"
    py_path = args.output_dir / "python_reports.jsonl"

    sample_stream = sample_conversations(
        num_samples=args.num_samples,
        seed=args.seed,
        revision=args.dataset_revision,
        max_conv_messages=args.max_conv_messages,
    )
    n_written = write_conversations(conv_path, sample_stream)
    print(f"wrote {n_written} conversations to {conv_path}", file=sys.stderr)

    run_rust(args.rust_binary, conv_path, rust_path)
    run_python(conv_path, py_path)
    stamp_metadata(args, args.output_dir, n_written)
    print("done. now run: python compare.py --output-dir " + str(args.output_dir))
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: sample -> run both analyzers -> stamp metadata.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue