mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
signals: feature parity with the latest Signals paper. Porting logic from python repo (#903)
* signals: port to layered taxonomy with dual-emit OTel Made-with: Cursor * fix: silence collapsible_match clippy lint (rustc 1.95) Made-with: Cursor * test: parity harness for rust vs python signals analyzer Validates the brightstaff signals port against the katanemo/signals Python reference on lmsys/lmsys-chat-1m. Adds a signals_replay bin emitting python- compatible JSON, a pyarrow-based driver (bypasses the datasets loader pickle bug on python 3.14), a 3-tier comparator, and an on-demand workflow_dispatch CI job. Made-with: Cursor * Remove signals test from the gitops flow * style: format parity harness with black Made-with: Cursor * signals: group summary by taxonomy, factor misalignment_ratio Addresses #903 review feedback from @nehcgs: - generate_summary() now renders explicit Interaction / Execution / Environment headers so the paper taxonomy is visible at a glance, even when no signals fired in a given layer. Quality-driving callouts (high misalignment rate, looping detected, escalation requested) are appended after the layer summary as an alerts tail. - repair_ratio (legacy taxonomy name) renamed to misalignment_ratio and factored into a single InteractionSignals::misalignment_ratio() helper so assess_quality and generate_summary share one source of truth instead of recomputing the same divide twice. Two new unit tests pin the layer headers and the (sev N) severity suffix. Parity with the python reference is preserved at the Tier-A level (per-type counts + overall_quality); only the human-readable summary string diverges, which the parity comparator already classifies as Tier-C. Made-with: Cursor
This commit is contained in:
parent
6701195a5d
commit
c8079ac971
31 changed files with 5246 additions and 3261 deletions
1
crates/Cargo.lock
generated
1
crates/Cargo.lock
generated
|
|
@ -358,6 +358,7 @@ dependencies = [
|
|||
"pretty_assertions",
|
||||
"rand 0.9.4",
|
||||
"redis",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
|
|||
|
|
@ -3,6 +3,14 @@ name = "brightstaff"
|
|||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "brightstaff"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "signals_replay"
|
||||
path = "src/bin/signals_replay.rs"
|
||||
|
||||
[dependencies]
|
||||
async-openai = "0.30.1"
|
||||
async-trait = "0.1"
|
||||
|
|
@ -26,6 +34,7 @@ opentelemetry-stdout = "0.31"
|
|||
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
|
||||
pretty_assertions = "1.4.1"
|
||||
rand = "0.9.2"
|
||||
regex = "1.10"
|
||||
lru = "0.12"
|
||||
metrics = "0.23"
|
||||
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
|
||||
|
|
|
|||
175
crates/brightstaff/src/bin/signals_replay.rs
Normal file
175
crates/brightstaff/src/bin/signals_replay.rs
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
//! `signals-replay` — batch driver for the `brightstaff` signal analyzer.
|
||||
//!
|
||||
//! Reads JSONL conversations from stdin (one per line) and emits matching
|
||||
//! JSONL reports on stdout, one per input conversation, in the same order.
|
||||
//!
|
||||
//! Input shape (per line):
|
||||
//! ```json
|
||||
//! {"id": "convo-42", "messages": [{"from": "human", "value": "..."}, ...]}
|
||||
//! ```
|
||||
//!
|
||||
//! Output shape (per line, success):
|
||||
//! ```json
|
||||
//! {"id": "convo-42", "report": { ...python-compatible SignalReport dict... }}
|
||||
//! ```
|
||||
//!
|
||||
//! On per-line failure (parse / analyzer error), emits:
|
||||
//! ```json
|
||||
//! {"id": "convo-42", "error": "..."}
|
||||
//! ```
|
||||
//!
|
||||
//! The output report dict is shaped to match the Python reference's
|
||||
//! `SignalReport.to_dict()` byte-for-byte so the parity comparator can do a
|
||||
//! direct structural diff.
|
||||
|
||||
use std::io::{self, BufRead, BufWriter, Write};
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Map, Value};
|
||||
|
||||
use brightstaff::signals::{SignalAnalyzer, SignalGroup, SignalReport};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct InputLine {
|
||||
id: Value,
|
||||
messages: Vec<MessageRow>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct MessageRow {
|
||||
#[serde(default)]
|
||||
from: String,
|
||||
#[serde(default)]
|
||||
value: String,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
let mut out = BufWriter::new(stdout.lock());
|
||||
let analyzer = SignalAnalyzer::default();
|
||||
|
||||
for line in stdin.lock().lines() {
|
||||
let line = match line {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
eprintln!("read error: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let result = process_line(&analyzer, trimmed);
|
||||
// Always emit one line per input line so id ordering stays aligned.
|
||||
if let Err(e) = writeln!(out, "{result}") {
|
||||
eprintln!("write error: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
// Flush periodically isn't strictly needed — BufWriter handles it,
|
||||
// and the parent process reads the whole stream when we're done.
|
||||
}
|
||||
let _ = out.flush();
|
||||
}
|
||||
|
||||
fn process_line(analyzer: &SignalAnalyzer, line: &str) -> Value {
|
||||
let parsed: InputLine = match serde_json::from_str(line) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
return json!({
|
||||
"id": Value::Null,
|
||||
"error": format!("input parse: {e}"),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let id = parsed.id.clone();
|
||||
|
||||
let view: Vec<brightstaff::signals::analyzer::ShareGptMessage<'_>> = parsed
|
||||
.messages
|
||||
.iter()
|
||||
.map(|m| brightstaff::signals::analyzer::ShareGptMessage {
|
||||
from: m.from.as_str(),
|
||||
value: m.value.as_str(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let report = analyzer.analyze_sharegpt(&view);
|
||||
let report_dict = report_to_python_dict(&report);
|
||||
json!({
|
||||
"id": id,
|
||||
"report": report_dict,
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert a `SignalReport` into the Python reference's `to_dict()` shape.
|
||||
///
|
||||
/// Ordering of category keys in each layer dict follows the Python source
|
||||
/// exactly so even string-equality comparisons behave deterministically.
|
||||
fn report_to_python_dict(r: &SignalReport) -> Value {
|
||||
let mut interaction = Map::new();
|
||||
interaction.insert(
|
||||
"misalignment".to_string(),
|
||||
signal_group_to_python(&r.interaction.misalignment),
|
||||
);
|
||||
interaction.insert(
|
||||
"stagnation".to_string(),
|
||||
signal_group_to_python(&r.interaction.stagnation),
|
||||
);
|
||||
interaction.insert(
|
||||
"disengagement".to_string(),
|
||||
signal_group_to_python(&r.interaction.disengagement),
|
||||
);
|
||||
interaction.insert(
|
||||
"satisfaction".to_string(),
|
||||
signal_group_to_python(&r.interaction.satisfaction),
|
||||
);
|
||||
|
||||
let mut execution = Map::new();
|
||||
execution.insert(
|
||||
"failure".to_string(),
|
||||
signal_group_to_python(&r.execution.failure),
|
||||
);
|
||||
execution.insert(
|
||||
"loops".to_string(),
|
||||
signal_group_to_python(&r.execution.loops),
|
||||
);
|
||||
|
||||
let mut environment = Map::new();
|
||||
environment.insert(
|
||||
"exhaustion".to_string(),
|
||||
signal_group_to_python(&r.environment.exhaustion),
|
||||
);
|
||||
|
||||
json!({
|
||||
"interaction_signals": Value::Object(interaction),
|
||||
"execution_signals": Value::Object(execution),
|
||||
"environment_signals": Value::Object(environment),
|
||||
"overall_quality": r.overall_quality.as_str(),
|
||||
"summary": r.summary,
|
||||
})
|
||||
}
|
||||
|
||||
fn signal_group_to_python(g: &SignalGroup) -> Value {
|
||||
let signals: Vec<Value> = g
|
||||
.signals
|
||||
.iter()
|
||||
.map(|s| {
|
||||
json!({
|
||||
"signal_type": s.signal_type.as_str(),
|
||||
"message_index": s.message_index,
|
||||
"snippet": s.snippet,
|
||||
"confidence": s.confidence,
|
||||
"metadata": s.metadata,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
json!({
|
||||
"category": g.category,
|
||||
"count": g.count,
|
||||
"severity": g.severity,
|
||||
"signals": signals,
|
||||
})
|
||||
}
|
||||
|
|
@ -441,10 +441,8 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
}
|
||||
// Handle str/string conversions
|
||||
"str" | "string" => {
|
||||
if !value.is_string() {
|
||||
return Ok(json!(value.to_string()));
|
||||
}
|
||||
"str" | "string" if !value.is_string() => {
|
||||
return Ok(json!(value.to_string()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
347
crates/brightstaff/src/signals/environment/exhaustion.rs
Normal file
347
crates/brightstaff/src/signals/environment/exhaustion.rs
Normal file
|
|
@ -0,0 +1,347 @@
|
|||
//! Environment exhaustion detector. Direct port of
|
||||
//! `signals/environment/exhaustion.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::analyzer::ShareGptMessage;
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
|
||||
pub const API_ERROR_PATTERNS: &[&str] = &[
|
||||
r"500\s*(internal\s+)?server\s+error",
|
||||
r"502\s*bad\s+gateway",
|
||||
r"503\s*service\s+unavailable",
|
||||
r"504\s*gateway\s+timeout",
|
||||
r"internal\s+server\s+error",
|
||||
r"service\s+unavailable",
|
||||
r"server\s+error",
|
||||
r"backend\s+error",
|
||||
r"upstream\s+error",
|
||||
r"service\s+temporarily\s+unavailable",
|
||||
r"maintenance\s+mode",
|
||||
r"under\s+maintenance",
|
||||
r"try\s+again\s+later",
|
||||
r"temporarily\s+unavailable",
|
||||
r"system\s+error",
|
||||
r"unexpected\s+error",
|
||||
r"unhandled\s+exception",
|
||||
];
|
||||
|
||||
pub const TIMEOUT_PATTERNS: &[&str] = &[
|
||||
r"timeout",
|
||||
r"timed?\s*out",
|
||||
r"etimedout",
|
||||
r"connection\s+timed?\s*out",
|
||||
r"read\s+timed?\s*out",
|
||||
r"request\s+timed?\s*out",
|
||||
r"gateway\s+timeout",
|
||||
r"deadline\s+exceeded",
|
||||
r"took\s+too\s+long",
|
||||
r"operation\s+timed?\s*out",
|
||||
r"socket\s+timeout",
|
||||
];
|
||||
|
||||
pub const RATE_LIMIT_PATTERNS: &[&str] = &[
|
||||
r"rate\s+limit",
|
||||
r"rate.limited",
|
||||
r"(status|error|http)\s*:?\s*429",
|
||||
r"429\s+(too\s+many|rate|limit)",
|
||||
r"too\s+many\s+requests?",
|
||||
r"quota\s+exceeded",
|
||||
r"quota\s+limit",
|
||||
r"throttl(ed|ing)",
|
||||
r"request\s+limit",
|
||||
r"api\s+limit",
|
||||
r"calls?\s+per\s+(second|minute|hour|day)",
|
||||
r"exceeded\s+.*\s+limit",
|
||||
r"slow\s+down",
|
||||
r"retry\s+after",
|
||||
r"requests?\s+exceeded",
|
||||
];
|
||||
|
||||
pub const NETWORK_PATTERNS: &[&str] = &[
|
||||
r"connection\s+refused",
|
||||
r"econnrefused",
|
||||
r"econnreset",
|
||||
r"connection\s+reset",
|
||||
r"enotfound",
|
||||
r"dns\s+(error|failure|lookup)",
|
||||
r"host\s+not\s+found",
|
||||
r"network\s+(error|failure|unreachable)",
|
||||
r"no\s+route\s+to\s+host",
|
||||
r"socket\s+error",
|
||||
r"connection\s+failed",
|
||||
r"unable\s+to\s+connect",
|
||||
r"cannot\s+connect",
|
||||
r"could\s+not\s+connect",
|
||||
r"connect\s+error",
|
||||
r"ssl\s+(error|handshake|certificate)",
|
||||
r"certificate\s+(error|invalid|expired)",
|
||||
];
|
||||
|
||||
pub const MALFORMED_PATTERNS: &[&str] = &[
|
||||
r"json\s+parse\s+error",
|
||||
r"invalid\s+json",
|
||||
r"unexpected\s+token",
|
||||
r"syntax\s+error.*json",
|
||||
r"malformed\s+(response|json|data)",
|
||||
r"unexpected\s+end\s+of",
|
||||
r"parse\s+error",
|
||||
r"parsing\s+failed",
|
||||
r"invalid\s+response",
|
||||
r"unexpected\s+response",
|
||||
r"response\s+format",
|
||||
r"missing\s+field.*response",
|
||||
r"unexpected\s+schema",
|
||||
r"schema\s+validation",
|
||||
r"deserialization\s+error",
|
||||
r"failed\s+to\s+decode",
|
||||
];
|
||||
|
||||
pub const CONTEXT_OVERFLOW_PATTERNS: &[&str] = &[
|
||||
r"context\s+(length|limit|overflow|exceeded)",
|
||||
r"token\s+(limit|overflow|exceeded)",
|
||||
r"max(imum)?\s+tokens?",
|
||||
r"input\s+too\s+(long|large)",
|
||||
r"exceeds?\s+(context|token|character|input)\s+limit",
|
||||
r"message\s+too\s+(long|large)",
|
||||
r"content\s+too\s+(long|large)",
|
||||
r"truncat(ed|ion)\s+(due\s+to|because|for)\s+(length|size|limit)",
|
||||
r"maximum\s+context",
|
||||
r"prompt\s+too\s+(long|large)",
|
||||
];
|
||||
|
||||
fn compile(patterns: &[&str]) -> Regex {
|
||||
let combined = patterns
|
||||
.iter()
|
||||
.map(|p| format!("({})", p))
|
||||
.collect::<Vec<_>>()
|
||||
.join("|");
|
||||
Regex::new(&format!("(?i){}", combined)).expect("exhaustion pattern regex must compile")
|
||||
}
|
||||
|
||||
fn api_error_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(API_ERROR_PATTERNS))
|
||||
}
|
||||
fn timeout_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(TIMEOUT_PATTERNS))
|
||||
}
|
||||
fn rate_limit_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(RATE_LIMIT_PATTERNS))
|
||||
}
|
||||
fn network_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(NETWORK_PATTERNS))
|
||||
}
|
||||
fn malformed_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(MALFORMED_PATTERNS))
|
||||
}
|
||||
fn context_overflow_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(CONTEXT_OVERFLOW_PATTERNS))
|
||||
}
|
||||
|
||||
fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
|
||||
let start = m.start().saturating_sub(context);
|
||||
let end = (m.end() + context).min(text.len());
|
||||
let start = align_char_boundary(text, start, false);
|
||||
let end = align_char_boundary(text, end, true);
|
||||
let mut snippet = String::new();
|
||||
if start > 0 {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet.push_str(&text[start..end]);
|
||||
if end < text.len() {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet
|
||||
}
|
||||
|
||||
fn align_char_boundary(s: &str, mut idx: usize, forward: bool) -> usize {
|
||||
if idx >= s.len() {
|
||||
return s.len();
|
||||
}
|
||||
while !s.is_char_boundary(idx) {
|
||||
if forward {
|
||||
idx += 1;
|
||||
} else if idx == 0 {
|
||||
break;
|
||||
} else {
|
||||
idx -= 1;
|
||||
}
|
||||
}
|
||||
idx
|
||||
}
|
||||
|
||||
pub fn analyze_exhaustion(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("exhaustion");
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
if msg.from != "observation" {
|
||||
continue;
|
||||
}
|
||||
let value = msg.value;
|
||||
let lower = value.to_lowercase();
|
||||
|
||||
if let Some(m) = rate_limit_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionRateLimit,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.95,
|
||||
"rate_limit",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = api_error_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionApiError,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"api_error",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = timeout_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionTimeout,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"timeout",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = network_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionNetwork,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"network",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = malformed_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionMalformed,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.85,
|
||||
"malformed_response",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = context_overflow_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionContextOverflow,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"context_overflow",
|
||||
m.as_str(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
fn emit(
|
||||
t: SignalType,
|
||||
idx: usize,
|
||||
snippet: String,
|
||||
confidence: f32,
|
||||
kind: &str,
|
||||
matched: &str,
|
||||
) -> SignalInstance {
|
||||
SignalInstance::new(t, idx, snippet)
|
||||
.with_confidence(confidence)
|
||||
.with_metadata(json!({
|
||||
"exhaustion_type": kind,
|
||||
"matched": matched,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn obs(value: &str) -> ShareGptMessage<'_> {
|
||||
ShareGptMessage {
|
||||
from: "observation",
|
||||
value,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_rate_limit() {
|
||||
let g = analyze_exhaustion(&[obs("HTTP 429: too many requests, retry after 30s")]);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionRateLimit)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_api_error() {
|
||||
let g = analyze_exhaustion(&[obs("503 service unavailable - try again later")]);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionApiError)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_timeout() {
|
||||
let g = analyze_exhaustion(&[obs("Connection timed out after 30 seconds")]);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionTimeout)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_network_failure() {
|
||||
let g = analyze_exhaustion(&[obs("ECONNREFUSED: connection refused by remote host")]);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionNetwork)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_malformed_response() {
|
||||
let g = analyze_exhaustion(&[obs("Invalid JSON: unexpected token at position 42")]);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionMalformed)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_context_overflow() {
|
||||
let g = analyze_exhaustion(&[obs("Maximum context length exceeded for this model")]);
|
||||
assert!(g.signals.iter().any(|s| matches!(
|
||||
s.signal_type,
|
||||
SignalType::EnvironmentExhaustionContextOverflow
|
||||
)));
|
||||
}
|
||||
}
|
||||
3
crates/brightstaff/src/signals/environment/mod.rs
Normal file
3
crates/brightstaff/src/signals/environment/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
//! Environment signals: exhaustion (external system failures and constraints).
|
||||
|
||||
pub mod exhaustion;
|
||||
388
crates/brightstaff/src/signals/execution/failure.rs
Normal file
388
crates/brightstaff/src/signals/execution/failure.rs
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
//! Execution failure detector. Direct port of `signals/execution/failure.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::analyzer::ShareGptMessage;
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
|
||||
pub const INVALID_ARGS_PATTERNS: &[&str] = &[
|
||||
r"invalid\s+argument",
|
||||
r"invalid\s+parameter",
|
||||
r"invalid\s+type",
|
||||
r"type\s*error",
|
||||
r"expected\s+\w+\s*,?\s*got\s+\w+",
|
||||
r"required\s+field",
|
||||
r"required\s+parameter",
|
||||
r"missing\s+required",
|
||||
r"missing\s+argument",
|
||||
r"validation\s+failed",
|
||||
r"validation\s+error",
|
||||
r"invalid\s+value",
|
||||
r"invalid\s+format",
|
||||
r"must\s+be\s+(a|an)\s+\w+",
|
||||
r"cannot\s+be\s+(null|empty|none)",
|
||||
r"is\s+not\s+valid",
|
||||
r"does\s+not\s+match",
|
||||
r"out\s+of\s+range",
|
||||
r"invalid\s+date",
|
||||
r"invalid\s+json",
|
||||
r"malformed\s+request",
|
||||
];
|
||||
|
||||
pub const BAD_QUERY_PATTERNS: &[&str] = &[
|
||||
r"invalid\s+query",
|
||||
r"query\s+syntax\s+error",
|
||||
r"malformed\s+query",
|
||||
r"unknown\s+field",
|
||||
r"invalid\s+field",
|
||||
r"invalid\s+filter",
|
||||
r"invalid\s+search",
|
||||
r"unknown\s+id",
|
||||
r"invalid\s+id",
|
||||
r"id\s+format\s+error",
|
||||
r"invalid\s+identifier",
|
||||
r"query\s+failed",
|
||||
r"search\s+error",
|
||||
r"invalid\s+operator",
|
||||
r"unsupported\s+query",
|
||||
];
|
||||
|
||||
pub const TOOL_NOT_FOUND_PATTERNS: &[&str] = &[
|
||||
r"unknown\s+function",
|
||||
r"unknown\s+tool",
|
||||
r"function\s+not\s+found",
|
||||
r"tool\s+not\s+found",
|
||||
r"no\s+such\s+function",
|
||||
r"no\s+such\s+tool",
|
||||
r"undefined\s+function",
|
||||
r"action\s+not\s+supported",
|
||||
r"invalid\s+tool",
|
||||
r"invalid\s+function",
|
||||
r"unrecognized\s+function",
|
||||
];
|
||||
|
||||
pub const AUTH_MISUSE_PATTERNS: &[&str] = &[
|
||||
r"\bunauthorized\b",
|
||||
r"(status|error|http|code)\s*:?\s*401",
|
||||
r"401\s+unauthorized",
|
||||
r"403\s+forbidden",
|
||||
r"permission\s+denied",
|
||||
r"access\s+denied",
|
||||
r"authentication\s+required",
|
||||
r"invalid\s+credentials",
|
||||
r"invalid\s+token",
|
||||
r"token\s+expired",
|
||||
r"missing\s+authorization",
|
||||
r"\bforbidden\b",
|
||||
r"not\s+authorized",
|
||||
r"insufficient\s+permissions?",
|
||||
];
|
||||
|
||||
pub const STATE_ERROR_PATTERNS: &[&str] = &[
|
||||
r"invalid\s+state",
|
||||
r"illegal\s+state",
|
||||
r"must\s+call\s+\w+\s+first",
|
||||
r"must\s+\w+\s+before",
|
||||
r"cannot\s+\w+\s+before",
|
||||
r"already\s+(exists?|created|started|finished)",
|
||||
r"not\s+initialized",
|
||||
r"not\s+started",
|
||||
r"already\s+in\s+progress",
|
||||
r"operation\s+in\s+progress",
|
||||
r"sequence\s+error",
|
||||
r"precondition\s+failed",
|
||||
r"(status|error|http)\s*:?\s*409",
|
||||
r"409\s+conflict",
|
||||
r"\bconflict\b",
|
||||
];
|
||||
|
||||
fn compile(patterns: &[&str]) -> Regex {
|
||||
// Use `(?i)` flag for case-insensitive matching, matching Python's `re.IGNORECASE`.
|
||||
let combined = patterns
|
||||
.iter()
|
||||
.map(|p| format!("({})", p))
|
||||
.collect::<Vec<_>>()
|
||||
.join("|");
|
||||
Regex::new(&format!("(?i){}", combined)).expect("failure pattern regex must compile")
|
||||
}
|
||||
|
||||
fn invalid_args_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(INVALID_ARGS_PATTERNS))
|
||||
}
|
||||
fn bad_query_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(BAD_QUERY_PATTERNS))
|
||||
}
|
||||
fn tool_not_found_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(TOOL_NOT_FOUND_PATTERNS))
|
||||
}
|
||||
fn auth_misuse_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(AUTH_MISUSE_PATTERNS))
|
||||
}
|
||||
fn state_error_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(STATE_ERROR_PATTERNS))
|
||||
}
|
||||
|
||||
/// Pull tool name + args from a `function_call` message. Mirrors
|
||||
/// `_extract_tool_info` in the reference.
|
||||
pub(crate) fn extract_tool_info(value: &str) -> (String, String) {
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
|
||||
if let Some(obj) = parsed.as_object() {
|
||||
let name = obj
|
||||
.get("name")
|
||||
.or_else(|| obj.get("function"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let args = match obj.get("arguments").or_else(|| obj.get("args")) {
|
||||
Some(serde_json::Value::Object(o)) => {
|
||||
serde_json::to_string(&serde_json::Value::Object(o.clone())).unwrap_or_default()
|
||||
}
|
||||
Some(other) => other
|
||||
.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
|
||||
None => String::new(),
|
||||
};
|
||||
return (name, args);
|
||||
}
|
||||
}
|
||||
let mut snippet: String = value.chars().take(200).collect();
|
||||
snippet.shrink_to_fit();
|
||||
("unknown".to_string(), snippet)
|
||||
}
|
||||
|
||||
/// Build a context-window snippet around a regex match, with leading/trailing
|
||||
/// ellipses when truncated. Mirrors `_get_snippet`.
|
||||
fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
|
||||
let start = m.start().saturating_sub(context);
|
||||
let end = (m.end() + context).min(text.len());
|
||||
// Ensure we cut on UTF-8 boundaries.
|
||||
let start = align_char_boundary(text, start, false);
|
||||
let end = align_char_boundary(text, end, true);
|
||||
let mut snippet = String::new();
|
||||
if start > 0 {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet.push_str(&text[start..end]);
|
||||
if end < text.len() {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet
|
||||
}
|
||||
|
||||
fn align_char_boundary(s: &str, mut idx: usize, forward: bool) -> usize {
|
||||
if idx >= s.len() {
|
||||
return s.len();
|
||||
}
|
||||
while !s.is_char_boundary(idx) {
|
||||
if forward {
|
||||
idx += 1;
|
||||
} else if idx == 0 {
|
||||
break;
|
||||
} else {
|
||||
idx -= 1;
|
||||
}
|
||||
}
|
||||
idx
|
||||
}
|
||||
|
||||
pub fn analyze_failure(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("failure");
|
||||
let mut last_call: Option<(usize, String, String)> = None;
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
match msg.from {
|
||||
"function_call" => {
|
||||
let (name, args) = extract_tool_info(msg.value);
|
||||
last_call = Some((i, name, args));
|
||||
continue;
|
||||
}
|
||||
"observation" => {}
|
||||
_ => continue,
|
||||
}
|
||||
|
||||
let value = msg.value;
|
||||
let lower = value.to_lowercase();
|
||||
let (call_index, tool_name) = match &last_call {
|
||||
Some((idx, name, _)) => (*idx, name.clone()),
|
||||
None => (i.saturating_sub(1), "unknown".to_string()),
|
||||
};
|
||||
|
||||
if let Some(m) = invalid_args_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureInvalidArgs,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.9)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "invalid_args",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = tool_not_found_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureToolNotFound,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.95)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "tool_not_found",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = auth_misuse_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureAuthMisuse,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.8)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "auth_misuse",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = state_error_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureStateError,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.85)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "state_error",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = bad_query_re().find(&lower) {
|
||||
let confidence = if ["error", "invalid", "failed"]
|
||||
.iter()
|
||||
.any(|w| lower.contains(w))
|
||||
{
|
||||
0.8
|
||||
} else {
|
||||
0.6
|
||||
};
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureBadQuery,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(confidence)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "bad_query",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn fc(value: &str) -> ShareGptMessage<'_> {
|
||||
ShareGptMessage {
|
||||
from: "function_call",
|
||||
value,
|
||||
}
|
||||
}
|
||||
fn obs(value: &str) -> ShareGptMessage<'_> {
|
||||
ShareGptMessage {
|
||||
from: "observation",
|
||||
value,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_invalid_args() {
|
||||
let msgs = vec![
|
||||
fc(r#"{"name":"create_user","arguments":{"age":"twelve"}}"#),
|
||||
obs("Error: validation failed - expected integer got string for field age"),
|
||||
];
|
||||
let g = analyze_failure(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionFailureInvalidArgs)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_tool_not_found() {
|
||||
let msgs = vec![
|
||||
fc(r#"{"name":"send_thought","arguments":{}}"#),
|
||||
obs("Error: unknown function 'send_thought'"),
|
||||
];
|
||||
let g = analyze_failure(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionFailureToolNotFound)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_auth_misuse() {
|
||||
let msgs = vec![
|
||||
fc(r#"{"name":"get_secret","arguments":{}}"#),
|
||||
obs("HTTP 401 Unauthorized"),
|
||||
];
|
||||
let g = analyze_failure(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionFailureAuthMisuse)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_state_error() {
|
||||
let msgs = vec![
|
||||
fc(r#"{"name":"commit_tx","arguments":{}}"#),
|
||||
obs("must call begin_tx first"),
|
||||
];
|
||||
let g = analyze_failure(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionFailureStateError)));
|
||||
}
|
||||
}
|
||||
433
crates/brightstaff/src/signals/execution/loops.rs
Normal file
433
crates/brightstaff/src/signals/execution/loops.rs
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
//! Execution loops detector. Direct port of `signals/execution/loops.py`.
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::analyzer::ShareGptMessage;
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
|
||||
pub const RETRY_THRESHOLD: usize = 3;
|
||||
pub const PARAMETER_DRIFT_THRESHOLD: usize = 3;
|
||||
pub const OSCILLATION_CYCLES_THRESHOLD: usize = 3;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ToolCall {
|
||||
pub index: usize,
|
||||
pub name: String,
|
||||
/// Canonical JSON string of arguments (sorted keys when parseable).
|
||||
pub args: String,
|
||||
pub args_dict: Option<serde_json::Map<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
impl ToolCall {
|
||||
pub fn args_equal(&self, other: &ToolCall) -> bool {
|
||||
match (&self.args_dict, &other.args_dict) {
|
||||
(Some(a), Some(b)) => a == b,
|
||||
_ => self.args == other.args,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_tool_call(index: usize, msg: &ShareGptMessage<'_>) -> Option<ToolCall> {
|
||||
if msg.from != "function_call" {
|
||||
return None;
|
||||
}
|
||||
let value = msg.value;
|
||||
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
|
||||
if let Some(obj) = parsed.as_object() {
|
||||
let name = obj
|
||||
.get("name")
|
||||
.or_else(|| obj.get("function"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let raw_args = obj.get("arguments").or_else(|| obj.get("args"));
|
||||
let (args_str, args_dict) = match raw_args {
|
||||
Some(serde_json::Value::Object(o)) => {
|
||||
let mut keys: Vec<&String> = o.keys().collect();
|
||||
keys.sort();
|
||||
let mut canon = serde_json::Map::new();
|
||||
for k in keys {
|
||||
canon.insert(k.clone(), o[k].clone());
|
||||
}
|
||||
(
|
||||
serde_json::to_string(&serde_json::Value::Object(canon.clone()))
|
||||
.unwrap_or_default(),
|
||||
Some(canon),
|
||||
)
|
||||
}
|
||||
Some(other) => (
|
||||
other
|
||||
.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
|
||||
None,
|
||||
),
|
||||
None => (String::new(), None),
|
||||
};
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: args_str,
|
||||
args_dict,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(paren) = value.find('(') {
|
||||
if paren > 0 {
|
||||
let name = value[..paren].trim().to_string();
|
||||
let args_part = &value[paren..];
|
||||
if args_part.starts_with('(') && args_part.ends_with(')') {
|
||||
let inner = args_part[1..args_part.len() - 1].trim();
|
||||
if let Ok(serde_json::Value::Object(o)) =
|
||||
serde_json::from_str::<serde_json::Value>(inner)
|
||||
{
|
||||
let mut keys: Vec<&String> = o.keys().collect();
|
||||
keys.sort();
|
||||
let mut canon = serde_json::Map::new();
|
||||
for k in keys {
|
||||
canon.insert(k.clone(), o[k].clone());
|
||||
}
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: serde_json::to_string(&serde_json::Value::Object(canon.clone()))
|
||||
.unwrap_or_default(),
|
||||
args_dict: Some(canon),
|
||||
});
|
||||
}
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: inner.to_string(),
|
||||
args_dict: None,
|
||||
});
|
||||
}
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: args_part.to_string(),
|
||||
args_dict: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Some(ToolCall {
|
||||
index,
|
||||
name: value.trim().to_string(),
|
||||
args: String::new(),
|
||||
args_dict: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_tool_calls(messages: &[ShareGptMessage<'_>]) -> Vec<ToolCall> {
|
||||
let mut out = Vec::new();
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
if let Some(c) = parse_tool_call(i, msg) {
|
||||
out.push(c);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn detect_retry(calls: &[ToolCall]) -> Vec<(usize, usize, String)> {
|
||||
if calls.len() < RETRY_THRESHOLD {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut patterns = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < calls.len() {
|
||||
let current = &calls[i];
|
||||
let mut j = i + 1;
|
||||
let mut run_length = 1;
|
||||
while j < calls.len() {
|
||||
if calls[j].name == current.name && calls[j].args_equal(current) {
|
||||
run_length += 1;
|
||||
j += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if run_length >= RETRY_THRESHOLD {
|
||||
patterns.push((calls[i].index, calls[j - 1].index, current.name.clone()));
|
||||
i = j;
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
patterns
|
||||
}
|
||||
|
||||
fn detect_parameter_drift(calls: &[ToolCall]) -> Vec<(usize, usize, String, usize)> {
|
||||
if calls.len() < PARAMETER_DRIFT_THRESHOLD {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut patterns = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < calls.len() {
|
||||
let current_name = calls[i].name.clone();
|
||||
let mut seen_args: Vec<String> = vec![calls[i].args.clone()];
|
||||
let mut unique_args = 1;
|
||||
let mut j = i + 1;
|
||||
while j < calls.len() {
|
||||
if calls[j].name != current_name {
|
||||
break;
|
||||
}
|
||||
if !seen_args.iter().any(|a| a == &calls[j].args) {
|
||||
seen_args.push(calls[j].args.clone());
|
||||
unique_args += 1;
|
||||
}
|
||||
j += 1;
|
||||
}
|
||||
let run_length = j - i;
|
||||
if run_length >= PARAMETER_DRIFT_THRESHOLD && unique_args >= 2 {
|
||||
patterns.push((
|
||||
calls[i].index,
|
||||
calls[j - 1].index,
|
||||
current_name,
|
||||
unique_args,
|
||||
));
|
||||
i = j;
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
patterns
|
||||
}
|
||||
|
||||
fn detect_oscillation(calls: &[ToolCall]) -> Vec<(usize, usize, Vec<String>, usize)> {
|
||||
let min_calls = 2 * OSCILLATION_CYCLES_THRESHOLD;
|
||||
if calls.len() < min_calls {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut patterns = Vec::new();
|
||||
let mut i: usize = 0;
|
||||
while i + min_calls <= calls.len() {
|
||||
let max_pat_len = (5usize).min(calls.len() - i);
|
||||
let mut found_for_i = false;
|
||||
for pat_len in 2..=max_pat_len {
|
||||
let pattern_names: Vec<String> =
|
||||
(0..pat_len).map(|k| calls[i + k].name.clone()).collect();
|
||||
let unique: std::collections::HashSet<&String> = pattern_names.iter().collect();
|
||||
if unique.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
let mut cycles = 1;
|
||||
let mut pos = i + pat_len;
|
||||
while pos + pat_len <= calls.len() {
|
||||
let mut all_match = true;
|
||||
for k in 0..pat_len {
|
||||
if calls[pos + k].name != pattern_names[k] {
|
||||
all_match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if all_match {
|
||||
cycles += 1;
|
||||
pos += pat_len;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if cycles >= OSCILLATION_CYCLES_THRESHOLD {
|
||||
let end_idx_in_calls = i + (cycles * pat_len) - 1;
|
||||
patterns.push((
|
||||
calls[i].index,
|
||||
calls[end_idx_in_calls].index,
|
||||
pattern_names,
|
||||
cycles,
|
||||
));
|
||||
// Mirror Python: `i = end_idx + 1 - pattern_len`. We set `i` so that
|
||||
// the next outer iteration begins after we account for overlap.
|
||||
i = end_idx_in_calls + 1 - pat_len;
|
||||
found_for_i = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !found_for_i {
|
||||
i += 1;
|
||||
} else {
|
||||
// Match Python's `i = end_idx + 1 - pattern_len; break` then loop.
|
||||
// We'll continue; the outer while re-checks i.
|
||||
}
|
||||
}
|
||||
if patterns.len() > 1 {
|
||||
patterns = deduplicate_patterns(patterns);
|
||||
}
|
||||
patterns
|
||||
}
|
||||
|
||||
fn deduplicate_patterns(
|
||||
mut patterns: Vec<(usize, usize, Vec<String>, usize)>,
|
||||
) -> Vec<(usize, usize, Vec<String>, usize)> {
|
||||
if patterns.is_empty() {
|
||||
return patterns;
|
||||
}
|
||||
patterns.sort_by(|a, b| {
|
||||
let ord = a.0.cmp(&b.0);
|
||||
if ord != std::cmp::Ordering::Equal {
|
||||
ord
|
||||
} else {
|
||||
(b.1 - b.0).cmp(&(a.1 - a.0))
|
||||
}
|
||||
});
|
||||
let mut result = Vec::new();
|
||||
let mut last_end: i64 = -1;
|
||||
for p in patterns {
|
||||
if (p.0 as i64) > last_end {
|
||||
last_end = p.1 as i64;
|
||||
result.push(p);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn analyze_loops(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("loops");
|
||||
let calls = extract_tool_calls(messages);
|
||||
if calls.len() < RETRY_THRESHOLD {
|
||||
return group;
|
||||
}
|
||||
|
||||
let retries = detect_retry(&calls);
|
||||
for (start_idx, end_idx, tool_name) in &retries {
|
||||
let call_count = calls
|
||||
.iter()
|
||||
.filter(|c| *start_idx <= c.index && c.index <= *end_idx)
|
||||
.count();
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionLoopsRetry,
|
||||
*start_idx,
|
||||
format!(
|
||||
"Tool '{}' called {} times with identical arguments",
|
||||
tool_name, call_count
|
||||
),
|
||||
)
|
||||
.with_confidence(0.95)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"start_index": start_idx,
|
||||
"end_index": end_idx,
|
||||
"call_count": call_count,
|
||||
"loop_type": "retry",
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
let drifts = detect_parameter_drift(&calls);
|
||||
for (start_idx, end_idx, tool_name, variation_count) in &drifts {
|
||||
let overlaps_retry = retries
|
||||
.iter()
|
||||
.any(|r| !(*end_idx < r.0 || *start_idx > r.1));
|
||||
if overlaps_retry {
|
||||
continue;
|
||||
}
|
||||
let call_count = calls
|
||||
.iter()
|
||||
.filter(|c| *start_idx <= c.index && c.index <= *end_idx)
|
||||
.count();
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionLoopsParameterDrift,
|
||||
*start_idx,
|
||||
format!(
|
||||
"Tool '{}' called {} times with {} different argument variations",
|
||||
tool_name, call_count, variation_count
|
||||
),
|
||||
)
|
||||
.with_confidence(0.85)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"start_index": start_idx,
|
||||
"end_index": end_idx,
|
||||
"call_count": call_count,
|
||||
"variation_count": variation_count,
|
||||
"loop_type": "parameter_drift",
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
let oscillations = detect_oscillation(&calls);
|
||||
for (start_idx, end_idx, tool_names, cycle_count) in &oscillations {
|
||||
let pattern_str = tool_names.join(" \u{2192} ");
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionLoopsOscillation,
|
||||
*start_idx,
|
||||
format!(
|
||||
"Oscillation pattern [{}] repeated {} times",
|
||||
pattern_str, cycle_count
|
||||
),
|
||||
)
|
||||
.with_confidence(0.9)
|
||||
.with_metadata(json!({
|
||||
"pattern": tool_names,
|
||||
"start_index": start_idx,
|
||||
"end_index": end_idx,
|
||||
"cycle_count": cycle_count,
|
||||
"loop_type": "oscillation",
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn fc(value: &str) -> ShareGptMessage<'_> {
|
||||
ShareGptMessage {
|
||||
from: "function_call",
|
||||
value,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_retry_loop() {
|
||||
let arg = r#"{"name":"check_status","arguments":{"id":"abc"}}"#;
|
||||
let msgs = vec![fc(arg), fc(arg), fc(arg), fc(arg)];
|
||||
let g = analyze_loops(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionLoopsRetry)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_parameter_drift() {
|
||||
let msgs = vec![
|
||||
fc(r#"{"name":"search","arguments":{"q":"a"}}"#),
|
||||
fc(r#"{"name":"search","arguments":{"q":"ab"}}"#),
|
||||
fc(r#"{"name":"search","arguments":{"q":"abc"}}"#),
|
||||
fc(r#"{"name":"search","arguments":{"q":"abcd"}}"#),
|
||||
];
|
||||
let g = analyze_loops(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionLoopsParameterDrift)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_oscillation() {
|
||||
let a = r#"{"name":"toolA","arguments":{}}"#;
|
||||
let b = r#"{"name":"toolB","arguments":{}}"#;
|
||||
let msgs = vec![fc(a), fc(b), fc(a), fc(b), fc(a), fc(b)];
|
||||
let g = analyze_loops(&msgs);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::ExecutionLoopsOscillation)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_signals_when_few_calls() {
|
||||
let msgs = vec![fc(r#"{"name":"only_once","arguments":{}}"#)];
|
||||
let g = analyze_loops(&msgs);
|
||||
assert!(g.signals.is_empty());
|
||||
}
|
||||
}
|
||||
5
crates/brightstaff/src/signals/execution/mod.rs
Normal file
5
crates/brightstaff/src/signals/execution/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
//! Execution signals: failure (agent-caused tool errors) and loops
|
||||
//! (repetitive tool-call behavior).
|
||||
|
||||
pub mod failure;
|
||||
pub mod loops;
|
||||
193
crates/brightstaff/src/signals/interaction/constants.rs
Normal file
193
crates/brightstaff/src/signals/interaction/constants.rs
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
//! Shared constants for the interaction layer detectors.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/constants.py`.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
pub const POSITIVE_PREFIXES: &[&str] = &[
|
||||
"yes",
|
||||
"yeah",
|
||||
"yep",
|
||||
"yup",
|
||||
"sure",
|
||||
"ok",
|
||||
"okay",
|
||||
"great",
|
||||
"awesome",
|
||||
"perfect",
|
||||
"thanks",
|
||||
"thank",
|
||||
"wonderful",
|
||||
"excellent",
|
||||
"amazing",
|
||||
"nice",
|
||||
"good",
|
||||
"cool",
|
||||
"absolutely",
|
||||
"definitely",
|
||||
"please",
|
||||
];
|
||||
|
||||
pub const CONFIRMATION_PREFIXES: &[&str] = &[
|
||||
"yes",
|
||||
"yeah",
|
||||
"yep",
|
||||
"yup",
|
||||
"correct",
|
||||
"right",
|
||||
"that's correct",
|
||||
"thats correct",
|
||||
"that's right",
|
||||
"thats right",
|
||||
"that is correct",
|
||||
"that is right",
|
||||
];
|
||||
|
||||
const STOPWORD_LIST: &[&str] = &[
|
||||
"a",
|
||||
"about",
|
||||
"above",
|
||||
"after",
|
||||
"again",
|
||||
"against",
|
||||
"all",
|
||||
"am",
|
||||
"an",
|
||||
"and",
|
||||
"any",
|
||||
"are",
|
||||
"as",
|
||||
"at",
|
||||
"be",
|
||||
"because",
|
||||
"been",
|
||||
"before",
|
||||
"being",
|
||||
"below",
|
||||
"between",
|
||||
"both",
|
||||
"but",
|
||||
"by",
|
||||
"can",
|
||||
"could",
|
||||
"did",
|
||||
"do",
|
||||
"does",
|
||||
"doing",
|
||||
"down",
|
||||
"during",
|
||||
"each",
|
||||
"few",
|
||||
"for",
|
||||
"from",
|
||||
"further",
|
||||
"had",
|
||||
"has",
|
||||
"have",
|
||||
"having",
|
||||
"he",
|
||||
"her",
|
||||
"here",
|
||||
"hers",
|
||||
"herself",
|
||||
"him",
|
||||
"himself",
|
||||
"his",
|
||||
"how",
|
||||
"i",
|
||||
"if",
|
||||
"in",
|
||||
"into",
|
||||
"is",
|
||||
"it",
|
||||
"its",
|
||||
"itself",
|
||||
"just",
|
||||
"me",
|
||||
"more",
|
||||
"most",
|
||||
"my",
|
||||
"myself",
|
||||
"no",
|
||||
"nor",
|
||||
"not",
|
||||
"now",
|
||||
"of",
|
||||
"off",
|
||||
"on",
|
||||
"once",
|
||||
"only",
|
||||
"or",
|
||||
"other",
|
||||
"our",
|
||||
"ours",
|
||||
"ourselves",
|
||||
"out",
|
||||
"over",
|
||||
"own",
|
||||
"same",
|
||||
"she",
|
||||
"should",
|
||||
"so",
|
||||
"some",
|
||||
"such",
|
||||
"than",
|
||||
"that",
|
||||
"the",
|
||||
"their",
|
||||
"theirs",
|
||||
"them",
|
||||
"themselves",
|
||||
"then",
|
||||
"there",
|
||||
"these",
|
||||
"they",
|
||||
"this",
|
||||
"those",
|
||||
"through",
|
||||
"to",
|
||||
"too",
|
||||
"under",
|
||||
"until",
|
||||
"up",
|
||||
"very",
|
||||
"was",
|
||||
"we",
|
||||
"were",
|
||||
"what",
|
||||
"when",
|
||||
"where",
|
||||
"which",
|
||||
"while",
|
||||
"who",
|
||||
"whom",
|
||||
"why",
|
||||
"with",
|
||||
"would",
|
||||
"you",
|
||||
"your",
|
||||
"yours",
|
||||
"yourself",
|
||||
"yourselves",
|
||||
];
|
||||
|
||||
pub fn stopwords() -> &'static HashSet<&'static str> {
|
||||
static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
|
||||
SET.get_or_init(|| STOPWORD_LIST.iter().copied().collect())
|
||||
}
|
||||
|
||||
/// Returns true if `text` (case-insensitive, trimmed) starts with any of the
|
||||
/// given prefixes treated as **whole tokens or token sequences**. This matches
|
||||
/// the Python's `text_lower.startswith(prefix)` plus the natural intent that
|
||||
/// `"please"` shouldn't fire on `"pleased"`.
|
||||
pub fn starts_with_prefix(text: &str, prefixes: &[&str]) -> bool {
|
||||
let lowered = text.to_lowercase();
|
||||
let trimmed = lowered.trim_start();
|
||||
for prefix in prefixes {
|
||||
if trimmed.starts_with(prefix) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
445
crates/brightstaff/src/signals/interaction/disengagement.rs
Normal file
445
crates/brightstaff/src/signals/interaction/disengagement.rs
Normal file
|
|
@ -0,0 +1,445 @@
|
|||
//! Disengagement signals: escalation, quit, negative stance.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/disengagement.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||
|
||||
const ESCALATION_PATTERN_TEXTS: &[&str] = &[
|
||||
// Human requests
|
||||
"speak to a human",
|
||||
"talk to a human",
|
||||
"connect me to a human",
|
||||
"connect me with a human",
|
||||
"transfer me to a human",
|
||||
"get me a human",
|
||||
"chat with a human",
|
||||
// Person requests
|
||||
"speak to a person",
|
||||
"talk to a person",
|
||||
"connect me to a person",
|
||||
"connect me with a person",
|
||||
"transfer me to a person",
|
||||
"get me a person",
|
||||
"chat with a person",
|
||||
// Real person requests
|
||||
"speak to a real person",
|
||||
"talk to a real person",
|
||||
"connect me to a real person",
|
||||
"connect me with a real person",
|
||||
"transfer me to a real person",
|
||||
"get me a real person",
|
||||
"chat with a real person",
|
||||
// Actual person requests
|
||||
"speak to an actual person",
|
||||
"talk to an actual person",
|
||||
"connect me to an actual person",
|
||||
"connect me with an actual person",
|
||||
"transfer me to an actual person",
|
||||
"get me an actual person",
|
||||
"chat with an actual person",
|
||||
// Supervisor requests
|
||||
"speak to a supervisor",
|
||||
"talk to a supervisor",
|
||||
"connect me to a supervisor",
|
||||
"connect me with a supervisor",
|
||||
"transfer me to a supervisor",
|
||||
"get me a supervisor",
|
||||
"chat with a supervisor",
|
||||
// Manager requests
|
||||
"speak to a manager",
|
||||
"talk to a manager",
|
||||
"connect me to a manager",
|
||||
"connect me with a manager",
|
||||
"transfer me to a manager",
|
||||
"get me a manager",
|
||||
"chat with a manager",
|
||||
// Customer service requests
|
||||
"speak to customer service",
|
||||
"talk to customer service",
|
||||
"connect me to customer service",
|
||||
"connect me with customer service",
|
||||
"transfer me to customer service",
|
||||
"get me customer service",
|
||||
"chat with customer service",
|
||||
// Customer support requests
|
||||
"speak to customer support",
|
||||
"talk to customer support",
|
||||
"connect me to customer support",
|
||||
"connect me with customer support",
|
||||
"transfer me to customer support",
|
||||
"get me customer support",
|
||||
"chat with customer support",
|
||||
// Support requests
|
||||
"speak to support",
|
||||
"talk to support",
|
||||
"connect me to support",
|
||||
"connect me with support",
|
||||
"transfer me to support",
|
||||
"get me support",
|
||||
"chat with support",
|
||||
// Tech support requests
|
||||
"speak to tech support",
|
||||
"talk to tech support",
|
||||
"connect me to tech support",
|
||||
"connect me with tech support",
|
||||
"transfer me to tech support",
|
||||
"get me tech support",
|
||||
"chat with tech support",
|
||||
// Help desk requests
|
||||
"speak to help desk",
|
||||
"talk to help desk",
|
||||
"connect me to help desk",
|
||||
"connect me with help desk",
|
||||
"transfer me to help desk",
|
||||
"get me help desk",
|
||||
"chat with help desk",
|
||||
// Explicit escalation
|
||||
"escalate this",
|
||||
];
|
||||
|
||||
const QUIT_PATTERN_TEXTS: &[&str] = &[
|
||||
"i give up",
|
||||
"i'm giving up",
|
||||
"im giving up",
|
||||
"i'm going to quit",
|
||||
"i quit",
|
||||
"forget it",
|
||||
"forget this",
|
||||
"screw it",
|
||||
"screw this",
|
||||
"don't bother trying",
|
||||
"don't bother with this",
|
||||
"don't bother with it",
|
||||
"don't even bother",
|
||||
"why bother",
|
||||
"not worth it",
|
||||
"this is hopeless",
|
||||
"going elsewhere",
|
||||
"try somewhere else",
|
||||
"look elsewhere",
|
||||
];
|
||||
|
||||
const NEGATIVE_STANCE_PATTERN_TEXTS: &[&str] = &[
|
||||
"this is useless",
|
||||
"not helpful",
|
||||
"doesn't help",
|
||||
"not helping",
|
||||
"you're not helping",
|
||||
"youre not helping",
|
||||
"this doesn't work",
|
||||
"this doesnt work",
|
||||
"this isn't working",
|
||||
"this isnt working",
|
||||
"still doesn't work",
|
||||
"still doesnt work",
|
||||
"still not working",
|
||||
"still isn't working",
|
||||
"still isnt working",
|
||||
"waste of time",
|
||||
"wasting my time",
|
||||
"this is ridiculous",
|
||||
"this is absurd",
|
||||
"this is insane",
|
||||
"this is stupid",
|
||||
"this is dumb",
|
||||
"this sucks",
|
||||
"this is frustrating",
|
||||
"not good enough",
|
||||
"why can't you",
|
||||
"why cant you",
|
||||
"same issue",
|
||||
"did that already",
|
||||
"done that already",
|
||||
"tried that already",
|
||||
"already tried that",
|
||||
"i've done that",
|
||||
"ive done that",
|
||||
"i've tried that",
|
||||
"ive tried that",
|
||||
"i'm disappointed",
|
||||
"im disappointed",
|
||||
"disappointed with you",
|
||||
"disappointed in you",
|
||||
"useless bot",
|
||||
"dumb bot",
|
||||
"stupid bot",
|
||||
];
|
||||
|
||||
const AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS: &[&str] = &[
|
||||
"this is bullshit",
|
||||
"what bullshit",
|
||||
"such bullshit",
|
||||
"total bullshit",
|
||||
"complete bullshit",
|
||||
"this is crap",
|
||||
"what crap",
|
||||
"this is shit",
|
||||
"what the hell is wrong with you",
|
||||
"what the fuck is wrong with you",
|
||||
"you're fucking useless",
|
||||
"youre fucking useless",
|
||||
"you are fucking useless",
|
||||
"fucking useless",
|
||||
"this bot is shit",
|
||||
"this bot is crap",
|
||||
"damn bot",
|
||||
"fucking bot",
|
||||
"stupid fucking",
|
||||
"are you fucking kidding",
|
||||
"wtf is wrong with you",
|
||||
"wtf is this",
|
||||
"ffs just",
|
||||
"for fucks sake",
|
||||
"for fuck's sake",
|
||||
"what the f**k",
|
||||
"what the f*ck",
|
||||
"what the f***",
|
||||
"that's bullsh*t",
|
||||
"thats bullsh*t",
|
||||
"that's bull***t",
|
||||
"thats bull***t",
|
||||
"that's bs",
|
||||
"thats bs",
|
||||
"this is bullsh*t",
|
||||
"this is bull***t",
|
||||
"this is bs",
|
||||
];
|
||||
|
||||
fn escalation_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(ESCALATION_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn quit_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(QUIT_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn negative_stance_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(NEGATIVE_STANCE_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn profanity_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn re_consecutive_q() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| Regex::new(r"\?{2,}").unwrap())
|
||||
}
|
||||
fn re_consecutive_e() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| Regex::new(r"!{2,}").unwrap())
|
||||
}
|
||||
fn re_mixed_punct() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| Regex::new(r"[?!]{3,}").unwrap())
|
||||
}
|
||||
|
||||
pub fn analyze_disengagement(
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
char_ngram_threshold: f32,
|
||||
token_cosine_threshold: f32,
|
||||
) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("disengagement");
|
||||
|
||||
for (idx, role, norm_msg) in normalized_messages {
|
||||
if *role != "human" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = &norm_msg.raw;
|
||||
|
||||
// All-caps shouting check.
|
||||
let alpha_chars: String = text.chars().filter(|c| c.is_alphabetic()).collect();
|
||||
if alpha_chars.chars().count() >= 10 {
|
||||
let upper_count = alpha_chars.chars().filter(|c| c.is_uppercase()).count();
|
||||
let upper_ratio = upper_count as f32 / alpha_chars.chars().count() as f32;
|
||||
if upper_ratio >= 0.8 {
|
||||
let snippet: String = text.chars().take(50).collect();
|
||||
group.add_signal(
|
||||
SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
|
||||
.with_metadata(json!({
|
||||
"indicator_type": "all_caps",
|
||||
"upper_ratio": upper_ratio,
|
||||
})),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Excessive consecutive punctuation.
|
||||
let starts_with_positive = starts_with_prefix(text, POSITIVE_PREFIXES);
|
||||
let cq = re_consecutive_q().find_iter(text).count();
|
||||
let ce = re_consecutive_e().find_iter(text).count();
|
||||
let mixed = re_mixed_punct().find_iter(text).count();
|
||||
if !starts_with_positive && (cq >= 1 || ce >= 1 || mixed >= 1) {
|
||||
let snippet: String = text.chars().take(50).collect();
|
||||
group.add_signal(
|
||||
SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
|
||||
.with_metadata(json!({
|
||||
"indicator_type": "excessive_punctuation",
|
||||
"consecutive_questions": cq,
|
||||
"consecutive_exclamations": ce,
|
||||
"mixed_punctuation": mixed,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
// Escalation patterns.
|
||||
let mut found_escalation = false;
|
||||
for pattern in escalation_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::DisengagementEscalation,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "escalation"})),
|
||||
);
|
||||
found_escalation = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Quit patterns (independent of escalation).
|
||||
for pattern in quit_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(SignalType::DisengagementQuit, *idx, pattern.raw.clone())
|
||||
.with_metadata(json!({"pattern_type": "quit"})),
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Profanity (more specific) before generic negative stance.
|
||||
let mut found_profanity = false;
|
||||
for pattern in profanity_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::DisengagementNegativeStance,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({
|
||||
"indicator_type": "profanity",
|
||||
"pattern": pattern.raw,
|
||||
})),
|
||||
);
|
||||
found_profanity = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !found_escalation && !found_profanity {
|
||||
for pattern in negative_stance_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::DisengagementNegativeStance,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({
|
||||
"indicator_type": "complaint",
|
||||
"pattern": pattern.raw,
|
||||
})),
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn nm(s: &str) -> NormalizedMessage {
|
||||
NormalizedMessage::from_text(s, 2000)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_human_escalation_request() {
|
||||
let msgs = vec![(
|
||||
0usize,
|
||||
"human",
|
||||
nm("This is taking forever, get me a human"),
|
||||
)];
|
||||
let g = analyze_disengagement(&msgs, 0.65, 0.6);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::DisengagementEscalation)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_quit_intent() {
|
||||
let msgs = vec![(0usize, "human", nm("Forget it, I give up"))];
|
||||
let g = analyze_disengagement(&msgs, 0.65, 0.6);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::DisengagementQuit)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_negative_stance_complaint() {
|
||||
let msgs = vec![(0usize, "human", nm("This is useless"))];
|
||||
let g = analyze_disengagement(&msgs, 0.65, 0.6);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn detects_excessive_punctuation_as_negative_stance() {
|
||||
let msgs = vec![(0usize, "human", nm("WHY isn't this working???"))];
|
||||
let g = analyze_disengagement(&msgs, 0.65, 0.6);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn positive_excitement_is_not_disengagement() {
|
||||
let msgs = vec![(0usize, "human", nm("Yes!! That's perfect!!!"))];
|
||||
let g = analyze_disengagement(&msgs, 0.65, 0.6);
|
||||
assert!(g
|
||||
.signals
|
||||
.iter()
|
||||
.all(|s| !matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
|
||||
}
|
||||
}
|
||||
338
crates/brightstaff/src/signals/interaction/misalignment.rs
Normal file
338
crates/brightstaff/src/signals/interaction/misalignment.rs
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
//! Misalignment signals: corrections, rephrases, clarifications.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/misalignment.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use super::constants::{stopwords, CONFIRMATION_PREFIXES};
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||
|
||||
/// Literal phrases indicating the user is explicitly correcting the
/// assistant. Matched fuzzily (char n-gram + token cosine) against
/// normalized message text, so near-misses and typos still hit.
const CORRECTION_PATTERN_TEXTS: &[&str] = &[
    "no, i meant",
    "no i meant",
    "no, i said",
    "no i said",
    "no, i asked",
    "no i asked",
    "nah, i meant",
    "nope, i meant",
    "not what i said",
    "not what i asked",
    "that's not what i said",
    "that's not what i asked",
    "that's not what i meant",
    "thats not what i said",
    "thats not what i asked",
    "thats not what i meant",
    "that's not what you",
    "no that's not what i",
    "no, that's not what i",
    "you're not quite right",
    "youre not quite right",
    "you're not exactly right",
    "youre not exactly right",
    "you're wrong about",
    "youre wrong about",
    "i just said",
    "i already said",
    "i already told you",
];

/// Phrases signalling the user is about to restate their request.
const REPHRASE_PATTERN_TEXTS: &[&str] = &[
    "let me rephrase",
    "let me explain again",
    "what i'm trying to say",
    "what i'm saying is",
    "in other words",
];

/// Phrases signalling the user did not understand the assistant's reply.
const CLARIFICATION_PATTERN_TEXTS: &[&str] = &[
    "i don't understand",
    "don't understand",
    "not understanding",
    "can't understand",
    "don't get it",
    "don't follow",
    "i'm confused",
    "so confused",
    "makes no sense",
    "doesn't make sense",
    "not making sense",
    "what do you mean",
    "what does that mean",
    "what are you saying",
    "i'm lost",
    "totally lost",
    "lost me",
    "no clue what you",
    "no idea what you",
    "no clue what that",
    "no idea what that",
    "come again",
    "say that again",
    "repeat that",
    "trouble following",
    "hard to follow",
    "can't follow",
];
|
||||
|
||||
/// Correction patterns, normalized lazily once per process.
fn correction_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CORRECTION_PATTERN_TEXTS))
}

/// Rephrase patterns, normalized lazily once per process.
fn rephrase_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(REPHRASE_PATTERN_TEXTS))
}

/// Clarification patterns, normalized lazily once per process.
fn clarification_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CLARIFICATION_PATTERN_TEXTS))
}
|
||||
|
||||
fn is_confirmation_message(text: &str) -> bool {
|
||||
let lowered = text.to_lowercase();
|
||||
let trimmed = lowered.trim();
|
||||
CONFIRMATION_PREFIXES.iter().any(|p| trimmed.starts_with(p))
|
||||
}
|
||||
|
||||
/// Detect whether two user messages appear to be rephrases of each other.
///
/// `norm_msg1` is the candidate (the newer message at the call site in
/// `analyze_misalignment`), `norm_msg2` the earlier one. Both must have at
/// least 3 raw tokens and `min_meaningful_tokens` non-stopword tokens; the
/// candidate must not open with a confirmation phrase, may introduce at most
/// `max_new_content_ratio` of new non-stopword tokens, and must overlap the
/// earlier message by at least `overlap_threshold` relative to the smaller
/// token set.
pub fn is_similar_rephrase(
    norm_msg1: &NormalizedMessage,
    norm_msg2: &NormalizedMessage,
    overlap_threshold: f32,
    min_meaningful_tokens: usize,
    max_new_content_ratio: f32,
) -> bool {
    // Too short to compare meaningfully.
    if norm_msg1.tokens.len() < 3 || norm_msg2.tokens.len() < 3 {
        return false;
    }
    // "Yes, ..." style openers are follow-ups, not rephrases.
    if is_confirmation_message(&norm_msg1.raw) {
        return false;
    }

    // Compare only content-bearing (non-stopword) tokens.
    let stops = stopwords();
    let tokens1: std::collections::HashSet<&str> = norm_msg1
        .tokens
        .iter()
        .filter(|t| !stops.contains(t.as_str()))
        .map(|s| s.as_str())
        .collect();
    let tokens2: std::collections::HashSet<&str> = norm_msg2
        .tokens
        .iter()
        .filter(|t| !stops.contains(t.as_str()))
        .map(|s| s.as_str())
        .collect();

    if tokens1.len() < min_meaningful_tokens || tokens2.len() < min_meaningful_tokens {
        return false;
    }

    // A rephrase restates; too much brand-new content means a new request.
    let new_tokens: std::collections::HashSet<&&str> = tokens1.difference(&tokens2).collect();
    let new_content_ratio = if tokens1.is_empty() {
        0.0
    } else {
        new_tokens.len() as f32 / tokens1.len() as f32
    };
    if new_content_ratio > max_new_content_ratio {
        return false;
    }

    // Overlap is measured against the smaller set, so a short restatement of
    // a long message can still score highly.
    let intersection = tokens1.intersection(&tokens2).count();
    let min_size = tokens1.len().min(tokens2.len());
    // Defensive: unreachable when min_meaningful_tokens >= 1, but the
    // parameter permits 0.
    if min_size == 0 {
        return false;
    }
    let overlap_ratio = intersection as f32 / min_size as f32;
    overlap_ratio >= overlap_threshold
}
|
||||
|
||||
/// Analyze user messages for misalignment signals.
|
||||
pub fn analyze_misalignment(
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
char_ngram_threshold: f32,
|
||||
token_cosine_threshold: f32,
|
||||
) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("misalignment");
|
||||
|
||||
let mut prev_user_idx: Option<usize> = None;
|
||||
let mut prev_user_msg: Option<&NormalizedMessage> = None;
|
||||
|
||||
for (idx, role, norm_msg) in normalized_messages {
|
||||
if *role != "human" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut found_in_turn = false;
|
||||
|
||||
for pattern in correction_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::MisalignmentCorrection,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "correction"})),
|
||||
);
|
||||
found_in_turn = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if found_in_turn {
|
||||
prev_user_idx = Some(*idx);
|
||||
prev_user_msg = Some(norm_msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
for pattern in rephrase_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::MisalignmentRephrase,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "rephrase"})),
|
||||
);
|
||||
found_in_turn = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if found_in_turn {
|
||||
prev_user_idx = Some(*idx);
|
||||
prev_user_msg = Some(norm_msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
for pattern in clarification_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::MisalignmentClarification,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "clarification"})),
|
||||
);
|
||||
found_in_turn = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if found_in_turn {
|
||||
prev_user_idx = Some(*idx);
|
||||
prev_user_msg = Some(norm_msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Semantic rephrase vs the previous user message (recent only).
|
||||
if let (Some(prev_idx), Some(prev_msg)) = (prev_user_idx, prev_user_msg) {
|
||||
let turns_between = idx.saturating_sub(prev_idx);
|
||||
if turns_between <= 3 && is_similar_rephrase(norm_msg, prev_msg, 0.75, 4, 0.5) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::MisalignmentRephrase,
|
||||
*idx,
|
||||
"[similar rephrase detected]",
|
||||
)
|
||||
.with_confidence(0.8)
|
||||
.with_metadata(json!({
|
||||
"pattern_type": "semantic_rephrase",
|
||||
"compared_to": prev_idx,
|
||||
})),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
prev_user_idx = Some(*idx);
|
||||
prev_user_msg = Some(norm_msg);
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand: normalize a message with the default 2000-char cap.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    // Build an indexed (idx, role, message) transcript from (role, text) pairs.
    fn make(items: &[(&'static str, &str)]) -> Vec<(usize, &'static str, NormalizedMessage)> {
        items
            .iter()
            .enumerate()
            .map(|(i, (role, text))| (i, *role, nm(text)))
            .collect()
    }

    #[test]
    fn detects_explicit_correction() {
        let msgs = make(&[
            ("human", "Show me my orders"),
            ("gpt", "Sure, here are your invoices"),
            ("human", "No, I meant my recent orders"),
        ]);
        let g = analyze_misalignment(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentCorrection)));
    }

    #[test]
    fn detects_rephrase_marker() {
        let msgs = make(&[
            ("human", "Show me X"),
            ("gpt", "Sure"),
            ("human", "Let me rephrase: I want X grouped by date"),
        ]);
        let g = analyze_misalignment(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentRephrase)));
    }

    #[test]
    fn detects_clarification_request() {
        let msgs = make(&[
            ("human", "Run the report"),
            ("gpt", "Foobar quux baz."),
            ("human", "I don't understand what you mean"),
        ]);
        let g = analyze_misalignment(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentClarification)));
    }

    // Confirmation openers must short-circuit the semantic-rephrase check.
    #[test]
    fn confirmation_is_not_a_rephrase() {
        let m1 = nm("Yes, that's correct, please proceed with the order");
        let m2 = nm("please proceed with the order for the same product");
        assert!(!is_similar_rephrase(&m1, &m2, 0.75, 4, 0.5));
    }
}
|
||||
10
crates/brightstaff/src/signals/interaction/mod.rs
Normal file
10
crates/brightstaff/src/signals/interaction/mod.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
//! Interaction signals: misalignment, stagnation, disengagement, satisfaction.
|
||||
//!
|
||||
//! These signals capture how the dialogue itself unfolds (semantic alignment,
|
||||
//! progress, engagement, closure) independent of tool execution outcomes.
|
||||
|
||||
pub mod constants;
|
||||
pub mod disengagement;
|
||||
pub mod misalignment;
|
||||
pub mod satisfaction;
|
||||
pub mod stagnation;
|
||||
177
crates/brightstaff/src/signals/interaction/satisfaction.rs
Normal file
177
crates/brightstaff/src/signals/interaction/satisfaction.rs
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
//! Satisfaction signals: gratitude, confirmation, success.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/satisfaction.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||
|
||||
/// Phrases expressing thanks or explicit appreciation.
const GRATITUDE_PATTERN_TEXTS: &[&str] = &[
    "that's helpful",
    "that helps",
    "this helps",
    "appreciate it",
    "appreciate that",
    "that's perfect",
    "exactly what i needed",
    "just what i needed",
    "you're the best",
    "you rock",
    "you're awesome",
    "you're amazing",
    "you're great",
];

/// Phrases confirming the assistant's answer is acceptable.
const CONFIRMATION_PATTERN_TEXTS: &[&str] = &[
    "that works",
    "this works",
    "that's great",
    "that's amazing",
    "this is great",
    "that's awesome",
    "love it",
    "love this",
    "love that",
];

/// Phrases reporting that a suggested action actually succeeded.
const SUCCESS_PATTERN_TEXTS: &[&str] = &[
    "it worked",
    "that worked",
    "this worked",
    "it's working",
    "that's working",
    "this is working",
];
|
||||
|
||||
/// Gratitude patterns, normalized lazily once per process.
fn gratitude_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(GRATITUDE_PATTERN_TEXTS))
}

/// Confirmation patterns, normalized lazily once per process.
fn confirmation_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CONFIRMATION_PATTERN_TEXTS))
}

/// Success patterns, normalized lazily once per process.
fn success_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(SUCCESS_PATTERN_TEXTS))
}
|
||||
|
||||
pub fn analyze_satisfaction(
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
char_ngram_threshold: f32,
|
||||
token_cosine_threshold: f32,
|
||||
) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("satisfaction");
|
||||
|
||||
for (idx, role, norm_msg) in normalized_messages {
|
||||
if *role != "human" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut found = false;
|
||||
|
||||
for pattern in gratitude_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::SatisfactionGratitude,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "gratitude"})),
|
||||
);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found {
|
||||
continue;
|
||||
}
|
||||
|
||||
for pattern in confirmation_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::SatisfactionConfirmation,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "confirmation"})),
|
||||
);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found {
|
||||
continue;
|
||||
}
|
||||
|
||||
for pattern in success_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(SignalType::SatisfactionSuccess, *idx, pattern.raw.clone())
|
||||
.with_metadata(json!({"pattern_type": "success"})),
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand: normalize a message with the default 2000-char cap.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    #[test]
    fn detects_gratitude() {
        let msgs = vec![(0usize, "human", nm("That's perfect, appreciate it!"))];
        let g = analyze_satisfaction(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::SatisfactionGratitude)));
    }

    #[test]
    fn detects_confirmation() {
        let msgs = vec![(0usize, "human", nm("That works for me, thanks"))];
        let g = analyze_satisfaction(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::SatisfactionConfirmation)));
    }

    #[test]
    fn detects_success() {
        let msgs = vec![(0usize, "human", nm("Great, it worked!"))];
        let g = analyze_satisfaction(&msgs, 0.65, 0.6);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::SatisfactionSuccess)));
    }
}
|
||||
241
crates/brightstaff/src/signals/interaction/stagnation.rs
Normal file
241
crates/brightstaff/src/signals/interaction/stagnation.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
//! Stagnation signals: dragging (turn-count efficiency) and repetition.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/stagnation.py`.
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType, TurnMetrics};
|
||||
use crate::signals::text_processing::NormalizedMessage;
|
||||
|
||||
/// Adapter row used by stagnation::dragging detector. Mirrors the ShareGPT
/// `{"from": role, "value": text}` shape used in the Python reference.
pub struct ShareGptMsg<'a> {
    // Speaker role. Only "human" and "gpt" are counted toward turn totals
    // in `analyze_dragging`; other values are ignored.
    pub from: &'a str,
}
|
||||
|
||||
pub fn analyze_dragging(
|
||||
messages: &[ShareGptMsg<'_>],
|
||||
baseline_turns: usize,
|
||||
efficiency_threshold: f32,
|
||||
) -> (SignalGroup, TurnMetrics) {
|
||||
let mut group = SignalGroup::new("stagnation");
|
||||
|
||||
let mut user_turns: usize = 0;
|
||||
let mut assistant_turns: usize = 0;
|
||||
for m in messages {
|
||||
match m.from {
|
||||
"human" => user_turns += 1,
|
||||
"gpt" => assistant_turns += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let total_turns = user_turns;
|
||||
let efficiency_score: f32 = if total_turns == 0 || total_turns <= baseline_turns {
|
||||
1.0
|
||||
} else {
|
||||
let excess = (total_turns - baseline_turns) as f32;
|
||||
1.0 / (1.0 + excess * 0.25)
|
||||
};
|
||||
|
||||
let is_dragging = efficiency_score < efficiency_threshold;
|
||||
let metrics = TurnMetrics {
|
||||
total_turns,
|
||||
user_turns,
|
||||
assistant_turns,
|
||||
is_dragging,
|
||||
efficiency_score,
|
||||
};
|
||||
|
||||
if is_dragging {
|
||||
let last_idx = messages.len().saturating_sub(1);
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::StagnationDragging,
|
||||
last_idx,
|
||||
format!(
|
||||
"Conversation dragging: {} turns (efficiency: {:.2})",
|
||||
total_turns, efficiency_score
|
||||
),
|
||||
)
|
||||
.with_confidence(1.0 - efficiency_score)
|
||||
.with_metadata(json!({
|
||||
"total_turns": total_turns,
|
||||
"efficiency_score": efficiency_score,
|
||||
"baseline_turns": baseline_turns,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
(group, metrics)
|
||||
}
|
||||
|
||||
pub fn analyze_repetition(
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
lookback: usize,
|
||||
exact_threshold: f32,
|
||||
near_duplicate_threshold: f32,
|
||||
) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("stagnation");
|
||||
|
||||
// We keep references into `normalized_messages`. Since `normalized_messages`
|
||||
// is borrowed for the whole function, this avoids cloning.
|
||||
let mut prev_human: Vec<(usize, &NormalizedMessage)> = Vec::new();
|
||||
let mut prev_gpt: Vec<(usize, &NormalizedMessage)> = Vec::new();
|
||||
|
||||
for (idx, role, norm_msg) in normalized_messages {
|
||||
if *role != "human" && *role != "gpt" {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip human positive-prefix messages; they're naturally repetitive.
|
||||
if *role == "human" && starts_with_prefix(&norm_msg.raw, POSITIVE_PREFIXES) {
|
||||
prev_human.push((*idx, norm_msg));
|
||||
continue;
|
||||
}
|
||||
|
||||
if norm_msg.tokens.len() < 5 {
|
||||
if *role == "human" {
|
||||
prev_human.push((*idx, norm_msg));
|
||||
} else {
|
||||
prev_gpt.push((*idx, norm_msg));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let prev = if *role == "human" {
|
||||
&prev_human
|
||||
} else {
|
||||
&prev_gpt
|
||||
};
|
||||
let start = prev.len().saturating_sub(lookback);
|
||||
let mut matched = false;
|
||||
for (prev_idx, prev_msg) in &prev[start..] {
|
||||
if prev_msg.tokens.len() < 5 {
|
||||
continue;
|
||||
}
|
||||
let similarity = norm_msg.ngram_similarity_with_message(prev_msg);
|
||||
if similarity >= exact_threshold {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::StagnationRepetition,
|
||||
*idx,
|
||||
format!("Exact repetition with message {}", prev_idx),
|
||||
)
|
||||
.with_confidence(similarity)
|
||||
.with_metadata(json!({
|
||||
"repetition_type": "exact",
|
||||
"compared_to": prev_idx,
|
||||
"similarity": similarity,
|
||||
"role": role,
|
||||
})),
|
||||
);
|
||||
matched = true;
|
||||
break;
|
||||
} else if similarity >= near_duplicate_threshold {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::StagnationRepetition,
|
||||
*idx,
|
||||
format!("Near-duplicate with message {}", prev_idx),
|
||||
)
|
||||
.with_confidence(similarity)
|
||||
.with_metadata(json!({
|
||||
"repetition_type": "near_duplicate",
|
||||
"compared_to": prev_idx,
|
||||
"similarity": similarity,
|
||||
"role": role,
|
||||
})),
|
||||
);
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
let _ = matched;
|
||||
|
||||
if *role == "human" {
|
||||
prev_human.push((*idx, norm_msg));
|
||||
} else {
|
||||
prev_gpt.push((*idx, norm_msg));
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
/// Combined stagnation analyzer: dragging + repetition.
|
||||
pub fn analyze_stagnation(
|
||||
messages: &[ShareGptMsg<'_>],
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
baseline_turns: usize,
|
||||
) -> (SignalGroup, TurnMetrics) {
|
||||
let (dragging_group, metrics) = analyze_dragging(messages, baseline_turns, 0.5);
|
||||
let repetition_group = analyze_repetition(normalized_messages, 2, 0.95, 0.85);
|
||||
|
||||
let mut combined = SignalGroup::new("stagnation");
|
||||
for s in dragging_group.signals.iter().cloned() {
|
||||
combined.add_signal(s);
|
||||
}
|
||||
for s in repetition_group.signals.iter().cloned() {
|
||||
combined.add_signal(s);
|
||||
}
|
||||
(combined, metrics)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand: normalize a message with the default 2000-char cap.
    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    #[test]
    fn dragging_after_many_user_turns() {
        // 15 user turns against a baseline of 5 drives efficiency below 0.5.
        let msgs: Vec<_> = (0..15)
            .flat_map(|_| [ShareGptMsg { from: "human" }, ShareGptMsg { from: "gpt" }])
            .collect();
        let (g, m) = analyze_dragging(&msgs, 5, 0.5);
        assert!(m.is_dragging);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::StagnationDragging)));
    }

    #[test]
    fn no_dragging_below_baseline() {
        let msgs = vec![
            ShareGptMsg { from: "human" },
            ShareGptMsg { from: "gpt" },
            ShareGptMsg { from: "human" },
            ShareGptMsg { from: "gpt" },
        ];
        let (g, m) = analyze_dragging(&msgs, 5, 0.5);
        assert!(!m.is_dragging);
        assert!(g.signals.is_empty());
    }

    #[test]
    fn detects_exact_repetition_in_user_messages() {
        // Message 2 repeats message 0 verbatim within the lookback window.
        let n = vec![
            (
                0usize,
                "human",
                nm("This widget is broken and needs repair right now"),
            ),
            (1, "gpt", nm("Sorry to hear that. Let me look into it.")),
            (
                2,
                "human",
                nm("This widget is broken and needs repair right now"),
            ),
        ];
        let g = analyze_repetition(&n, 2, 0.95, 0.85);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::StagnationRepetition)));
    }
}
|
||||
|
|
@ -1,3 +1,26 @@
|
|||
mod analyzer;
|
||||
//! Plano signals: behavioral quality indicators for agent interactions.
|
||||
//!
|
||||
//! This is a Rust port of the paper-aligned Python reference implementation at
|
||||
//! `https://github.com/katanemo/signals` (or `/Users/shashmi/repos/signals`).
|
||||
//!
|
||||
//! Three layers of signals are detected from a conversation transcript:
|
||||
//!
|
||||
//! - **Interaction**: misalignment, stagnation, disengagement, satisfaction
|
||||
//! - **Execution**: failure, loops
|
||||
//! - **Environment**: exhaustion
|
||||
//!
|
||||
//! See `SignalType` for the full hierarchy.
|
||||
|
||||
pub use analyzer::*;
|
||||
pub mod analyzer;
|
||||
pub mod environment;
|
||||
pub mod execution;
|
||||
pub mod interaction;
|
||||
pub mod otel;
|
||||
pub mod schemas;
|
||||
pub mod text_processing;
|
||||
|
||||
pub use analyzer::{SignalAnalyzer, FLAG_MARKER};
|
||||
pub use schemas::{
|
||||
EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
|
||||
SignalInstance, SignalLayer, SignalReport, SignalType, TurnMetrics,
|
||||
};
|
||||
|
|
|
|||
241
crates/brightstaff/src/signals/otel.rs
Normal file
241
crates/brightstaff/src/signals/otel.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
//! Helpers for emitting `SignalReport` data to OpenTelemetry spans.
|
||||
//!
|
||||
//! Two sets of attributes are emitted:
|
||||
//!
|
||||
//! - **Legacy** keys under `signals.*` (e.g. `signals.frustration.count`),
|
||||
//! computed from the new layered counts. Preserved for one release for
|
||||
//! backward compatibility with existing dashboards.
|
||||
//! - **New** layered keys (e.g. `signals.interaction.misalignment.count`),
|
||||
//! one set of `count`/`severity` attributes per category, plus per-instance
|
||||
//! span events named `signal.<dotted_signal_type>`.
|
||||
|
||||
use opentelemetry::trace::SpanRef;
|
||||
use opentelemetry::KeyValue;
|
||||
|
||||
use crate::signals::schemas::{SignalGroup, SignalReport, SignalType};
|
||||
|
||||
/// Emit both legacy and layered OTel attributes/events for a `SignalReport`.
///
/// Returns `true` if any "concerning" signal was found, mirroring the previous
/// behavior used to flag the span operation name.
pub fn emit_signals_to_span(span: &SpanRef<'_>, report: &SignalReport) -> bool {
    // Overall quality first, then layered counts, then the derived legacy
    // keys, then one span event per individual signal instance.
    emit_overall(span, report);
    emit_layered_attributes(span, report);
    emit_legacy_attributes(span, report);
    emit_signal_events(span, report);

    is_concerning(report)
}
|
||||
|
||||
fn emit_overall(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.quality",
|
||||
report.overall_quality.as_str().to_string(),
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.quality_score",
|
||||
report.quality_score as f64,
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.turn_count",
|
||||
report.turn_metrics.total_turns as i64,
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.efficiency_score",
|
||||
report.turn_metrics.efficiency_score as f64,
|
||||
));
|
||||
}
|
||||
|
||||
fn emit_group(span: &SpanRef<'_>, prefix: &str, group: &SignalGroup) {
|
||||
if group.count == 0 {
|
||||
return;
|
||||
}
|
||||
span.set_attribute(KeyValue::new(
|
||||
format!("{}.count", prefix),
|
||||
group.count as i64,
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
format!("{}.severity", prefix),
|
||||
group.severity as i64,
|
||||
));
|
||||
}
|
||||
|
||||
fn emit_layered_attributes(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.misalignment",
|
||||
&report.interaction.misalignment,
|
||||
);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.stagnation",
|
||||
&report.interaction.stagnation,
|
||||
);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.disengagement",
|
||||
&report.interaction.disengagement,
|
||||
);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.satisfaction",
|
||||
&report.interaction.satisfaction,
|
||||
);
|
||||
emit_group(span, "signals.execution.failure", &report.execution.failure);
|
||||
emit_group(span, "signals.execution.loops", &report.execution.loops);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.environment.exhaustion",
|
||||
&report.environment.exhaustion,
|
||||
);
|
||||
}
|
||||
|
||||
/// Number of signal instances in `report` whose type is exactly `t`.
fn count_of(report: &SignalReport, t: SignalType) -> usize {
    report.iter_signals().filter(|s| s.signal_type == t).count()
}
|
||||
|
||||
/// Emit the legacy attribute keys consumed by existing dashboards. These are
|
||||
/// derived from the new `SignalReport` so no detector contract is broken.
|
||||
fn emit_legacy_attributes(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
use crate::tracing::signals as legacy;
|
||||
|
||||
// signals.follow_up.repair.{count,ratio} - misalignment proxies repairs.
|
||||
let repair_count = report.interaction.misalignment.count;
|
||||
let user_turns = report.turn_metrics.user_turns.max(1) as f32;
|
||||
if repair_count > 0 {
|
||||
span.set_attribute(KeyValue::new(legacy::REPAIR_COUNT, repair_count as i64));
|
||||
let ratio = repair_count as f32 / user_turns;
|
||||
span.set_attribute(KeyValue::new(legacy::REPAIR_RATIO, format!("{:.3}", ratio)));
|
||||
}
|
||||
|
||||
// signals.frustration.{count,severity} - disengagement.negative_stance is
|
||||
// the closest legacy analog of "frustration".
|
||||
let frustration_count = count_of(report, SignalType::DisengagementNegativeStance);
|
||||
if frustration_count > 0 {
|
||||
span.set_attribute(KeyValue::new(
|
||||
legacy::FRUSTRATION_COUNT,
|
||||
frustration_count as i64,
|
||||
));
|
||||
let severity = match frustration_count {
|
||||
0 => 0,
|
||||
1..=2 => 1,
|
||||
3..=4 => 2,
|
||||
_ => 3,
|
||||
};
|
||||
span.set_attribute(KeyValue::new(legacy::FRUSTRATION_SEVERITY, severity as i64));
|
||||
}
|
||||
|
||||
// signals.repetition.count - stagnation (repetition + dragging).
|
||||
if report.interaction.stagnation.count > 0 {
|
||||
span.set_attribute(KeyValue::new(
|
||||
legacy::REPETITION_COUNT,
|
||||
report.interaction.stagnation.count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// signals.escalation.requested - any escalation/quit signal.
|
||||
let escalated = report.interaction.disengagement.signals.iter().any(|s| {
|
||||
matches!(
|
||||
s.signal_type,
|
||||
SignalType::DisengagementEscalation | SignalType::DisengagementQuit
|
||||
)
|
||||
});
|
||||
if escalated {
|
||||
span.set_attribute(KeyValue::new(legacy::ESCALATION_REQUESTED, true));
|
||||
}
|
||||
|
||||
// signals.positive_feedback.count - satisfaction signals.
|
||||
if report.interaction.satisfaction.count > 0 {
|
||||
span.set_attribute(KeyValue::new(
|
||||
legacy::POSITIVE_FEEDBACK_COUNT,
|
||||
report.interaction.satisfaction.count as i64,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit one span event per detected signal, named `signal.<dotted_type>`,
/// carrying type/index/confidence plus optional snippet and JSON metadata.
fn emit_signal_events(span: &SpanRef<'_>, report: &SignalReport) {
    for sig in report.iter_signals() {
        let event_name = format!("signal.{}", sig.signal_type.as_str());
        let mut attrs: Vec<KeyValue> = vec![
            KeyValue::new("signal.type", sig.signal_type.as_str().to_string()),
            KeyValue::new("signal.message_index", sig.message_index as i64),
            KeyValue::new("signal.confidence", sig.confidence as f64),
        ];
        // Optional fields are omitted rather than emitted empty.
        if !sig.snippet.is_empty() {
            attrs.push(KeyValue::new("signal.snippet", sig.snippet.clone()));
        }
        if !sig.metadata.is_null() {
            attrs.push(KeyValue::new("signal.metadata", sig.metadata.to_string()));
        }
        span.add_event(event_name, attrs);
    }
}
|
||||
|
||||
fn is_concerning(report: &SignalReport) -> bool {
|
||||
use crate::signals::schemas::InteractionQuality;
|
||||
if matches!(
|
||||
report.overall_quality,
|
||||
InteractionQuality::Poor | InteractionQuality::Severe
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if report.interaction.disengagement.count > 0 {
|
||||
return true;
|
||||
}
|
||||
if report.interaction.stagnation.count > 2 {
|
||||
return true;
|
||||
}
|
||||
if report.execution.failure.count > 0 || report.execution.loops.count > 0 {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::schemas::{
        EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
        SignalInstance, SignalReport, SignalType, TurnMetrics,
    };

    // Minimal report containing a single disengagement/escalation signal.
    fn report_with_escalation() -> SignalReport {
        let mut diseng = SignalGroup::new("disengagement");
        diseng.add_signal(SignalInstance::new(
            SignalType::DisengagementEscalation,
            3,
            "get me a human",
        ));
        SignalReport {
            interaction: InteractionSignals {
                disengagement: diseng,
                ..InteractionSignals::default()
            },
            execution: ExecutionSignals::default(),
            environment: EnvironmentSignals::default(),
            overall_quality: InteractionQuality::Severe,
            quality_score: 0.0,
            turn_metrics: TurnMetrics {
                total_turns: 3,
                user_turns: 2,
                assistant_turns: 1,
                is_dragging: false,
                efficiency_score: 1.0,
            },
            summary: String::new(),
        }
    }

    #[test]
    fn is_concerning_flags_disengagement() {
        let r = report_with_escalation();
        assert!(is_concerning(&r));
    }

    #[test]
    fn count_of_returns_per_type_count() {
        let r = report_with_escalation();
        assert_eq!(count_of(&r, SignalType::DisengagementEscalation), 1);
        assert_eq!(count_of(&r, SignalType::DisengagementNegativeStance), 0);
    }
}
|
||||
431
crates/brightstaff/src/signals/schemas.rs
Normal file
431
crates/brightstaff/src/signals/schemas.rs
Normal file
|
|
@ -0,0 +1,431 @@
|
|||
//! Data shapes for the signal analyzer.
|
||||
//!
|
||||
//! Mirrors `signals/schemas.py` from the reference implementation. Where the
|
||||
//! Python library exposes a `Dict[str, SignalGroup]` partitioned by category,
|
||||
//! the Rust port uses strongly-typed sub-structs (`InteractionSignals`,
|
||||
//! `ExecutionSignals`, `EnvironmentSignals`) for the same partitioning.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Hierarchical signal type. The leaf variants (25 in this port) mirror the
/// paper taxonomy and the Python reference's `SignalType` string enum.
///
/// Variants are grouped by layer (interaction / execution / environment)
/// and, within a layer, by category; see [`SignalType::layer`] and
/// [`SignalType::category`] for the derived groupings and
/// [`SignalType::as_str`] for the dotted wire identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SignalType {
    // Interaction > Misalignment
    MisalignmentCorrection,
    MisalignmentRephrase,
    MisalignmentClarification,

    // Interaction > Stagnation
    StagnationDragging,
    StagnationRepetition,

    // Interaction > Disengagement
    DisengagementEscalation,
    DisengagementQuit,
    DisengagementNegativeStance,

    // Interaction > Satisfaction
    SatisfactionGratitude,
    SatisfactionConfirmation,
    SatisfactionSuccess,

    // Execution > Failure
    ExecutionFailureInvalidArgs,
    ExecutionFailureBadQuery,
    ExecutionFailureToolNotFound,
    ExecutionFailureAuthMisuse,
    ExecutionFailureStateError,

    // Execution > Loops
    ExecutionLoopsRetry,
    ExecutionLoopsParameterDrift,
    ExecutionLoopsOscillation,

    // Environment > Exhaustion
    EnvironmentExhaustionApiError,
    EnvironmentExhaustionTimeout,
    EnvironmentExhaustionRateLimit,
    EnvironmentExhaustionNetwork,
    EnvironmentExhaustionMalformed,
    EnvironmentExhaustionContextOverflow,
}
|
||||
|
||||
impl SignalType {
|
||||
/// Dotted hierarchical string identifier, e.g.
|
||||
/// `"interaction.misalignment.correction"`. Matches the Python reference's
|
||||
/// `SignalType` enum *value* strings byte-for-byte.
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
SignalType::MisalignmentCorrection => "interaction.misalignment.correction",
|
||||
SignalType::MisalignmentRephrase => "interaction.misalignment.rephrase",
|
||||
SignalType::MisalignmentClarification => "interaction.misalignment.clarification",
|
||||
SignalType::StagnationDragging => "interaction.stagnation.dragging",
|
||||
SignalType::StagnationRepetition => "interaction.stagnation.repetition",
|
||||
SignalType::DisengagementEscalation => "interaction.disengagement.escalation",
|
||||
SignalType::DisengagementQuit => "interaction.disengagement.quit",
|
||||
SignalType::DisengagementNegativeStance => "interaction.disengagement.negative_stance",
|
||||
SignalType::SatisfactionGratitude => "interaction.satisfaction.gratitude",
|
||||
SignalType::SatisfactionConfirmation => "interaction.satisfaction.confirmation",
|
||||
SignalType::SatisfactionSuccess => "interaction.satisfaction.success",
|
||||
SignalType::ExecutionFailureInvalidArgs => "execution.failure.invalid_args",
|
||||
SignalType::ExecutionFailureBadQuery => "execution.failure.bad_query",
|
||||
SignalType::ExecutionFailureToolNotFound => "execution.failure.tool_not_found",
|
||||
SignalType::ExecutionFailureAuthMisuse => "execution.failure.auth_misuse",
|
||||
SignalType::ExecutionFailureStateError => "execution.failure.state_error",
|
||||
SignalType::ExecutionLoopsRetry => "execution.loops.retry",
|
||||
SignalType::ExecutionLoopsParameterDrift => "execution.loops.parameter_drift",
|
||||
SignalType::ExecutionLoopsOscillation => "execution.loops.oscillation",
|
||||
SignalType::EnvironmentExhaustionApiError => "environment.exhaustion.api_error",
|
||||
SignalType::EnvironmentExhaustionTimeout => "environment.exhaustion.timeout",
|
||||
SignalType::EnvironmentExhaustionRateLimit => "environment.exhaustion.rate_limit",
|
||||
SignalType::EnvironmentExhaustionNetwork => "environment.exhaustion.network",
|
||||
SignalType::EnvironmentExhaustionMalformed => {
|
||||
"environment.exhaustion.malformed_response"
|
||||
}
|
||||
SignalType::EnvironmentExhaustionContextOverflow => {
|
||||
"environment.exhaustion.context_overflow"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn layer(&self) -> SignalLayer {
|
||||
match self {
|
||||
SignalType::MisalignmentCorrection
|
||||
| SignalType::MisalignmentRephrase
|
||||
| SignalType::MisalignmentClarification
|
||||
| SignalType::StagnationDragging
|
||||
| SignalType::StagnationRepetition
|
||||
| SignalType::DisengagementEscalation
|
||||
| SignalType::DisengagementQuit
|
||||
| SignalType::DisengagementNegativeStance
|
||||
| SignalType::SatisfactionGratitude
|
||||
| SignalType::SatisfactionConfirmation
|
||||
| SignalType::SatisfactionSuccess => SignalLayer::Interaction,
|
||||
SignalType::ExecutionFailureInvalidArgs
|
||||
| SignalType::ExecutionFailureBadQuery
|
||||
| SignalType::ExecutionFailureToolNotFound
|
||||
| SignalType::ExecutionFailureAuthMisuse
|
||||
| SignalType::ExecutionFailureStateError
|
||||
| SignalType::ExecutionLoopsRetry
|
||||
| SignalType::ExecutionLoopsParameterDrift
|
||||
| SignalType::ExecutionLoopsOscillation => SignalLayer::Execution,
|
||||
SignalType::EnvironmentExhaustionApiError
|
||||
| SignalType::EnvironmentExhaustionTimeout
|
||||
| SignalType::EnvironmentExhaustionRateLimit
|
||||
| SignalType::EnvironmentExhaustionNetwork
|
||||
| SignalType::EnvironmentExhaustionMalformed
|
||||
| SignalType::EnvironmentExhaustionContextOverflow => SignalLayer::Environment,
|
||||
}
|
||||
}
|
||||
|
||||
/// Category name within the layer (e.g. `"misalignment"`, `"failure"`).
|
||||
pub fn category(&self) -> &'static str {
|
||||
// Strip the layer prefix and take everything before the next dot.
|
||||
let s = self.as_str();
|
||||
let after_layer = s.split_once('.').map(|(_, rest)| rest).unwrap_or(s);
|
||||
after_layer
|
||||
.split_once('.')
|
||||
.map(|(c, _)| c)
|
||||
.unwrap_or(after_layer)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub enum SignalLayer {
|
||||
Interaction,
|
||||
Execution,
|
||||
Environment,
|
||||
}
|
||||
|
||||
impl SignalLayer {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
SignalLayer::Interaction => "interaction",
|
||||
SignalLayer::Execution => "execution",
|
||||
SignalLayer::Environment => "environment",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Overall quality assessment for an agent interaction session.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum InteractionQuality {
|
||||
Excellent,
|
||||
Good,
|
||||
Neutral,
|
||||
Poor,
|
||||
Severe,
|
||||
}
|
||||
|
||||
impl InteractionQuality {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
InteractionQuality::Excellent => "excellent",
|
||||
InteractionQuality::Good => "good",
|
||||
InteractionQuality::Neutral => "neutral",
|
||||
InteractionQuality::Poor => "poor",
|
||||
InteractionQuality::Severe => "severe",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single detected signal instance.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SignalInstance {
|
||||
pub signal_type: SignalType,
|
||||
/// Absolute index into the original conversation `Vec<Message>`.
|
||||
pub message_index: usize,
|
||||
pub snippet: String,
|
||||
pub confidence: f32,
|
||||
/// Free-form metadata payload mirroring the Python `Dict[str, Any]`.
|
||||
/// Stored as a JSON object so we can faithfully reproduce the reference's
|
||||
/// flexible per-detector metadata.
|
||||
#[serde(default)]
|
||||
pub metadata: serde_json::Value,
|
||||
}
|
||||
|
||||
impl SignalInstance {
|
||||
pub fn new(signal_type: SignalType, message_index: usize, snippet: impl Into<String>) -> Self {
|
||||
Self {
|
||||
signal_type,
|
||||
message_index,
|
||||
snippet: snippet.into(),
|
||||
confidence: 1.0,
|
||||
metadata: serde_json::Value::Object(serde_json::Map::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_confidence(mut self, c: f32) -> Self {
|
||||
self.confidence = c;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_metadata(mut self, m: serde_json::Value) -> Self {
|
||||
self.metadata = m;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregated signals for a specific category.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SignalGroup {
|
||||
pub category: String,
|
||||
pub count: usize,
|
||||
pub signals: Vec<SignalInstance>,
|
||||
/// Severity level (0-3: none, mild, moderate, severe).
|
||||
pub severity: u8,
|
||||
}
|
||||
|
||||
impl SignalGroup {
|
||||
pub fn new(category: impl Into<String>) -> Self {
|
||||
Self {
|
||||
category: category.into(),
|
||||
count: 0,
|
||||
signals: Vec::new(),
|
||||
severity: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_signal(&mut self, signal: SignalInstance) {
|
||||
self.signals.push(signal);
|
||||
self.count = self.signals.len();
|
||||
self.update_severity();
|
||||
}
|
||||
|
||||
fn update_severity(&mut self) {
|
||||
self.severity = match self.count {
|
||||
0 => 0,
|
||||
1..=2 => 1,
|
||||
3..=4 => 2,
|
||||
_ => 3,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn count and efficiency metrics, used by stagnation.dragging.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TurnMetrics {
    /// Total number of turns across both roles.
    pub total_turns: usize,
    /// Turns attributed to the user role.
    pub user_turns: usize,
    /// Turns attributed to the assistant role.
    pub assistant_turns: usize,
    /// Whether the conversation was flagged as dragging
    /// (feeds the `interaction.stagnation.dragging` signal).
    pub is_dragging: bool,
    /// Efficiency heuristic filled in by the analyzer; exact semantics are
    /// defined there (not visible in this file) — 1.0 appears as the
    /// "fully efficient" value in the sibling tests.
    pub efficiency_score: f32,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct InteractionSignals {
|
||||
pub misalignment: SignalGroup,
|
||||
pub stagnation: SignalGroup,
|
||||
pub disengagement: SignalGroup,
|
||||
pub satisfaction: SignalGroup,
|
||||
}
|
||||
|
||||
impl Default for InteractionSignals {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
misalignment: SignalGroup::new("misalignment"),
|
||||
stagnation: SignalGroup::new("stagnation"),
|
||||
disengagement: SignalGroup::new("disengagement"),
|
||||
satisfaction: SignalGroup::new("satisfaction"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl InteractionSignals {
|
||||
/// Ratio of misalignment instances to user turns. Used as a quality
|
||||
/// scoring input and as a threshold for the "high misalignment rate"
|
||||
/// summary callout. Mirrors `misalignment.count / max(user_turns, 1)`
|
||||
/// from the Python reference's `_assess_quality` and `_generate_summary`.
|
||||
pub fn misalignment_ratio(&self, user_turns: usize) -> f32 {
|
||||
let denom = user_turns.max(1) as f32;
|
||||
self.misalignment.count as f32 / denom
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExecutionSignals {
|
||||
pub failure: SignalGroup,
|
||||
pub loops: SignalGroup,
|
||||
}
|
||||
|
||||
impl Default for ExecutionSignals {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
failure: SignalGroup::new("failure"),
|
||||
loops: SignalGroup::new("loops"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EnvironmentSignals {
|
||||
pub exhaustion: SignalGroup,
|
||||
}
|
||||
|
||||
impl Default for EnvironmentSignals {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
exhaustion: SignalGroup::new("exhaustion"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Complete signal analysis report for a conversation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SignalReport {
|
||||
pub interaction: InteractionSignals,
|
||||
pub execution: ExecutionSignals,
|
||||
pub environment: EnvironmentSignals,
|
||||
pub overall_quality: InteractionQuality,
|
||||
pub quality_score: f32,
|
||||
pub turn_metrics: TurnMetrics,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
impl Default for SignalReport {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interaction: InteractionSignals::default(),
|
||||
execution: ExecutionSignals::default(),
|
||||
environment: EnvironmentSignals::default(),
|
||||
overall_quality: InteractionQuality::Neutral,
|
||||
quality_score: 50.0,
|
||||
turn_metrics: TurnMetrics::default(),
|
||||
summary: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SignalReport {
|
||||
/// Iterate over every `SignalInstance` across all layers and groups.
|
||||
pub fn iter_signals(&self) -> impl Iterator<Item = &SignalInstance> {
|
||||
self.interaction
|
||||
.misalignment
|
||||
.signals
|
||||
.iter()
|
||||
.chain(self.interaction.stagnation.signals.iter())
|
||||
.chain(self.interaction.disengagement.signals.iter())
|
||||
.chain(self.interaction.satisfaction.signals.iter())
|
||||
.chain(self.execution.failure.signals.iter())
|
||||
.chain(self.execution.loops.signals.iter())
|
||||
.chain(self.environment.exhaustion.signals.iter())
|
||||
}
|
||||
|
||||
pub fn has_signal_type(&self, t: SignalType) -> bool {
|
||||
self.iter_signals().any(|s| s.signal_type == t)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the wire identifiers: one string per layer is spot-checked
    // against the Python reference's enum values.
    #[test]
    fn signal_type_strings_match_paper_taxonomy() {
        assert_eq!(
            SignalType::MisalignmentCorrection.as_str(),
            "interaction.misalignment.correction"
        );
        assert_eq!(
            SignalType::ExecutionFailureInvalidArgs.as_str(),
            "execution.failure.invalid_args"
        );
        assert_eq!(
            SignalType::EnvironmentExhaustionMalformed.as_str(),
            "environment.exhaustion.malformed_response"
        );
    }

    // Checks that layer()/category() agree with the dotted identifiers
    // for one representative variant per layer.
    #[test]
    fn signal_type_layer_and_category() {
        assert_eq!(
            SignalType::MisalignmentRephrase.layer(),
            SignalLayer::Interaction
        );
        assert_eq!(SignalType::MisalignmentRephrase.category(), "misalignment");
        assert_eq!(
            SignalType::ExecutionLoopsRetry.layer(),
            SignalLayer::Execution
        );
        assert_eq!(SignalType::ExecutionLoopsRetry.category(), "loops");
        assert_eq!(
            SignalType::EnvironmentExhaustionTimeout.layer(),
            SignalLayer::Environment
        );
        assert_eq!(
            SignalType::EnvironmentExhaustionTimeout.category(),
            "exhaustion"
        );
    }

    // Exercises every severity bucket boundary: 0 -> 0, 1-2 -> 1,
    // 3-4 -> 2, 5+ -> 3 (must match the Python reference's buckets).
    #[test]
    fn signal_group_severity_buckets_match_python() {
        let mut g = SignalGroup::new("misalignment");
        assert_eq!(g.severity, 0);
        for n in 1..=2 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 1);
        for n in 3..=4 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 2);
        for n in 5..=6 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 3);
    }
}
|
||||
401
crates/brightstaff/src/signals/text_processing.rs
Normal file
401
crates/brightstaff/src/signals/text_processing.rs
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
//! Text normalization and similarity primitives.
|
||||
//!
|
||||
//! Direct Rust port of `signals/text_processing.py` from the reference. The
|
||||
//! shapes (`NormalizedMessage`, `NormalizedPattern`) and similarity formulas
|
||||
//! match the Python implementation exactly so that pattern matching produces
|
||||
//! the same results on the same inputs.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Size of character n-grams used for fuzzy similarity (3 = trigrams).
pub const NGRAM_SIZE: usize = 3;

// The 32 ASCII punctuation characters (same set as Python's
// `string.punctuation`), trimmed from token boundaries by the tokenizers
// below. Internal punctuation (e.g. the apostrophe in "don't") is kept.
const PUNCT_TRIM: &[char] = &[
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=',
    '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
];
|
||||
|
||||
/// Pre-processed message with normalized text and tokens for efficient matching.
#[derive(Debug, Clone, Default)]
pub struct NormalizedMessage {
    /// Input text, possibly truncated by `from_text`'s head/tail strategy.
    pub raw: String,
    /// Lowercased tokens with boundary punctuation trimmed, in order.
    pub tokens: Vec<String>,
    /// Unique tokens, for O(1) membership checks.
    pub token_set: HashSet<String>,
    /// Adjacent token pairs joined by a single space.
    pub bigram_set: HashSet<String>,
    /// Character n-grams (size `NGRAM_SIZE`) over the space-joined tokens.
    pub char_ngram_set: HashSet<String>,
    /// Token -> occurrence count, for cosine similarity.
    pub token_frequency: HashMap<String, usize>,
}

impl NormalizedMessage {
    /// Create a normalized message from raw text. Mirrors
    /// `NormalizedMessage.from_text` in the reference, including the
    /// head-20%/tail-80% truncation strategy when text exceeds `max_length`.
    pub fn from_text(text: &str, max_length: usize) -> Self {
        // Count characters (not bytes) to match Python string semantics.
        let char_count = text.chars().count();

        let raw: String = if char_count <= max_length {
            text.to_string()
        } else {
            // Keep the first 20% and the trailing remainder of the budget;
            // endings often carry the conclusive signal.
            let head_len = max_length / 5;
            // Reserve one char for the joining space.
            let tail_len = max_length.saturating_sub(head_len + 1);
            let head: String = text.chars().take(head_len).collect();
            let tail: String = text
                .chars()
                .skip(char_count.saturating_sub(tail_len))
                .collect();
            format!("{} {}", head, tail)
        };

        // Normalize unicode punctuation to ASCII equivalents.
        let normalized_unicode = raw
            .replace(['\u{2019}', '\u{2018}'], "'")
            .replace(['\u{201c}', '\u{201d}'], "\"")
            .replace(['\u{2013}', '\u{2014}'], "-");

        // Lowercase + collapse whitespace (matches Python's `" ".join(s.split())`).
        let normalized: String = normalized_unicode
            .to_lowercase()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ");

        // Tokenize: trim boundary punctuation, keep internal punctuation.
        let mut tokens: Vec<String> = Vec::new();
        for word in normalized.split_whitespace() {
            let stripped: String = word.trim_matches(PUNCT_TRIM).to_string();
            if !stripped.is_empty() {
                tokens.push(stripped);
            }
        }

        let token_set: HashSet<String> = tokens.iter().cloned().collect();

        // Adjacent token pairs for bigram-level matching.
        let mut bigram_set: HashSet<String> = HashSet::new();
        for i in 0..tokens.len().saturating_sub(1) {
            bigram_set.insert(format!("{} {}", tokens[i], tokens[i + 1]));
        }

        // n-grams are computed over the cleaned, space-joined tokens so the
        // set is comparable to `NormalizedPattern`'s punctuation-free ngrams.
        let tokens_text = tokens.join(" ");
        let char_ngram_set = char_ngrams(&tokens_text, NGRAM_SIZE);

        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for t in &tokens {
            *token_frequency.entry(t.clone()).or_insert(0) += 1;
        }

        Self {
            raw,
            tokens,
            token_set,
            bigram_set,
            char_ngram_set,
            token_frequency,
        }
    }

    /// O(1) membership test against the trimmed token set.
    pub fn contains_token(&self, token: &str) -> bool {
        self.token_set.contains(token)
    }

    /// True if `phrase`'s whitespace-split tokens appear consecutively in
    /// `self.tokens`. A single-token phrase degrades to a set lookup.
    pub fn contains_phrase(&self, phrase: &str) -> bool {
        let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect();
        if phrase_tokens.is_empty() {
            return false;
        }
        if phrase_tokens.len() == 1 {
            return self.contains_token(phrase_tokens[0]);
        }
        if phrase_tokens.len() > self.tokens.len() {
            return false;
        }
        // Sliding-window comparison over every alignment of the phrase.
        let n = phrase_tokens.len();
        for i in 0..=self.tokens.len() - n {
            if self.tokens[i..i + n]
                .iter()
                .zip(phrase_tokens.iter())
                .all(|(a, b)| a == b)
            {
                return true;
            }
        }
        false
    }

    /// Character n-gram (Jaccard) similarity vs another normalized message.
    pub fn ngram_similarity_with_message(&self, other: &NormalizedMessage) -> f32 {
        jaccard(&self.char_ngram_set, &other.char_ngram_set)
    }

    /// Character n-gram (Jaccard) similarity vs a raw pattern string.
    pub fn ngram_similarity_with_pattern(&self, pattern: &str) -> f32 {
        let normalized = strip_non_word_chars(&pattern.to_lowercase());
        let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
        jaccard(&self.char_ngram_set, &pattern_ngrams)
    }

    /// Fraction of pattern's ngrams contained in this message's ngram set.
    /// Returns 0.0 for patterns too short to produce any ngrams.
    pub fn char_ngram_containment(&self, pattern: &str) -> f32 {
        let normalized = strip_non_word_chars(&pattern.to_lowercase());
        let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
        if pattern_ngrams.is_empty() {
            return 0.0;
        }
        let contained = pattern_ngrams
            .iter()
            .filter(|ng| self.char_ngram_set.contains(*ng))
            .count();
        contained as f32 / pattern_ngrams.len() as f32
    }

    /// Token-frequency cosine similarity vs a raw pattern string.
    pub fn token_cosine_similarity(&self, pattern: &str) -> f32 {
        // Tokenize the pattern the same way messages are tokenized above.
        let mut pattern_freq: HashMap<String, usize> = HashMap::new();
        for word in pattern.to_lowercase().split_whitespace() {
            let stripped = word.trim_matches(PUNCT_TRIM);
            if !stripped.is_empty() {
                *pattern_freq.entry(stripped.to_string()).or_insert(0) += 1;
            }
        }
        cosine_freq(&self.token_frequency, &pattern_freq)
    }

    /// Layered match against a pre-normalized pattern. Mirrors
    /// `matches_normalized_pattern` from the reference: exact phrase ->
    /// char-ngram Jaccard -> token cosine.
    pub fn matches_normalized_pattern(
        &self,
        pattern: &NormalizedPattern,
        char_ngram_threshold: f32,
        token_cosine_threshold: f32,
    ) -> bool {
        // Layer 0: exact phrase match using pre-tokenized message.
        let plen = pattern.tokens.len();
        let slen = self.tokens.len();
        if plen > 0 && plen <= slen {
            for i in 0..=slen - plen {
                if self.tokens[i..i + plen] == pattern.tokens[..] {
                    return true;
                }
            }
        }

        // Layer 1: character n-gram Jaccard similarity.
        if !self.char_ngram_set.is_empty() && !pattern.char_ngram_set.is_empty() {
            let inter = self
                .char_ngram_set
                .intersection(&pattern.char_ngram_set)
                .count();
            let union = self.char_ngram_set.union(&pattern.char_ngram_set).count();
            if union > 0 {
                let sim = inter as f32 / union as f32;
                if sim >= char_ngram_threshold {
                    return true;
                }
            }
        }

        // Layer 2: token frequency cosine similarity.
        if !self.token_frequency.is_empty() && !pattern.token_frequency.is_empty() {
            let sim = cosine_freq(&self.token_frequency, &pattern.token_frequency);
            if sim >= token_cosine_threshold {
                return true;
            }
        }

        false
    }
}
|
||||
|
||||
/// Pre-processed pattern with normalized text and pre-computed n-grams/tokens.
#[derive(Debug, Clone, Default)]
pub struct NormalizedPattern {
    /// Original pattern string as supplied by the caller.
    pub raw: String,
    /// Lowercased tokens with boundary punctuation trimmed, in order
    /// (same tokenizer as `NormalizedMessage`).
    pub tokens: Vec<String>,
    /// Character n-grams over the fully punctuation-stripped pattern.
    pub char_ngram_set: HashSet<String>,
    /// Token -> occurrence count over the punctuation-stripped pattern.
    pub token_frequency: HashMap<String, usize>,
}

impl NormalizedPattern {
    /// Normalize a raw pattern string: unicode punctuation folding,
    /// lowercasing, whitespace collapse, then tokenization and
    /// ngram/frequency pre-computation.
    pub fn from_text(pattern: &str) -> Self {
        let normalized = pattern
            .to_lowercase()
            .replace(['\u{2019}', '\u{2018}'], "'")
            .replace(['\u{201c}', '\u{201d}'], "\"")
            .replace(['\u{2013}', '\u{2014}'], "-");
        // Collapse all runs of whitespace into single spaces.
        let normalized: String = normalized.split_whitespace().collect::<Vec<_>>().join(" ");

        // Tokenize the same way as NormalizedMessage (trim boundary punctuation,
        // keep internal punctuation).
        let mut tokens: Vec<String> = Vec::new();
        for word in normalized.split_whitespace() {
            let stripped = word.trim_matches(PUNCT_TRIM);
            if !stripped.is_empty() {
                tokens.push(stripped.to_string());
            }
        }

        // For ngrams + cosine, strip ALL punctuation (matches Python's
        // `re.sub(r"[^\w\s]", "", normalized)`).
        let normalized_for_ngrams = strip_non_word_chars(&normalized);
        let char_ngram_set = char_ngrams(&normalized_for_ngrams, NGRAM_SIZE);

        let tokens_no_punct: Vec<&str> = normalized_for_ngrams.split_whitespace().collect();
        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for t in &tokens_no_punct {
            *token_frequency.entry((*t).to_string()).or_insert(0) += 1;
        }

        Self {
            raw: pattern.to_string(),
            tokens,
            char_ngram_set,
            token_frequency,
        }
    }
}
|
||||
|
||||
/// Convenience: normalize a list of raw pattern strings into `NormalizedPattern`s.
|
||||
pub fn normalize_patterns(patterns: &[&str]) -> Vec<NormalizedPattern> {
|
||||
patterns
|
||||
.iter()
|
||||
.map(|p| NormalizedPattern::from_text(p))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Similarity primitives
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// All character n-grams of `s` as a set; empty when `s` is shorter than
/// `n` characters.
fn char_ngrams(s: &str, n: usize) -> HashSet<String> {
    // Python iterates by character index, not byte; mirror that with .chars().
    let chars: Vec<char> = s.chars().collect();
    if chars.len() < n {
        return HashSet::new();
    }
    (0..=chars.len() - n)
        .map(|i| chars[i..i + n].iter().collect::<String>())
        .collect()
}
|
||||
|
||||
/// Jaccard similarity `|a ∩ b| / |a ∪ b|` between two string sets.
///
/// Two empty sets are defined as identical (1.0); exactly one empty set
/// yields 0.0. After those guards both sets are non-empty, so the union is
/// never zero and the division is safe — the original's `union == 0` branch
/// was unreachable and has been removed.
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let inter = a.intersection(b).count();
    let union = a.union(b).count();
    inter as f32 / union as f32
}
|
||||
|
||||
/// Cosine similarity between two token-frequency vectors. Two empty maps
/// are defined as identical (1.0); exactly one empty map yields 0.0.
/// Accumulates in f64 to match the original's precision.
fn cosine_freq(a: &HashMap<String, usize>, b: &HashMap<String, usize>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    // Dot product over b's support (tokens missing from a contribute 0).
    let dot: f64 = b
        .iter()
        .map(|(token, &f2)| (a.get(token).copied().unwrap_or(0) * f2) as f64)
        .sum();
    let norm_a = a.values().map(|&f| (f * f) as f64).sum::<f64>().sqrt();
    let norm_b = b.values().map(|&f| (f * f) as f64).sum::<f64>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        (dot / (norm_a * norm_b)) as f32
    }
}
|
||||
|
||||
/// Python equivalent: `re.sub(r"[^\w\s]", "", text)` followed by whitespace
/// collapse. Python's `\w` is `[A-Za-z0-9_]` plus unicode word characters; we
/// use Rust's `char::is_alphanumeric()` plus `_` for an equivalent definition.
fn strip_non_word_chars(text: &str) -> String {
    // Drop every character that is neither a word character nor whitespace,
    // then squeeze the remaining whitespace into single spaces.
    let word_chars: String = text
        .chars()
        .filter(|&c| c.is_alphanumeric() || c == '_' || c.is_whitespace())
        .collect();
    word_chars.split_whitespace().collect::<Vec<_>>().join(" ")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize_lowercases_and_strips_punctuation() {
        let m = NormalizedMessage::from_text("Hello, World!", 2000);
        assert_eq!(m.tokens, vec!["hello".to_string(), "world".to_string()]);
    }

    // Unicode right single quote folds to an ASCII apostrophe, which is
    // internal punctuation and therefore kept inside the token.
    #[test]
    fn normalizes_smart_quotes() {
        let m = NormalizedMessage::from_text("don\u{2019}t", 2000);
        assert!(m.tokens.contains(&"don't".to_string()));
    }

    #[test]
    fn truncates_long_text_with_head_tail() {
        let long = "a".repeat(3000);
        let m = NormalizedMessage::from_text(&long, 2000);
        // raw should be ~ 2000 chars (head + space + tail)
        assert!(m.raw.chars().count() <= 2001);
        assert!(m.raw.starts_with("aa"));
        assert!(m.raw.ends_with("aa"));
    }

    #[test]
    fn contains_phrase_matches_consecutive_tokens() {
        let m = NormalizedMessage::from_text("I think this is great work", 2000);
        assert!(m.contains_phrase("this is great"));
        assert!(!m.contains_phrase("great this"));
    }

    #[test]
    fn matches_pattern_via_exact_phrase() {
        let m = NormalizedMessage::from_text("No, I meant the second one", 2000);
        let p = NormalizedPattern::from_text("no i meant");
        assert!(m.matches_normalized_pattern(&p, 0.65, 0.6));
    }

    #[test]
    fn matches_pattern_via_char_ngram_fuzziness() {
        // Typo in "meant" -> "ment" so layer 0 (exact phrase) cannot match,
        // forcing the matcher to fall back to layer 1 (char n-gram Jaccard).
        let m = NormalizedMessage::from_text("No I ment", 2000);
        let p = NormalizedPattern::from_text("no i meant");
        assert!(m.matches_normalized_pattern(&p, 0.4, 0.6));
    }

    #[test]
    fn jaccard_identical_sets_is_one() {
        let a: HashSet<String> = ["abc", "bcd"].iter().map(|s| s.to_string()).collect();
        assert!((jaccard(&a, &a) - 1.0).abs() < 1e-6);
    }

    // Disjoint token supports must produce zero cosine similarity.
    #[test]
    fn cosine_freq_orthogonal_is_zero() {
        let mut a: HashMap<String, usize> = HashMap::new();
        a.insert("hello".to_string(), 1);
        let mut b: HashMap<String, usize> = HashMap::new();
        b.insert("world".to_string(), 1);
        assert_eq!(cosine_freq(&a, &b), 0.0);
    }
}
|
||||
|
|
@ -22,8 +22,9 @@ const STREAM_BUFFER_SIZE: usize = 16;
|
|||
const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
|
||||
use crate::tracing::{llm, set_service_name, signals as signal_constants};
|
||||
use crate::signals::otel::emit_signals_to_span;
|
||||
use crate::signals::{SignalAnalyzer, FLAG_MARKER};
|
||||
use crate::tracing::{llm, set_service_name};
|
||||
use hermesllm::apis::openai::Message;
|
||||
|
||||
/// Parsed usage + resolved-model details from a provider response.
|
||||
|
|
@ -365,77 +366,19 @@ impl StreamProcessor for ObservableStreamProcessor {
|
|||
self.response_buffer.clear();
|
||||
self.response_buffer.shrink_to_fit();
|
||||
|
||||
// Analyze signals if messages are available and record as span attributes
|
||||
// Analyze signals if messages are available and record as span
|
||||
// attributes + per-signal events. We dual-emit legacy aggregate keys
|
||||
// and the new layered taxonomy so existing dashboards keep working
|
||||
// while new consumers can opt into the richer hierarchy.
|
||||
if let Some(ref messages) = self.messages {
|
||||
let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
|
||||
let report = analyzer.analyze(messages);
|
||||
let analyzer = SignalAnalyzer::default();
|
||||
let report = analyzer.analyze_openai(messages);
|
||||
|
||||
// Get the current OTel span to set signal attributes
|
||||
let span = tracing::Span::current();
|
||||
let otel_context = span.context();
|
||||
let otel_span = otel_context.span();
|
||||
|
||||
// Add overall quality
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::QUALITY,
|
||||
format!("{:?}", report.overall_quality),
|
||||
));
|
||||
|
||||
// Add repair/follow-up metrics if concerning
|
||||
if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPAIR_COUNT,
|
||||
report.follow_up.repair_count as i64,
|
||||
));
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPAIR_RATIO,
|
||||
format!("{:.3}", report.follow_up.repair_ratio),
|
||||
));
|
||||
}
|
||||
|
||||
// Add frustration metrics
|
||||
if report.frustration.has_frustration {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::FRUSTRATION_COUNT,
|
||||
report.frustration.frustration_count as i64,
|
||||
));
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::FRUSTRATION_SEVERITY,
|
||||
report.frustration.severity as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Add repetition metrics
|
||||
if report.repetition.has_looping {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPETITION_COUNT,
|
||||
report.repetition.repetition_count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Add escalation metrics
|
||||
if report.escalation.escalation_requested {
|
||||
otel_span
|
||||
.set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
|
||||
}
|
||||
|
||||
// Add positive feedback metrics
|
||||
if report.positive_feedback.has_positive_feedback {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::POSITIVE_FEEDBACK_COUNT,
|
||||
report.positive_feedback.positive_count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Flag the span name if any concerning signal is detected
|
||||
let should_flag = report.frustration.has_frustration
|
||||
|| report.repetition.has_looping
|
||||
|| report.escalation.escalation_requested
|
||||
|| matches!(
|
||||
report.overall_quality,
|
||||
InteractionQuality::Poor | InteractionQuality::Severe
|
||||
);
|
||||
|
||||
let should_flag = emit_signals_to_span(&otel_span, &report);
|
||||
if should_flag {
|
||||
otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -346,12 +346,10 @@ impl TryFrom<(SseEvent, &SupportedAPIsFromClient, &SupportedUpstreamAPIs)> for S
|
|||
(
|
||||
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
||||
SupportedUpstreamAPIs::AnthropicMessagesAPI(_),
|
||||
) => {
|
||||
) if transformed_event.is_event_only() && transformed_event.event.is_some() => {
|
||||
// OpenAI clients don't expect separate event: lines
|
||||
// Suppress upstream Anthropic event-only lines
|
||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
||||
transformed_event.sse_transformed_lines = "\n".to_string();
|
||||
}
|
||||
transformed_event.sse_transformed_lines = "\n".to_string();
|
||||
}
|
||||
_ => {
|
||||
// Other cross-API combinations can be handled here as needed
|
||||
|
|
@ -371,12 +369,10 @@ impl TryFrom<(SseEvent, &SupportedAPIsFromClient, &SupportedUpstreamAPIs)> for S
|
|||
| (
|
||||
SupportedAPIsFromClient::OpenAIResponsesAPI(_),
|
||||
SupportedUpstreamAPIs::OpenAIResponsesAPI(_),
|
||||
) => {
|
||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
||||
// Mark as should-skip by clearing sse_transformed_lines
|
||||
// The event line is already included when the data line is transformed
|
||||
transformed_event.sse_transformed_lines = String::new();
|
||||
}
|
||||
) if transformed_event.is_event_only() && transformed_event.event.is_some() => {
|
||||
// Mark as should-skip by clearing sse_transformed_lines
|
||||
// The event line is already included when the data line is transformed
|
||||
transformed_event.sse_transformed_lines = String::new();
|
||||
}
|
||||
_ => {
|
||||
// Other passthrough combinations (OpenAI ChatCompletions, etc.) don't have this issue
|
||||
|
|
|
|||
|
|
@ -188,14 +188,13 @@ pub fn convert_openai_message_to_anthropic_content(
|
|||
|
||||
// Handle regular content
|
||||
match &message.content {
|
||||
Some(MessageContent::Text(text)) => {
|
||||
if !text.is_empty() {
|
||||
blocks.push(MessagesContentBlock::Text {
|
||||
text: text.clone(),
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
Some(MessageContent::Text(text)) if !text.is_empty() => {
|
||||
blocks.push(MessagesContentBlock::Text {
|
||||
text: text.clone(),
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
Some(MessageContent::Text(_)) => {}
|
||||
Some(MessageContent::Parts(parts)) => {
|
||||
for part in parts {
|
||||
match part {
|
||||
|
|
|
|||
|
|
@ -354,10 +354,10 @@ impl TryFrom<MessagesMessage> for BedrockMessage {
|
|||
MessagesMessageContent::Blocks(blocks) => {
|
||||
for block in blocks {
|
||||
match block {
|
||||
crate::apis::anthropic::MessagesContentBlock::Text { text, .. } => {
|
||||
if !text.is_empty() {
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
crate::apis::anthropic::MessagesContentBlock::Text { text, .. }
|
||||
if !text.is_empty() =>
|
||||
{
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
crate::apis::anthropic::MessagesContentBlock::ToolUse {
|
||||
id,
|
||||
|
|
|
|||
|
|
@ -317,11 +317,10 @@ impl TryFrom<Message> for BedrockMessage {
|
|||
Role::User => {
|
||||
// Convert user message content to content blocks
|
||||
match message.content {
|
||||
Some(MessageContent::Text(text)) => {
|
||||
if !text.is_empty() {
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
Some(MessageContent::Text(text)) if !text.is_empty() => {
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
Some(MessageContent::Text(_)) => {}
|
||||
Some(MessageContent::Parts(parts)) => {
|
||||
// Convert OpenAI content parts to Bedrock ContentBlocks
|
||||
for part in parts {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue