plano/crates/brightstaff/src/signals/analyzer.rs

3191 lines
109 KiB
Rust
Raw Normal View History

//! Agentic Signals - Behavioral quality indicators for agent interactions
//!
//! This module implements various signals that serve as early warning indicators
//! of impending successes or failures in agentic interactions. These signals are
//! derived from conversation patterns and can be computed algorithmically from
//! message arrays.
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::LazyLock;
use hermesllm::apis::openai::{Message, Role};
// ============================================================================
// Constants
// ============================================================================
/// Flag emoji for marking spans/operations worth investigating
pub const FLAG_MARKER: &str = "\u{1F6A9}";
/// Size of character n-grams for similarity matching (3 = trigrams)
const NGRAM_SIZE: usize = 3;
// ============================================================================
// Normalized Message Processing
// ============================================================================
/// Pre-processed message with normalized text and tokens for efficient matching
#[derive(Debug, Clone)]
struct NormalizedMessage {
    /// Original raw text (possibly truncated to head + tail by
    /// `from_text_with_limit` when the input exceeds the length cap)
    raw: String,
    /// Tokens (lowercased words, boundary punctuation stripped) extracted
    /// from the message
    tokens: Vec<String>,
    /// Token set for O(1) word-boundary-aware lookup
    token_set: HashSet<String>,
    /// Word-bigram set for fast similarity computation
    bigram_set: HashSet<String>,
    /// Character ngram set (size `NGRAM_SIZE`) for robust, typo-tolerant
    /// similarity matching
    char_ngram_set: HashSet<String>,
    /// Token frequency map for multiset (term-frequency) cosine similarity
    token_frequency: HashMap<String, usize>,
}
impl NormalizedMessage {
    /// Build a normalized message from raw text with no length limit.
    #[allow(dead_code)] // Used in tests for algorithm validation
    fn from_text(text: &str) -> Self {
        Self::from_text_with_limit(text, usize::MAX)
    }

    /// Build a normalized message, truncating the input to at most
    /// `max_length` characters to bound downstream ngram/token computation.
    ///
    /// When truncating, the head (~20%) and tail (~79%) of the text are
    /// kept, joined by a single space, so both the opening context and the
    /// closing intent of the message survive.
    fn from_text_with_limit(text: &str, max_length: usize) -> Self {
        let char_count = text.chars().count();
        let raw = if char_count <= max_length {
            text.to_string()
        } else {
            // Budget split: 20% head, remainder tail, 1 char for the space
            // delimiter. saturating_sub guards against underflow (and a
            // panic) when max_length < 2.
            let head_len = max_length / 5;
            let tail_len = max_length.saturating_sub(head_len + 1);
            let head: String = text.chars().take(head_len).collect();
            let tail: String = text.chars().skip(char_count - tail_len).collect();
            format!("{} {}", head, tail)
        };
        // Normalize common unicode punctuation to ASCII equivalents so the
        // ASCII pattern tables still match "smart" quotes/dashes.
        let normalized_unicode = raw
            .replace(['\u{2019}', '\u{2018}'], "'") // U+2019/U+2018 SINGLE QUOTATION MARKs
            .replace(['\u{201C}', '\u{201D}'], "\"") // U+201C/U+201D DOUBLE QUOTATION MARKs
            .replace(['\u{2013}', '\u{2014}'], "-"); // U+2013/U+2014 EN/EM DASHes
        // Normalize: lowercase, collapse whitespace runs into single spaces.
        let normalized = normalized_unicode
            .to_lowercase()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ");
        // Tokenize: split on whitespace, stripping punctuation only from the
        // token boundaries (internal punctuation like "doesn't" is kept).
        let tokens: Vec<String> = normalized
            .split_whitespace()
            .map(|word| {
                word.trim_matches(|c: char| c.is_ascii_punctuation())
                    .to_string()
            })
            .filter(|w| !w.is_empty())
            .collect();
        let token_set: HashSet<String> = tokens.iter().cloned().collect();
        // Word bigrams for fast phrase-level similarity.
        let bigram_set: HashSet<String> = tokens
            .windows(2)
            .map(|w| format!("{} {}", w[0], w[1]))
            .collect();
        // Character ngrams built from the cleaned token text (punctuation
        // stripped) for consistency with pattern matching.
        let tokens_text = tokens.join(" ");
        let char_ngram_set: HashSet<String> = tokens_text
            .chars()
            .collect::<Vec<_>>()
            .windows(NGRAM_SIZE)
            .map(|w| w.iter().collect::<String>())
            .collect();
        // Term-frequency map for multiset cosine similarity.
        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for token in &tokens {
            *token_frequency.entry(token.clone()).or_insert(0) += 1;
        }
        Self {
            raw,
            tokens,
            token_set,
            bigram_set,
            char_ngram_set,
            token_frequency,
        }
    }

    /// Normalize a pattern (lowercase, remove ALL punctuation so "doesn't"
    /// becomes "doesnt", collapse whitespace) and return its character
    /// ngram set. Shared by the ngram-similarity and ngram-containment
    /// layers, which previously duplicated this code.
    fn pattern_char_ngrams(pattern: &str) -> HashSet<String> {
        let normalized_pattern = pattern
            .to_lowercase()
            .chars()
            .filter(|c| c.is_alphanumeric() || c.is_whitespace())
            .collect::<String>()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ");
        normalized_pattern
            .chars()
            .collect::<Vec<_>>()
            .windows(NGRAM_SIZE)
            .map(|w| w.iter().collect::<String>())
            .collect()
    }

    /// Check if a single token exists in the message (word boundary aware).
    fn contains_token(&self, token: &str) -> bool {
        self.token_set.contains(token)
    }

    /// Check if a phrase (sequence of tokens) exists in the message.
    fn contains_phrase(&self, phrase: &str) -> bool {
        let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect();
        if phrase_tokens.is_empty() {
            return false;
        }
        if phrase_tokens.len() == 1 {
            return self.contains_token(phrase_tokens[0]);
        }
        // Multi-word phrase: look for the contiguous token sequence.
        self.tokens.windows(phrase_tokens.len()).any(|window| {
            window
                .iter()
                .zip(phrase_tokens.iter())
                .all(|(token, phrase_token)| token == phrase_token)
        })
    }

    /// Jaccard similarity between this message's character ngrams and the
    /// pattern's, in [0.0, 1.0]. Robust to typos, small edits, and word
    /// insertions.
    #[allow(dead_code)] // Used in tests for algorithm validation
    fn char_ngram_similarity(&self, pattern: &str) -> f64 {
        let pattern_ngrams = Self::pattern_char_ngrams(pattern);
        if self.char_ngram_set.is_empty() && pattern_ngrams.is_empty() {
            return 1.0; // Both empty = identical
        }
        if self.char_ngram_set.is_empty() || pattern_ngrams.is_empty() {
            return 0.0;
        }
        // Jaccard similarity: |intersection| / |union|.
        let intersection = self.char_ngram_set.intersection(&pattern_ngrams).count();
        let union = self.char_ngram_set.union(&pattern_ngrams).count();
        if union == 0 {
            return 0.0;
        }
        intersection as f64 / union as f64
    }

    /// Token-based cosine similarity using term frequencies, in [0.0, 1.0].
    /// Handles word frequency and stays stable for longer messages.
    #[allow(dead_code)] // Used in tests for algorithm validation
    fn token_cosine_similarity(&self, pattern: &str) -> f64 {
        // Tokenize and compute frequencies for the pattern, mirroring the
        // message tokenization (boundary punctuation stripped).
        let pattern_tokens: Vec<String> = pattern
            .to_lowercase()
            .split_whitespace()
            .map(|word| {
                word.trim_matches(|c: char| c.is_ascii_punctuation())
                    .to_string()
            })
            .filter(|w| !w.is_empty())
            .collect();
        let mut pattern_frequency: HashMap<String, usize> = HashMap::new();
        for token in &pattern_tokens {
            *pattern_frequency.entry(token.clone()).or_insert(0) += 1;
        }
        if self.token_frequency.is_empty() && pattern_frequency.is_empty() {
            return 1.0;
        }
        if self.token_frequency.is_empty() || pattern_frequency.is_empty() {
            return 0.0;
        }
        // cosine_sim = dot_product / (norm1 * norm2)
        let mut dot_product = 0.0;
        let mut norm1_squared = 0.0;
        let mut norm2_squared = 0.0;
        // Union of vocabulary from both frequency maps.
        let all_tokens: HashSet<String> = self
            .token_frequency
            .keys()
            .chain(pattern_frequency.keys())
            .cloned()
            .collect();
        for token in all_tokens {
            let freq1 = *self.token_frequency.get(&token).unwrap_or(&0) as f64;
            let freq2 = *pattern_frequency.get(&token).unwrap_or(&0) as f64;
            dot_product += freq1 * freq2;
            norm1_squared += freq1 * freq1;
            norm2_squared += freq2 * freq2;
        }
        let norm1 = norm1_squared.sqrt();
        let norm2 = norm2_squared.sqrt();
        if norm1 == 0.0 || norm2 == 0.0 {
            return 0.0;
        }
        dot_product / (norm1 * norm2)
    }

    /// Layered phrase matching: exact → character ngram → token cosine.
    /// Returns true if the pattern matches using any layer.
    #[allow(dead_code)] // Kept for reference; production uses matches_normalized_pattern
    fn layered_contains_phrase(
        &self,
        pattern: &str,
        char_ngram_threshold: f64,
        token_cosine_threshold: f64,
    ) -> bool {
        // Layer 0: exact phrase match (fastest).
        if self.contains_phrase(pattern) {
            return true;
        }
        // Layer 1: character ngram similarity (typo/edit robustness),
        // checked against the whole message — effective for short messages.
        if self.char_ngram_similarity(pattern) >= char_ngram_threshold {
            return true;
        }
        // Ngram containment for patterns buried in longer messages: if ALL
        // of the pattern's ngrams exist in the message, the pattern must be
        // present (possibly with minor variations like missing apostrophes).
        // This is O(pattern_ngrams) lookups vs expensive window sliding.
        if self.char_ngram_containment(pattern) >= 1.0 {
            return true;
        }
        // Layer 2: token cosine similarity (semantic stability for long
        // messages).
        if self.token_cosine_similarity(pattern) >= token_cosine_threshold {
            return true;
        }
        false
    }

    /// Fraction of the pattern's character ngrams present in this message,
    /// in [0.0, 1.0]. Returns 0.0 for patterns too short to produce ngrams.
    fn char_ngram_containment(&self, pattern: &str) -> f64 {
        let pattern_ngrams = Self::pattern_char_ngrams(pattern);
        if pattern_ngrams.is_empty() {
            return 0.0;
        }
        // Count how many pattern ngrams exist in the message.
        let contained = pattern_ngrams
            .iter()
            .filter(|t| self.char_ngram_set.contains(*t))
            .count();
        contained as f64 / pattern_ngrams.len() as f64
    }

    /// Fast matching against a pre-normalized pattern. Avoids re-normalizing
    /// and re-computing ngrams/frequencies for every (message, pattern) pair.
    /// Layers mirror `layered_contains_phrase`.
    fn matches_normalized_pattern(
        &self,
        pattern: &NormalizedPattern,
        char_ngram_threshold: f64,
        token_cosine_threshold: f64,
    ) -> bool {
        // Layer 0: exact phrase match (fastest).
        if self.contains_phrase(&pattern.raw) {
            return true;
        }
        // Layer 1: Jaccard similarity over pre-computed character ngrams.
        if !self.char_ngram_set.is_empty() && !pattern.char_ngram_set.is_empty() {
            let intersection = self
                .char_ngram_set
                .intersection(&pattern.char_ngram_set)
                .count();
            let union = self.char_ngram_set.union(&pattern.char_ngram_set).count();
            if union > 0 {
                let similarity = intersection as f64 / union as f64;
                if similarity >= char_ngram_threshold {
                    return true;
                }
            }
        }
        // Ngram containment check using pre-computed ngrams: every pattern
        // ngram present in the message counts as a match.
        if !pattern.char_ngram_set.is_empty() {
            let contained = pattern
                .char_ngram_set
                .iter()
                .filter(|t| self.char_ngram_set.contains(*t))
                .count();
            let containment = contained as f64 / pattern.char_ngram_set.len() as f64;
            if containment >= 1.0 {
                return true;
            }
        }
        // Layer 2: token cosine similarity using pre-computed frequencies.
        if !self.token_frequency.is_empty() && !pattern.token_frequency.is_empty() {
            let mut dot_product = 0.0;
            let mut norm1_squared = 0.0;
            let mut norm2_squared = 0.0;
            // Dot product and pattern norm from the (usually smaller)
            // pattern frequency map.
            for (token, &freq2) in &pattern.token_frequency {
                let freq1 = *self.token_frequency.get(token).unwrap_or(&0) as f64;
                let freq2 = freq2 as f64;
                dot_product += freq1 * freq2;
                norm2_squared += freq2 * freq2;
            }
            // Message norm uses ALL message tokens, including those absent
            // from the pattern (full vector magnitude, not a partial one).
            for &freq1 in self.token_frequency.values() {
                norm1_squared += (freq1 as f64) * (freq1 as f64);
            }
            let norm1 = norm1_squared.sqrt();
            let norm2 = norm2_squared.sqrt();
            if norm1 > 0.0 && norm2 > 0.0 {
                let similarity = dot_product / (norm1 * norm2);
                if similarity >= token_cosine_threshold {
                    return true;
                }
            }
        }
        false
    }
}
// ============================================================================
// Normalized Pattern (pre-computed for performance)
// ============================================================================
/// Pre-processed pattern with normalized text and pre-computed ngrams/tokens.
/// This avoids redundant computation when matching against many messages.
#[derive(Debug, Clone)]
struct NormalizedPattern {
    /// Original raw pattern text (used for exact phrase matching)
    raw: String,
    /// Character ngram set (size `NGRAM_SIZE`) for similarity matching
    char_ngram_set: HashSet<String>,
    /// Token frequency map for cosine similarity
    token_frequency: HashMap<String, usize>,
}
impl NormalizedPattern {
    /// Pre-compute the normalized form, character ngram set, and token
    /// frequencies for `pattern` so matching against many messages is cheap.
    fn new(pattern: &str) -> Self {
        // Lowercase and drop every punctuation character, then collapse
        // whitespace runs into single spaces.
        let cleaned: String = pattern
            .to_lowercase()
            .chars()
            .filter(|c| c.is_alphanumeric() || c.is_whitespace())
            .collect();
        let normalized = cleaned.split_whitespace().collect::<Vec<_>>().join(" ");

        // Sliding character windows of NGRAM_SIZE over the normalized text.
        let normalized_chars: Vec<char> = normalized.chars().collect();
        let char_ngram_set: HashSet<String> = normalized_chars
            .windows(NGRAM_SIZE)
            .map(|w| w.iter().collect::<String>())
            .collect();

        // Term frequencies over the normalized tokens.
        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for token in normalized.split_whitespace() {
            *token_frequency.entry(token.to_string()).or_insert(0) += 1;
        }

        Self {
            raw: pattern.to_string(),
            char_ngram_set,
            token_frequency,
        }
    }
}
/// Helper to pre-normalize a list of raw pattern strings in one pass.
fn normalize_patterns(patterns: &[&str]) -> Vec<NormalizedPattern> {
    let mut normalized = Vec::with_capacity(patterns.len());
    for pattern in patterns {
        normalized.push(NormalizedPattern::new(pattern));
    }
    normalized
}
// ============================================================================
// Pre-computed Pattern Caches (initialized once at startup)
// ============================================================================
/// Phrases indicating the user is correcting or rephrasing an earlier
/// message (conversational "repair"); consumed by follow-up analysis.
static REPAIR_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Explicit corrections
        "i meant",
        "i mean",
        "sorry, i meant",
        "what i meant was",
        "what i actually meant",
        "i was trying to say",
        "let me correct that",
        "correction",
        "i misspoke",
        // Negations and disagreements
        "no, i",
        "no i",
        "nah i",
        "nope i",
        "not what i",
        "that's not",
        "that's not what",
        "that isn't what",
        "not quite",
        "not exactly",
        // Rephrasing indicators
        "let me rephrase",
        "let me try again",
        "let me clarify",
        "to clarify",
        "to be clear",
        "let me explain",
        "what i'm trying to",
        "what i'm saying",
        "in other words",
        // Actual/really emphasis
        "actually i",
        "actually no",
        "what i actually",
        "i actually",
        "i really meant",
        // Mistake acknowledgment
        "i was wrong",
        "my mistake",
        "my bad",
        "i should have said",
        "i should clarify",
        // Wait/hold indicators
        "wait, i",
        "wait no",
        "hold on",
        "hang on",
    ])
});
/// Phrases expressing direct complaints or dissatisfaction; consumed by
/// frustration analysis.
static COMPLAINT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Useless/unhelpful (multi-word only)
        "this is useless",
        "not helpful",
        "doesn't help",
        "not helping",
        "you're not helping",
        "no help",
        "unhelpful",
        // Not working
        "this doesn't work",
        "doesn't work",
        "not working",
        "isn't working",
        "won't work",
        "still doesn't work",
        "still not working",
        // Not fixing/solving
        "doesn't fix",
        "not fixing",
        "doesn't solve",
        "doesn't seem to work",
        "doesn't seem to fix",
        "not resolving",
        // Waste/pointless
        "waste of time",
        "wasting my time",
        // Ridiculous/absurd
        "this is ridiculous",
        "ridiculous",
        "this is absurd",
        "absurd",
        "this is insane",
        "insane",
        // Stupid/dumb (as adjectives, not as standalone tokens)
        "this is stupid",
        "this is dumb",
        // Quality complaints (multi-word)
        "this sucks",
        "not good enough",
        // Capability questions
        "why can't you",
        "can't you",
        // Frustration
        "this is frustrating",
        "frustrated",
        "incomplete",
        "overwhelm",
        "overwhelmed",
        "overwhelming",
        "exhausted",
        "struggled",
        // same issue
        "same issue",
        // polite dissatisfaction
        "i'm disappointed",
        "thanks, but",
        "appreciate it, but",
        "good, but",
        // Fed up/done
        "i give up",
        "give up",
        "fed up",
        "had enough",
        "can't take",
        // Bot-specific complaints
        "useless bot",
        "dumb bot",
        "stupid bot",
    ])
});
/// Phrases expressing confusion or lack of understanding; consumed by
/// frustration analysis.
static CONFUSION_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Don't understand
        "i don't understand",
        "don't understand",
        "not understanding",
        "can't understand",
        "don't get it",
        "don't follow",
        // Confused state
        "i'm confused",
        "so confused",
        // Makes no sense
        "makes no sense",
        "doesn't make sense",
        "not making sense",
        // What do you mean (keep multi-word)
        "what do you mean",
        "what does that mean",
        "what are you saying",
        // Lost/unclear
        "i'm lost",
        "totally lost",
        "lost me",
        // No clue
        "no clue",
        "no idea",
        // Come again
        "come again",
        "say that again",
        "repeat that",
    ])
});
/// Phrases expressing gratitude; consumed by positive-feedback analysis.
static GRATITUDE_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Standard gratitude
        "thank you",
        "thanks",
        "thank u",
        "thankyou",
        "thx",
        "ty",
        "tyvm",
        "tysm",
        "thnx",
        "thnks",
        // Strong gratitude
        "thanks so much",
        "thank you so much",
        "thanks a lot",
        "thanks a bunch",
        "much appreciated",
        "really appreciate",
        "greatly appreciate",
        "appreciate it",
        "appreciate that",
        "i appreciate",
        "grateful",
        "so grateful",
        // Helpfulness acknowledgment
        "that's helpful",
        "very helpful",
        "super helpful",
        "really helpful",
        "that helps",
        "this helps",
        "helpful",
        // Perfection expressions
        "perfect",
        "that's perfect",
        "just perfect",
        "exactly what i needed",
        "exactly right",
        "just what i needed",
        "that's exactly",
        // Informal positive
        "you're the best",
        "you rock",
        "you're awesome",
        "awesome sauce",
        "legend",
    ])
});
/// Phrases expressing explicit satisfaction with the outcome; consumed by
/// positive-feedback analysis.
static SATISFACTION_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Works/functions
        "that works",
        "this works",
        "works great",
        "works perfectly",
        "works for me",
        // Great variations
        "that's great",
        "that's amazing",
        "this is great",
        "sounds great",
        "looks great",
        "great job",
        // Excellent/perfect
        "excellent",
        "outstanding",
        "superb",
        "spectacular",
        // Awesome/amazing
        "awesome",
        "that's awesome",
        "amazing",
        "incredible",
        // Love expressions
        "love it",
        "love this",
        "i love",
        "loving it",
        "love that",
        // Brilliant/wonderful
        "brilliant",
        "wonderful",
        "fantastic",
        "fabulous",
        "marvelous",
    ])
});
/// Phrases confirming understanding, resolution, or task completion;
/// consumed by positive-feedback analysis.
static SUCCESS_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Understanding confirmation
        "got it",
        "i got it",
        "understand",
        "understood",
        "i understand",
        "makes sense",
        "clear now",
        "i see",
        // Success/completion
        "success",
        "successful",
        "it worked",
        "that worked",
        "this worked",
        "worked",
        // Problem resolution
        "solved",
        "resolved",
        "fixed",
        "fixed it",
        "issue resolved",
        "problem solved",
        // Working state
        "working now",
        "it's working",
        "works now",
        "working fine",
        "working great",
        // Completion
        "all set",
        "all good",
        "we're good",
        "i'm good",
        "all done",
        "done",
        "complete",
        "finished",
        // Perfect fit
        "spot on",
        "nailed it",
        "bingo",
        "exactly",
        "just right",
    ])
});
/// Phrases requesting a human agent instead of the bot; consumed by
/// escalation analysis.
static HUMAN_AGENT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Speak to human
        "speak to a human",
        "speak to human",
        "speak with a human",
        "speak with human",
        "talk to a human",
        "talk to human",
        "talk to a person",
        "talk to person",
        "talk to someone",
        // Human/real agent
        "human agent",
        "real agent",
        "actual agent",
        "live agent",
        "human support",
        // Real/actual person
        "real person",
        "actual person",
        "real human",
        "actual human",
        "someone real",
        // Need/want human
        "need a human",
        "need human",
        "want a human",
        "want human",
        "get me a human",
        "get me human",
        "get me someone",
        // Transfer/connect
        "transfer me",
        "connect me",
        "escalate this",
        // Representative (removed standalone "rep" - too many false positives)
        "representative",
        "customer service rep",
        "customer service representative",
        // Not a bot
        "not a bot",
        "not talking to a bot",
        "tired of bots",
    ])
});
/// Phrases asking for customer/technical support channels; consumed by
/// escalation analysis.
static SUPPORT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Contact support
        "contact support",
        "call support",
        "reach support",
        "get support",
        // Customer support
        "customer support",
        "customer service",
        "tech support",
        "technical support",
        // Help desk
        "help desk",
        "helpdesk",
        "support desk",
        // Talk to support
        "talk to support",
        "speak to support",
        "speak with support",
        "chat with support",
        // Need help
        "need real help",
        "need actual help",
        "help me now",
    ])
});
/// Phrases signalling the user is giving up or abandoning the interaction;
/// consumed by escalation analysis.
static QUIT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
    normalize_patterns(&[
        // Give up
        "i give up",
        "give up",
        "giving up",
        // Quit/leaving
        "i'm going to quit",
        "i quit",
        "quitting",
        "i'm leaving",
        "i'm done",
        "i'm out",
        // Forget it
        "forget it",
        "forget this",
        "screw it",
        "screw this",
        // Never mind
        "never mind",
        "nevermind",
        "don't bother",
        "not worth it",
        // Hopeless
        "this is hopeless",
        // Going elsewhere
        "going elsewhere",
        "try somewhere else",
        "look elsewhere",
        "find another",
    ])
});
// ============================================================================
// Core Signal Types
// ============================================================================
/// Overall quality assessment for an agent interaction session.
///
/// Fieldless enum with total equality, so `Eq` is derived alongside
/// `PartialEq` (enables use as a map/set key and full `==` semantics).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum InteractionQuality {
    /// Excellent interaction with strong positive signals
    Excellent,
    /// Good interaction with mostly positive signals
    Good,
    /// Neutral interaction with mixed signals
    Neutral,
    /// Poor interaction with concerning signals
    Poor,
    /// Critical interaction with severe negative signals
    Severe,
}
/// Container for all computed signals for a conversation; produced by
/// `SignalAnalyzer::analyze`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalReport {
    /// Turn count and efficiency metrics
    pub turn_count: TurnCountSignal,
    /// Follow-up and repair frequency
    pub follow_up: FollowUpSignal,
    /// User frustration indicators
    pub frustration: FrustrationSignal,
    /// Repetition and looping behavior
    pub repetition: RepetitionSignal,
    /// Positive feedback indicators
    pub positive_feedback: PositiveFeedbackSignal,
    /// User escalation requests
    pub escalation: EscalationSignal,
    /// Overall quality assessment derived from the individual signals
    pub overall_quality: InteractionQuality,
    /// Human-readable summary
    pub summary: String,
}
// ============================================================================
// Individual Signal Types
// ============================================================================
/// Turn count and efficiency metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TurnCountSignal {
    /// Total number of turns (user + assistant messages; other roles are
    /// not counted)
    pub total_turns: usize,
    /// Number of user messages
    pub user_turns: usize,
    /// Number of assistant messages
    pub assistant_turns: usize,
    /// Whether the turn count is concerning (> 7)
    pub is_concerning: bool,
    /// Whether the turn count is excessive (> 12)
    pub is_excessive: bool,
    /// Efficiency score (0.0-1.0): 1.0 at or below the baseline turn
    /// count, decaying hyperbolically for each excess turn
    pub efficiency_score: f64,
}
/// Follow-up and repair frequency signal
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FollowUpSignal {
    /// Number of detected repair attempts (at most one per user turn)
    pub repair_count: usize,
    /// Ratio of repairs to total user turns (0.0 when there are no user
    /// turns)
    pub repair_ratio: f64,
    /// Whether repair ratio is concerning (> 0.3)
    pub is_concerning: bool,
    /// Human-readable descriptions of the detected repair phrases
    pub repair_phrases: Vec<String>,
}
/// User frustration indicators
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FrustrationSignal {
    /// Number of frustration indicators detected
    pub frustration_count: usize,
    /// Whether frustration is detected
    pub has_frustration: bool,
    /// Severity level (0-3: none, mild, moderate, severe)
    pub severity: u8,
    /// List of detected frustration indicators, one per occurrence
    pub indicators: Vec<FrustrationIndicator>,
}
/// Individual frustration indicator
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FrustrationIndicator {
    /// Type of frustration detected
    pub indicator_type: FrustrationType,
    /// Index into the analyzed message array where this was detected
    pub message_index: usize,
    /// Relevant text snippet (truncated excerpt of the offending message)
    pub snippet: String,
}
/// Types of frustration indicators.
///
/// Fieldless enum; `Eq` is derived alongside `PartialEq` for full
/// equality semantics and map/set-key use.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum FrustrationType {
    /// Negative sentiment detected
    NegativeSentiment,
    /// All caps typing
    AllCaps,
    /// Excessive punctuation
    ExcessivePunctuation,
    /// Profanity detected
    Profanity,
    /// Direct complaint
    DirectComplaint,
    /// Expression of confusion
    Confusion,
}
/// Repetition and looping behavior signal
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepetitionSignal {
    /// Number of repetitions detected
    pub repetition_count: usize,
    /// Whether significant looping detected (> 2 repetitions)
    pub has_looping: bool,
    /// Severity level (0-3: none, mild, moderate, severe)
    pub severity: u8,
    /// List of detected repetitions, one entry per repeated exchange
    pub repetitions: Vec<RepetitionInstance>,
}
/// Individual repetition instance
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepetitionInstance {
    /// Indices into the analyzed message array involved in the repetition
    pub message_indices: Vec<usize>,
    /// Similarity score (0.0-1.0) between the repeated messages
    pub similarity: f64,
    /// Type of repetition (exact, near-duplicate, or semantic)
    pub repetition_type: RepetitionType,
}
/// Types of repetition.
///
/// Fieldless enum; `Eq` is derived alongside `PartialEq` for full
/// equality semantics and map/set-key use.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum RepetitionType {
    /// Exact repetition
    Exact,
    /// Near-duplicate (high similarity)
    NearDuplicate,
    /// Semantic repetition (similar meaning)
    Semantic,
}
/// Positive feedback indicators
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PositiveFeedbackSignal {
    /// Number of positive indicators detected
    pub positive_count: usize,
    /// Whether positive feedback is present
    pub has_positive_feedback: bool,
    /// Confidence score (0.0-1.0) that the feedback is genuinely positive
    pub confidence: f64,
    /// List of detected positive indicators, one per occurrence
    pub indicators: Vec<PositiveIndicator>,
}
/// Individual positive indicator
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PositiveIndicator {
    /// Type of positive feedback
    pub indicator_type: PositiveType,
    /// Index into the analyzed message array where this was detected
    pub message_index: usize,
    /// Relevant text snippet (excerpt of the triggering message)
    pub snippet: String,
}
/// Types of positive indicators.
///
/// Fieldless enum; `Eq` is derived alongside `PartialEq` for full
/// equality semantics and map/set-key use.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum PositiveType {
    /// Expression of gratitude
    Gratitude,
    /// Explicit satisfaction
    Satisfaction,
    /// Confirmation of success
    Success,
    /// Positive sentiment
    PositiveSentiment,
    /// Natural topic transition
    TopicTransition,
}
/// User escalation signal
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EscalationSignal {
    /// Whether escalation was requested at least once
    pub escalation_requested: bool,
    /// Number of escalation requests
    pub escalation_count: usize,
    /// List of detected escalation requests, one per occurrence
    pub requests: Vec<EscalationRequest>,
}
/// Individual escalation request
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EscalationRequest {
    /// Index into the analyzed message array where this was detected
    pub message_index: usize,
    /// Relevant text snippet (excerpt of the triggering message)
    pub snippet: String,
    /// Type of escalation (human agent, support, quit threat, or help)
    pub escalation_type: EscalationType,
}
/// Types of escalation.
///
/// Fieldless enum; `Eq` is derived alongside `PartialEq` for full
/// equality semantics and map/set-key use.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum EscalationType {
    /// Request for human agent
    HumanAgent,
    /// Request for support
    Support,
    /// Threat to quit/leave
    ThreatToQuit,
    /// General help request
    HelpRequest,
}
// ============================================================================
// Signal Analyzer
// ============================================================================
/// Trait for analyzing conversation signals.
pub trait SignalAnalyzer {
    /// Analyze a conversation (ordered message array) and generate a
    /// complete signal report covering all individual signals.
    fn analyze(&self, messages: &[Message]) -> SignalReport;
}
/// Text-based implementation of signal analyzer that computes all signals
/// from a message array using lexical pattern matching and similarity
/// measures (no model calls).
pub struct TextBasedSignalAnalyzer {
    /// Baseline expected turns for normal interactions; turns beyond this
    /// reduce the efficiency score
    baseline_turns: usize,
    /// Threshold for character ngram similarity (0.0-1.0)
    char_ngram_threshold: f64,
    /// Threshold for token cosine similarity (0.0-1.0)
    token_cosine_threshold: f64,
    /// Maximum message length in characters (prevents unbounded computation)
    max_message_length: usize,
    /// Maximum number of messages to process (prevents unbounded computation)
    max_messages: usize,
    /// Maximum window size for repetition detection (prevents O(n²) explosion)
    max_repetition_window: usize,
}
impl TextBasedSignalAnalyzer {
/// Extract text content from MessageContent, skipping non-text content
fn extract_text(content: &Option<hermesllm::apis::openai::MessageContent>) -> Option<String> {
match content {
Some(hermesllm::apis::openai::MessageContent::Text(text)) => Some(text.clone()),
// Tool calls and other structured content are skipped
_ => None,
}
}
/// Create a new signal analyzer with default settings
pub fn new() -> Self {
Self {
baseline_turns: 5,
char_ngram_threshold: 0.50, // Lowered to handle typos and small edits realistically
token_cosine_threshold: 0.60, // Lowered for better semantic match in varied contexts
max_message_length: 2000, // Prevent unbounded ngram generation
max_messages: 100, // Prevent unbounded message processing
max_repetition_window: 20, // Prevent O(n²) explosion in repetition detection
}
}
/// Create a new signal analyzer with custom baseline
pub fn with_baseline(baseline_turns: usize) -> Self {
Self {
baseline_turns,
char_ngram_threshold: 0.50,
token_cosine_threshold: 0.60,
max_message_length: 2000,
max_messages: 100,
max_repetition_window: 20,
}
}
/// Create a new signal analyzer with custom settings
///
/// # Arguments
/// * `baseline_turns` - Expected baseline turns for normal interactions
/// * `char_ngram_threshold` - Threshold for character ngram similarity (0.0-1.0)
/// * `token_cosine_threshold` - Threshold for token cosine similarity (0.0-1.0)
pub fn with_settings(
baseline_turns: usize,
char_ngram_threshold: f64,
token_cosine_threshold: f64,
) -> Self {
Self {
baseline_turns,
char_ngram_threshold,
token_cosine_threshold,
max_message_length: 2000,
max_messages: 100,
max_repetition_window: 20,
}
}
    /// Create a new signal analyzer with full custom settings including
    /// computation limits. This is the most general constructor; the other
    /// constructors fill in defaults for the remaining fields.
    ///
    /// # Arguments
    /// * `baseline_turns` - Expected baseline turns for normal interactions
    /// * `char_ngram_threshold` - Threshold for character ngram similarity (0.0-1.0)
    /// * `token_cosine_threshold` - Threshold for token cosine similarity (0.0-1.0)
    /// * `max_message_length` - Maximum characters per message to process
    /// * `max_messages` - Maximum number of messages to process
    /// * `max_repetition_window` - Maximum messages to compare for repetition detection
    pub fn with_full_settings(
        baseline_turns: usize,
        char_ngram_threshold: f64,
        token_cosine_threshold: f64,
        max_message_length: usize,
        max_messages: usize,
        max_repetition_window: usize,
    ) -> Self {
        Self {
            baseline_turns,
            char_ngram_threshold,
            token_cosine_threshold,
            max_message_length,
            max_messages,
            max_repetition_window,
        }
    }
// ========================================================================
// Individual Signal Analyzers
// ========================================================================
/// Analyze turn count and efficiency
fn analyze_turn_count(&self, messages: &[Message]) -> TurnCountSignal {
let mut user_turns = 0;
let mut assistant_turns = 0;
for message in messages {
match message.role {
Role::User => user_turns += 1,
Role::Assistant => assistant_turns += 1,
_ => {}
}
}
let total_turns = user_turns + assistant_turns;
let is_concerning = total_turns > 7;
let is_excessive = total_turns > 12;
// Calculate efficiency score (exponential decay after baseline)
let efficiency_score = if total_turns == 0 || total_turns <= self.baseline_turns {
1.0
} else {
let excess = total_turns - self.baseline_turns;
1.0 / (1.0 + (excess as f64 * 0.3))
};
TurnCountSignal {
total_turns,
user_turns,
assistant_turns,
is_concerning,
is_excessive,
efficiency_score,
}
}
    /// Analyze follow-up and repair frequency.
    ///
    /// Walks the pre-normalized user messages and counts at most one repair
    /// per user turn: first via the REPAIR_PATTERNS table, then — only if no
    /// pattern matched — via a similarity check against the previous user
    /// message (a near-identical restatement also counts as a repair).
    fn analyze_follow_up(
        &self,
        normalized_messages: &[(usize, Role, NormalizedMessage)],
    ) -> FollowUpSignal {
        let mut repair_count = 0;
        let mut repair_phrases = Vec::new();
        let mut user_turn_count = 0;
        for (i, role, norm_msg) in normalized_messages {
            if *role != Role::User {
                continue;
            }
            user_turn_count += 1;
            // Use per-turn boolean to prevent double-counting
            let mut found_in_turn = false;
            // Use pre-computed patterns for fast matching; stop at the
            // first matching pattern for this turn.
            for pattern in REPAIR_PATTERNS.iter() {
                if norm_msg.matches_normalized_pattern(
                    pattern,
                    self.char_ngram_threshold,
                    self.token_cosine_threshold,
                ) {
                    repair_count += 1;
                    // NOTE: `i` is the message index, reported 1-based as a
                    // "Turn" label in the phrase description.
                    repair_phrases.push(format!("Turn {}: '{}'", i + 1, pattern.raw));
                    found_in_turn = true;
                    break;
                }
            }
            // Only check for semantic similarity if no pattern matched;
            // requires at least two earlier messages so a prior user
            // message can exist.
            if !found_in_turn && *i >= 2 {
                // Scan backwards for the most recent previous user message
                // and compare only against that one.
                for j in (0..*i).rev() {
                    let (_, prev_role, prev_norm_msg) = &normalized_messages[j];
                    if *prev_role == Role::User {
                        if self.is_similar_rephrase(norm_msg, prev_norm_msg) {
                            repair_count += 1;
                            repair_phrases
                                .push(format!("Turn {}: Similar rephrase detected", i + 1));
                        }
                        break;
                    }
                }
            }
        }
        // Guard against division by zero when there are no user turns.
        let repair_ratio = if user_turn_count == 0 {
            0.0
        } else {
            repair_count as f64 / user_turn_count as f64
        };
        let is_concerning = repair_ratio > 0.3;
        FollowUpSignal {
            repair_count,
            repair_ratio,
            is_concerning,
            repair_phrases,
        }
    }
/// Analyze user frustration indicators
fn analyze_frustration(
&self,
normalized_messages: &[(usize, Role, NormalizedMessage)],
) -> FrustrationSignal {
let mut indicators = Vec::new();
// Profanity list - only as standalone tokens, not substrings
let profanity_tokens = [
"damn", "damnit", "crap", "wtf", "ffs", "bullshit", "shit", "fuck", "fucking",
];
for (i, role, norm_msg) in normalized_messages {
if *role != Role::User {
continue;
}
let text = &norm_msg.raw;
// Check for all caps (at least 10 chars and 80% uppercase)
let alpha_chars: String = text.chars().filter(|c| c.is_alphabetic()).collect();
if alpha_chars.len() >= 10 {
let upper_count = alpha_chars.chars().filter(|c| c.is_uppercase()).count();
let upper_ratio = upper_count as f64 / alpha_chars.len() as f64;
if upper_ratio >= 0.8 {
indicators.push(FrustrationIndicator {
indicator_type: FrustrationType::AllCaps,
message_index: *i,
snippet: text.chars().take(50).collect(),
});
}
}
// Check for excessive punctuation
let question_marks = text.matches('?').count();
let exclamation_marks = text.matches('!').count();
if question_marks >= 3 || exclamation_marks >= 3 {
indicators.push(FrustrationIndicator {
indicator_type: FrustrationType::ExcessivePunctuation,
message_index: *i,
snippet: text.chars().take(50).collect(),
});
}
// Check for complaint patterns using pre-computed patterns
for pattern in COMPLAINT_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
indicators.push(FrustrationIndicator {
indicator_type: FrustrationType::DirectComplaint,
message_index: *i,
snippet: pattern.raw.clone(),
});
break;
}
}
// Check for confusion patterns using pre-computed patterns
for pattern in CONFUSION_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
indicators.push(FrustrationIndicator {
indicator_type: FrustrationType::Confusion,
message_index: *i,
snippet: pattern.raw.clone(),
});
break;
}
}
// Check for profanity (token-based, not substring)
for token in &profanity_tokens {
if norm_msg.contains_token(token) {
indicators.push(FrustrationIndicator {
indicator_type: FrustrationType::Profanity,
message_index: *i,
snippet: token.to_string(),
});
break;
}
}
}
let frustration_count = indicators.len();
let has_frustration = frustration_count > 0;
// Calculate severity
let severity = if frustration_count == 0 {
0
} else if frustration_count <= 2 {
1
} else if frustration_count <= 4 {
2
} else {
3
};
FrustrationSignal {
frustration_count,
has_frustration,
severity,
indicators,
}
}
/// Analyze repetition and looping behavior
fn analyze_repetition(
&self,
normalized_messages: &[(usize, Role, NormalizedMessage)],
) -> RepetitionSignal {
let mut repetitions = Vec::new();
// Collect assistant messages with normalized content
let assistant_messages: Vec<(usize, &NormalizedMessage)> = normalized_messages
.iter()
.filter(|(_, role, _)| *role == Role::Assistant)
.map(|(i, _, norm_msg)| (*i, norm_msg))
.collect();
// Limit the window size to prevent O(n²) explosion
// Only compare messages within the max_repetition_window
let window_size = self.max_repetition_window.min(assistant_messages.len());
// Check for exact or near-duplicate responses using bigram similarity
// Only compare within the sliding window
for i in 0..assistant_messages.len() {
let window_start = i + 1;
let window_end = (i + 1 + window_size).min(assistant_messages.len());
for j in window_start..window_end {
let (idx_i, norm_msg_i) = &assistant_messages[i];
let (idx_j, norm_msg_j) = &assistant_messages[j];
// Skip if messages are too short
if norm_msg_i.tokens.len() < 5 || norm_msg_j.tokens.len() < 5 {
continue;
}
// Calculate bigram-based similarity (more accurate for near-duplicates)
let similarity = self.calculate_bigram_similarity(norm_msg_i, norm_msg_j);
// Exact match - lowered from 0.95 to 0.85 for bigram similarity
if similarity >= 0.85 {
repetitions.push(RepetitionInstance {
message_indices: vec![*idx_i, *idx_j],
similarity,
repetition_type: RepetitionType::Exact,
});
}
// Near duplicate - lowered from 0.75 to 0.50 to catch subtle repetitions
else if similarity >= 0.50 {
repetitions.push(RepetitionInstance {
message_indices: vec![*idx_i, *idx_j],
similarity,
repetition_type: RepetitionType::NearDuplicate,
});
}
}
}
let repetition_count = repetitions.len();
let has_looping = repetition_count > 2;
let severity = if repetition_count == 0 {
0
} else if repetition_count <= 2 {
1
} else if repetition_count <= 4 {
2
} else {
3
};
RepetitionSignal {
repetition_count,
has_looping,
severity,
repetitions,
}
}
/// Calculate bigram similarity using cached bigram sets
fn calculate_bigram_similarity(
&self,
norm_msg1: &NormalizedMessage,
norm_msg2: &NormalizedMessage,
) -> f64 {
// Use pre-cached bigram sets for O(1) lookups
let set1 = &norm_msg1.bigram_set;
let set2 = &norm_msg2.bigram_set;
if set1.is_empty() && set2.is_empty() {
return 1.0; // Both empty = identical
}
if set1.is_empty() || set2.is_empty() {
return 0.0;
}
let intersection = set1.intersection(set2).count();
let union = set1.union(set2).count();
if union == 0 {
return 0.0;
}
intersection as f64 / union as f64
}
/// Analyze positive feedback indicators
fn analyze_positive_feedback(
&self,
normalized_messages: &[(usize, Role, NormalizedMessage)],
) -> PositiveFeedbackSignal {
let mut indicators = Vec::new();
for (i, role, norm_msg) in normalized_messages {
if *role != Role::User {
continue;
}
// Use per-turn boolean to prevent double-counting
let mut found_in_turn = false;
// Check gratitude using pre-computed patterns
for pattern in GRATITUDE_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
indicators.push(PositiveIndicator {
indicator_type: PositiveType::Gratitude,
message_index: *i,
snippet: pattern.raw.clone(),
});
found_in_turn = true;
break;
}
}
if found_in_turn {
continue;
}
// Check satisfaction using pre-computed patterns
for pattern in SATISFACTION_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
indicators.push(PositiveIndicator {
indicator_type: PositiveType::Satisfaction,
message_index: *i,
snippet: pattern.raw.clone(),
});
found_in_turn = true;
break;
}
}
if found_in_turn {
continue;
}
// Check success confirmation using pre-computed patterns
for pattern in SUCCESS_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
indicators.push(PositiveIndicator {
indicator_type: PositiveType::Success,
message_index: *i,
snippet: pattern.raw.clone(),
});
break;
}
}
}
let positive_count = indicators.len();
let has_positive_feedback = positive_count > 0;
// Calculate confidence based on number and diversity of indicators
let confidence = if positive_count == 0 {
0.0
} else if positive_count == 1 {
0.6
} else if positive_count == 2 {
0.8
} else {
0.95
};
PositiveFeedbackSignal {
positive_count,
has_positive_feedback,
confidence,
indicators,
}
}
/// Analyze user escalation requests
fn analyze_escalation(
&self,
normalized_messages: &[(usize, Role, NormalizedMessage)],
) -> EscalationSignal {
let mut requests = Vec::new();
for (i, role, norm_msg) in normalized_messages {
if *role != Role::User {
continue;
}
let mut found_human_agent = false;
// Check for human agent request using pre-computed patterns
for pattern in HUMAN_AGENT_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
requests.push(EscalationRequest {
message_index: *i,
snippet: pattern.raw.clone(),
escalation_type: EscalationType::HumanAgent,
});
found_human_agent = true;
break;
}
}
// Check for support request (only if no human agent request found)
// HumanAgent and Support are too similar and often match the same phrase
if !found_human_agent {
for pattern in SUPPORT_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
requests.push(EscalationRequest {
message_index: *i,
snippet: pattern.raw.clone(),
escalation_type: EscalationType::Support,
});
break;
}
}
}
// Check for quit threats (independent of HumanAgent/Support)
// A message can contain both "give up" (quit) and "speak to human" (escalation)
for pattern in QUIT_PATTERNS.iter() {
if norm_msg.matches_normalized_pattern(
pattern,
self.char_ngram_threshold,
self.token_cosine_threshold,
) {
requests.push(EscalationRequest {
message_index: *i,
snippet: pattern.raw.clone(),
escalation_type: EscalationType::ThreatToQuit,
});
break;
}
}
}
let escalation_count = requests.len();
let escalation_requested = escalation_count > 0;
EscalationSignal {
escalation_requested,
escalation_count,
requests,
}
}
// ========================================================================
// Helper Methods
// ========================================================================
/// Check if two messages are similar rephrases
fn is_similar_rephrase(
&self,
norm_msg1: &NormalizedMessage,
norm_msg2: &NormalizedMessage,
) -> bool {
// Skip if too short
if norm_msg1.tokens.len() < 3 || norm_msg2.tokens.len() < 3 {
return false;
}
// Common stopwords to downweight
let stopwords: HashSet<&str> = [
"i", "me", "my", "you", "the", "a", "an", "is", "are", "was", "were", "to", "with",
"for", "of", "at", "by", "in", "on", "it", "this", "that", "can", "could", "do",
"does", "did", "will", "would", "should", "be",
]
.iter()
.cloned()
.collect();
// Filter out stopwords for meaningful overlap
let tokens1: HashSet<_> = norm_msg1
.tokens
.iter()
.filter(|t| !stopwords.contains(t.as_str()))
.collect();
let tokens2: HashSet<_> = norm_msg2
.tokens
.iter()
.filter(|t| !stopwords.contains(t.as_str()))
.collect();
// Need at least 2 non-stopword tokens
if tokens1.len() < 2 || tokens2.len() < 2 {
return false;
}
let intersection = tokens1.intersection(&tokens2).count();
let min_size = tokens1.len().min(tokens2.len());
// High overlap suggests rephrase
let overlap_ratio = intersection as f64 / min_size as f64;
overlap_ratio >= 0.6
}
/// Assess overall interaction quality based on all signals
fn assess_overall_quality(
&self,
turn_count: &TurnCountSignal,
follow_up: &FollowUpSignal,
frustration: &FrustrationSignal,
repetition: &RepetitionSignal,
positive: &PositiveFeedbackSignal,
escalation: &EscalationSignal,
) -> InteractionQuality {
// Critical conditions - immediate fail
if escalation.escalation_requested
|| frustration.severity >= 3
|| repetition.severity >= 3
|| turn_count.is_excessive
{
return InteractionQuality::Severe;
}
// Calculate quality score
let mut score = 50.0; // Start at neutral
// Positive factors
if positive.has_positive_feedback {
score += 20.0 * positive.confidence;
}
score += turn_count.efficiency_score * 10.0;
// Negative factors
if frustration.has_frustration {
score -= frustration.severity as f64 * 10.00;
}
if follow_up.is_concerning {
score -= 15.0;
}
if repetition.has_looping {
score -= repetition.severity as f64 * 8.0;
}
if turn_count.is_concerning {
score -= 10.0;
}
// Map score to quality level
if score >= 75.0 {
InteractionQuality::Excellent
} else if score >= 60.0 {
InteractionQuality::Good
} else if score >= 40.0 {
InteractionQuality::Neutral
} else if score >= 25.0 {
InteractionQuality::Poor
} else {
InteractionQuality::Severe
}
}
/// Generate human-readable summary
#[allow(clippy::too_many_arguments)]
fn generate_summary(
&self,
turn_count: &TurnCountSignal,
follow_up: &FollowUpSignal,
frustration: &FrustrationSignal,
repetition: &RepetitionSignal,
positive: &PositiveFeedbackSignal,
escalation: &EscalationSignal,
quality: &InteractionQuality,
) -> String {
let mut summary_parts = Vec::new();
summary_parts.push(format!("Overall Quality: {:?}", quality));
summary_parts.push(format!(
"Turn Count: {} turns (efficiency: {:.1}%)",
turn_count.total_turns,
turn_count.efficiency_score * 100.0
));
if follow_up.is_concerning {
summary_parts.push(format!(
"⚠️ High repair rate: {:.1}% of user turns",
follow_up.repair_ratio * 100.0
));
}
if frustration.has_frustration {
summary_parts.push(format!(
"⚠️ Frustration detected: {} indicators (severity: {})",
frustration.frustration_count, frustration.severity
));
}
if repetition.has_looping {
summary_parts.push(format!(
"⚠️ Looping detected: {} repetitions",
repetition.repetition_count
));
}
if positive.has_positive_feedback {
summary_parts.push(format!(
"✓ Positive feedback: {} indicators",
positive.positive_count
));
}
if escalation.escalation_requested {
summary_parts.push(format!(
"⚠️ Escalation requested: {} requests",
escalation.escalation_count
));
}
summary_parts.join(" | ")
}
}
impl SignalAnalyzer for TextBasedSignalAnalyzer {
    /// Run every signal analyzer over (at most) the `max_messages` most
    /// recent messages and fold the results into a single [`SignalReport`].
    fn analyze(&self, messages: &[Message]) -> SignalReport {
        // Keep only the tail of the conversation, bounded by `max_messages`.
        let start = messages.len().saturating_sub(self.max_messages);
        let messages_to_process = &messages[start..];
        // Normalize each text-bearing message exactly once; messages without
        // extractable text (tool calls, etc.) are dropped, and long messages
        // are truncated. Stored indices refer to `messages_to_process`.
        let normalized_messages: Vec<(usize, Role, NormalizedMessage)> = messages_to_process
            .iter()
            .enumerate()
            .filter_map(|(i, msg)| {
                let text = Self::extract_text(&msg.content)?;
                Some((
                    i,
                    msg.role.clone(),
                    NormalizedMessage::from_text_with_limit(&text, self.max_message_length),
                ))
            })
            .collect();
        // Individual signals.
        let turn_count = self.analyze_turn_count(messages_to_process);
        let follow_up = self.analyze_follow_up(&normalized_messages);
        let frustration = self.analyze_frustration(&normalized_messages);
        let repetition = self.analyze_repetition(&normalized_messages);
        let positive_feedback = self.analyze_positive_feedback(&normalized_messages);
        let escalation = self.analyze_escalation(&normalized_messages);
        // Aggregate verdict and human-readable summary.
        let overall_quality = self.assess_overall_quality(
            &turn_count,
            &follow_up,
            &frustration,
            &repetition,
            &positive_feedback,
            &escalation,
        );
        let summary = self.generate_summary(
            &turn_count,
            &follow_up,
            &frustration,
            &repetition,
            &positive_feedback,
            &escalation,
            &overall_quality,
        );
        SignalReport {
            turn_count,
            follow_up,
            frustration,
            repetition,
            positive_feedback,
            escalation,
            overall_quality,
            summary,
        }
    }
}
impl Default for TextBasedSignalAnalyzer {
fn default() -> Self {
Self::new()
}
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
use super::*;
use hermesllm::apis::openai::MessageContent;
use hermesllm::transforms::lib::ExtractText;
use std::time::Instant;
    /// Test helper: build a plain-text chat message for the given role,
    /// with no name, tool calls, or tool-call id.
    fn create_message(role: Role, content: &str) -> Message {
        Message {
            role,
            content: Some(MessageContent::Text(content.to_string())),
            name: None,
            tool_calls: None,
            tool_call_id: None,
        }
    }
// ========================================================================
// Tests for New Similarity Methods
// ========================================================================
#[test]
fn test_char_ngram_similarity_exact_match() {
let msg = NormalizedMessage::from_text("thank you very much");
let similarity = msg.char_ngram_similarity("thank you very much");
assert!(
similarity > 0.95,
"Exact match should have very high similarity"
);
}
#[test]
fn test_char_ngram_similarity_typo() {
let msg = NormalizedMessage::from_text("thank you very much");
// Common typo: "thnks" instead of "thanks"
let similarity = msg.char_ngram_similarity("thnks you very much");
assert!(
similarity > 0.50,
"Should handle single-character typo with decent similarity: {}",
similarity
);
}
#[test]
fn test_char_ngram_similarity_small_edit() {
let msg = NormalizedMessage::from_text("this doesn't work");
let similarity = msg.char_ngram_similarity("this doesnt work");
assert!(
similarity > 0.70,
"Should handle punctuation removal gracefully: {}",
similarity
);
}
#[test]
fn test_char_ngram_similarity_word_insertion() {
let msg = NormalizedMessage::from_text("i don't understand");
let similarity = msg.char_ngram_similarity("i really don't understand");
assert!(
similarity > 0.40,
"Should be robust to word insertions: {}",
similarity
);
}
#[test]
fn test_token_cosine_similarity_exact_match() {
let msg = NormalizedMessage::from_text("this is not helpful");
let similarity = msg.token_cosine_similarity("this is not helpful");
assert!(
(similarity - 1.0).abs() < 0.01,
"Exact match should have cosine similarity of 1.0"
);
}
#[test]
fn test_token_cosine_similarity_word_order() {
let msg = NormalizedMessage::from_text("not helpful at all");
let similarity = msg.token_cosine_similarity("helpful not at all");
assert!(
similarity > 0.95,
"Should be robust to word order changes: {}",
similarity
);
}
#[test]
fn test_token_cosine_similarity_frequency() {
let msg = NormalizedMessage::from_text("help help help please");
let similarity = msg.token_cosine_similarity("help please");
assert!(
similarity > 0.7 && similarity < 1.0,
"Should account for frequency differences: {}",
similarity
);
}
#[test]
fn test_token_cosine_similarity_long_message_with_context() {
let msg = NormalizedMessage::from_text(
"I've been trying to set up my account for the past hour \
and the verification email never arrived. I checked my spam folder \
and still nothing. This is really frustrating and not helpful at all.",
);
let similarity = msg.token_cosine_similarity("not helpful");
assert!(
similarity > 0.15 && similarity < 0.7,
"Should detect pattern in long message with lower but non-zero similarity: {}",
similarity
);
}
#[test]
fn test_layered_matching_exact_hit() {
let msg = NormalizedMessage::from_text("thank you so much");
assert!(
msg.layered_contains_phrase("thank you", 0.50, 0.60),
"Should match exact phrase in Layer 0"
);
}
#[test]
fn test_layered_matching_typo_hit() {
// Test that shows layered matching is more robust than exact matching alone
let msg = NormalizedMessage::from_text("it doesnt work for me");
// "doesnt work" should match "doesn't work" via character ngrams (high overlap)
assert!(
msg.layered_contains_phrase("doesn't work", 0.50, 0.60),
"Should match 'doesnt work' to 'doesn't work' via character ngrams"
);
}
#[test]
fn test_layered_matching_word_order_hit() {
let msg = NormalizedMessage::from_text("helpful not very");
assert!(
msg.layered_contains_phrase("not helpful", 0.50, 0.60),
"Should match reordered words via token cosine in Layer 2"
);
}
#[test]
fn test_layered_matching_long_message_with_pattern() {
let msg = NormalizedMessage::from_text(
"I've tried everything and followed all the instructions \
but this is not helpful at all and I'm getting frustrated",
);
assert!(
msg.layered_contains_phrase("not helpful", 0.50, 0.60),
"Should detect pattern buried in long message"
);
}
#[test]
fn test_layered_matching_no_match() {
let msg = NormalizedMessage::from_text("everything is working perfectly");
assert!(
!msg.layered_contains_phrase("not helpful", 0.50, 0.60),
"Should not match completely different content"
);
}
#[test]
fn test_char_ngram_vs_token_cosine_tradeoffs() {
// Character ngrams handle character-level changes well
let msg1 = NormalizedMessage::from_text("this doesnt work");
let char_sim1 = msg1.char_ngram_similarity("this doesn't work");
assert!(
char_sim1 > 0.70,
"Character ngrams should handle punctuation: {}",
char_sim1
);
// Token cosine is better for word order and long messages with semantic overlap
let msg2 =
NormalizedMessage::from_text("I really appreciate all your help with this issue today");
let token_sim2 = msg2.token_cosine_similarity("thank you for help");
assert!(
token_sim2 > 0.15,
"Token cosine should detect semantic overlap: {}",
token_sim2
);
}
// ========================================================================
// Existing Tests
// ========================================================================
    /// Test helper mirroring the analyzer's preprocessing step, but without
    /// filtering or truncation: every message is normalized at its index.
    fn preprocess_messages(messages: &[Message]) -> Vec<(usize, Role, NormalizedMessage)> {
        messages
            .iter()
            .enumerate()
            .map(|(i, msg)| {
                let text = msg.content.extract_text();
                (i, msg.role.clone(), NormalizedMessage::from_text(&text))
            })
            .collect()
    }
#[test]
fn test_turn_count_efficient() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "Hello"),
create_message(Role::Assistant, "Hi! How can I help?"),
create_message(Role::User, "Thanks!"),
];
let signal = analyzer.analyze_turn_count(&messages);
assert_eq!(signal.total_turns, 3);
assert_eq!(signal.user_turns, 2);
assert_eq!(signal.assistant_turns, 1);
assert!(!signal.is_concerning);
assert!(!signal.is_excessive);
assert!(signal.efficiency_score > 0.9);
println!("test_turn_count_efficient took: {:?}", start.elapsed());
}
#[test]
fn test_turn_count_excessive() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let mut messages = Vec::new();
for i in 0..15 {
messages.push(create_message(
if i % 2 == 0 {
Role::User
} else {
Role::Assistant
},
&format!("Message {}", i),
));
}
let signal = analyzer.analyze_turn_count(&messages);
assert_eq!(signal.total_turns, 15);
assert!(signal.is_concerning);
assert!(signal.is_excessive);
assert!(signal.efficiency_score < 0.5);
println!("test_turn_count_excessive took: {:?}", start.elapsed());
}
#[test]
fn test_follow_up_detection() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "Show me restaurants"),
create_message(Role::Assistant, "Here are some options"),
create_message(Role::User, "No, I meant Italian restaurants"),
create_message(Role::Assistant, "Here are Italian restaurants"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_follow_up(&normalized_messages);
assert_eq!(signal.repair_count, 1);
assert!(signal.repair_ratio > 0.0);
println!("test_follow_up_detection took: {:?}", start.elapsed());
}
#[test]
fn test_frustration_detection() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "THIS IS RIDICULOUS!!!"),
create_message(Role::Assistant, "I apologize for the frustration"),
create_message(Role::User, "This doesn't work at all"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_frustration(&normalized_messages);
assert!(signal.has_frustration);
assert!(signal.frustration_count >= 2);
assert!(signal.severity > 0);
println!("test_frustration_detection took: {:?}", start.elapsed());
}
#[test]
fn test_positive_feedback_detection() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "Can you help me?"),
create_message(Role::Assistant, "Sure!"),
create_message(Role::User, "Thank you! That's exactly what I needed."),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_positive_feedback(&normalized_messages);
assert!(signal.has_positive_feedback);
assert!(signal.positive_count >= 1);
assert!(signal.confidence > 0.5);
println!(
"test_positive_feedback_detection took: {:?}",
start.elapsed()
);
}
#[test]
fn test_escalation_detection() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "This isn't working"),
create_message(Role::Assistant, "Let me help"),
create_message(Role::User, "I need to speak to a human agent"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_escalation(&normalized_messages);
assert!(signal.escalation_requested);
assert_eq!(signal.escalation_count, 1);
println!("test_escalation_detection took: {:?}", start.elapsed());
}
#[test]
fn test_repetition_detection() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "What's the weather?"),
create_message(
Role::Assistant,
"I can help you with the weather information",
),
create_message(Role::User, "Show me the forecast"),
create_message(Role::Assistant, "Sure, I can help you with the forecast"),
create_message(Role::User, "Stop repeating yourself"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_repetition(&normalized_messages);
for rep in &signal.repetitions {
println!(
" - Messages {:?}, similarity: {:.3}, type: {:?}",
rep.message_indices, rep.similarity, rep.repetition_type
);
}
assert!(signal.repetition_count > 0,
"Should detect the subtle repetition between 'I can help you with the weather information' \
and 'Sure, I can help you with the forecast'");
println!("test_repetition_detection took: {:?}", start.elapsed());
}
#[test]
fn test_full_analysis_excellent() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "I need to book a flight"),
create_message(Role::Assistant, "Sure! Where would you like to go?"),
create_message(Role::User, "New York"),
create_message(Role::Assistant, "Great! I found several options."),
create_message(Role::User, "Perfect!"),
];
let report = analyzer.analyze(&messages);
assert!(matches!(
report.overall_quality,
InteractionQuality::Excellent | InteractionQuality::Good
));
assert!(report.positive_feedback.has_positive_feedback);
assert!(!report.frustration.has_frustration);
println!("test_full_analysis_excellent took: {:?}", start.elapsed());
}
#[test]
fn test_full_analysis_poor() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "Help me"),
create_message(Role::Assistant, "How can I assist?"),
create_message(Role::User, "No, I meant something else"),
create_message(Role::Assistant, "What do you need?"),
create_message(Role::User, "THIS DOESN'T WORK!!!"),
create_message(Role::Assistant, "I apologize"),
create_message(Role::User, "Let me speak to a human"),
];
let report = analyzer.analyze(&messages);
assert!(matches!(
report.overall_quality,
InteractionQuality::Poor | InteractionQuality::Severe
));
assert!(report.frustration.has_frustration);
assert!(report.escalation.escalation_requested);
println!("test_full_analysis_poor took: {:?}", start.elapsed());
}
#[test]
fn test_fuzzy_matching_gratitude() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "Can you help me?"),
create_message(Role::Assistant, "Sure!"),
create_message(Role::User, "thnaks! that's exactly what i needed."),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_positive_feedback(&normalized_messages);
assert!(signal.has_positive_feedback);
assert!(signal.positive_count >= 1);
println!("test_fuzzy_matching_gratitude took: {:?}", start.elapsed());
}
#[test]
fn test_fuzzy_matching_escalation() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "This isn't working"),
create_message(Role::Assistant, "Let me help"),
create_message(Role::User, "i need to speek to a human agnet"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_escalation(&normalized_messages);
assert!(signal.escalation_requested);
assert_eq!(signal.escalation_count, 1);
println!("test_fuzzy_matching_escalation took: {:?}", start.elapsed());
}
#[test]
fn test_fuzzy_matching_repair() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "Show me restaurants"),
create_message(Role::Assistant, "Here are some options"),
create_message(Role::User, "no i ment Italian restaurants"),
create_message(Role::Assistant, "Here are Italian restaurants"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_follow_up(&normalized_messages);
assert!(signal.repair_count >= 1);
println!("test_fuzzy_matching_repair took: {:?}", start.elapsed());
}
#[test]
fn test_fuzzy_matching_complaint() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
// Use a complaint that should match - "doesnt work" is close enough to "doesn't work"
let messages = vec![
create_message(Role::User, "this doesnt work at all"), // Common typo: missing apostrophe
create_message(Role::Assistant, "I apologize"),
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_frustration(&normalized_messages);
// The layered matching should catch this via character ngrams or token cosine
// "doesnt work" has high character-level similarity to "doesn't work"
assert!(
signal.has_frustration,
"Should detect frustration from complaint pattern"
);
assert!(signal.frustration_count >= 1);
println!("test_fuzzy_matching_complaint took: {:?}", start.elapsed());
}
#[test]
fn test_exact_match_priority() {
let start = Instant::now();
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![create_message(Role::User, "thank you so much")];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_positive_feedback(&normalized_messages);
assert!(signal.has_positive_feedback);
// Should detect exact match, not fuzzy
assert!(signal.indicators[0].snippet.contains("thank you"));
assert!(!signal.indicators[0].snippet.contains("fuzzy"));
println!("test_exact_match_priority took: {:?}", start.elapsed());
}
// ========================================================================
// Anti-Tests: Verify fixes stay fixed
// ========================================================================
#[test]
fn test_hello_not_profanity() {
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![create_message(Role::User, "hello there")];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_frustration(&normalized_messages);
assert!(
!signal.has_frustration,
"\"hello\" should not trigger profanity detection"
);
}
#[test]
fn test_prepare_not_escalation() {
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![create_message(
Role::User,
"Can you help me prepare for the meeting?",
)];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_escalation(&normalized_messages);
assert!(
!signal.escalation_requested,
"\"prepare\" should not trigger escalation (rep pattern removed)"
);
}
#[test]
fn test_unicode_apostrophe_confusion() {
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
create_message(Role::User, "I'm confused"), // Unicode apostrophe
];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_frustration(&normalized_messages);
assert!(
signal.has_frustration,
"Unicode apostrophe 'I'm confused' should trigger confusion"
);
}
#[test]
fn test_unicode_quotes_work() {
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![create_message(
Role::User,
"\u{201C}doesn\u{2019}t work\u{201D} with unicode quotes",
)];
let normalized_messages = preprocess_messages(&messages);
let signal = analyzer.analyze_frustration(&normalized_messages);
assert!(
signal.has_frustration,
"Unicode quotes should be normalized and match patterns"
);
}
#[test]
fn test_absolute_not_profanity() {
    let sut = TextBasedSignalAnalyzer::new();
    let convo = vec![create_message(Role::User, "That's absolute nonsense")];
    let result = sut.analyze_frustration(&preprocess_messages(&convo));
    // Any indicator must come from the "nonsense" logic, never from a "bs"
    // substring hiding inside "absolute".
    let matched_bs = result
        .indicators
        .iter()
        .any(|indicator| indicator.snippet.contains("bs"));
    assert!(
        !matched_bs,
        "\"absolute\" should not trigger 'bs' profanity match"
    );
}
#[test]
fn test_stopwords_not_rephrase() {
    let sut = TextBasedSignalAnalyzer::new();
    let convo = vec![
        create_message(Role::User, "Help me with X"),
        create_message(Role::Assistant, "Sure"),
        create_message(Role::User, "Help me with Y"),
    ];
    let result = sut.analyze_follow_up(&preprocess_messages(&convo));
    // The only shared tokens are stopwords ("help me with"), so the second
    // user turn must not be counted as a rephrase/repair.
    assert_eq!(
        result.repair_count, 0,
        "Messages with only stopword overlap should not be rephrases"
    );
}
#[test]
fn test_frustrated_user_with_legitimate_repair() {
    // End-to-end scenario: a tool-using flight search goes in circles and the
    // user's "I already told you" complaint is LEGITIMATE (New York really was
    // stated in the first message). Exercises turn counting (tool-role messages
    // filtered), frustration, escalation, and overall-quality scoring together.
    // Kept byte-identical in message text: the exact-count assertions below
    // depend on the precise phrasing of each message.
    let start = Instant::now();
    let analyzer = TextBasedSignalAnalyzer::new();
    use hermesllm::apis::openai::{FunctionCall, ToolCall};
    // Helper to create a message with tool calls
    let create_assistant_with_tools =
        |content: &str, tool_id: &str, tool_name: &str, args: &str| -> Message {
            Message {
                role: Role::Assistant,
                content: Some(MessageContent::Text(content.to_string())),
                name: None,
                tool_calls: Some(vec![ToolCall {
                    id: tool_id.to_string(),
                    call_type: "function".to_string(),
                    function: FunctionCall {
                        name: tool_name.to_string(),
                        arguments: args.to_string(),
                    },
                }]),
                tool_call_id: None,
            }
        };
    // Helper to create a tool response message
    let create_tool_message = |tool_call_id: &str, content: &str| -> Message {
        Message {
            role: Role::Tool,
            content: Some(MessageContent::Text(content.to_string())),
            name: None,
            tool_calls: None,
            tool_call_id: Some(tool_call_id.to_string()),
        }
    };
    // Scenario: User DOES mention New York in first message, making "I already told you" legitimate
    let messages = vec![
        create_message(
            Role::User,
            "I need to book a flight from New York to Paris for December 20th",
        ),
        create_assistant_with_tools(
            "I'll help you search for flights to Paris.",
            "call_123",
            "search_flights",
            r#"{"origin": "NYC", "destination": "Paris", "date": "2025-12-20"}"#,
        ),
        create_tool_message("call_123", r#"{"flights": []}"#),
        create_message(
            Role::Assistant,
            "I couldn't find any flights. Could you provide your departure city?",
        ),
        create_message(Role::User, "I already told you, from New York!"),
        create_assistant_with_tools(
            "Let me try again.",
            "call_456",
            "search_flights",
            r#"{"origin": "New York", "destination": "Paris", "date": "2025-12-20"}"#,
        ),
        create_tool_message("call_456", r#"{"flights": []}"#),
        create_message(
            Role::Assistant,
            "I'm still not finding results. Let me check the system.",
        ),
        create_message(
            Role::User,
            "THIS IS RIDICULOUS!!! The tool doesn't work at all. Why do you keep calling it?",
        ),
        create_message(
            Role::Assistant,
            "I sincerely apologize for the frustration with the search tool.",
        ),
        create_message(
            Role::User,
            "Forget it. I need to speak to a human agent. This is a waste of time.",
        ),
    ];
    let report = analyzer.analyze(&messages);
    // Tool messages should be filtered out, so we should only analyze text messages
    // That's 4 user messages + 5 assistant text messages = 9 turns
    assert_eq!(
        report.turn_count.total_turns, 9,
        "Should count 9 text messages (tool messages filtered out)"
    );
    assert!(
        report.turn_count.is_concerning,
        "Should flag concerning turn count"
    );
    // Should detect frustration (all caps, complaints)
    assert!(
        report.frustration.has_frustration,
        "Should detect frustration"
    );
    assert!(
        report.frustration.frustration_count >= 2,
        "Should detect multiple frustration indicators"
    );
    assert!(
        report.frustration.severity >= 2,
        "Should have moderate or higher frustration severity"
    );
    // Should detect escalation request
    assert!(
        report.escalation.escalation_requested,
        "Should detect escalation to human agent"
    );
    assert!(
        report.escalation.escalation_count >= 1,
        "Should detect at least one escalation"
    );
    // Overall quality should be Poor or Severe
    assert!(
        matches!(
            report.overall_quality,
            InteractionQuality::Poor | InteractionQuality::Severe
        ),
        "Quality should be Poor or Severe, got {:?}",
        report.overall_quality
    );
    // Wall-clock timing printed so a perf regression in analyze() is visible
    // in test output (no assertion: timing is environment-dependent).
    println!(
        "test_frustrated_user_with_legitimate_repair took: {:?}",
        start.elapsed()
    );
}
#[test]
fn test_frustrated_user_false_claim() {
    // Same looping flight-search scenario as the "legitimate repair" test above,
    // except the user NEVER stated the origin city, so "I already told you" is a
    // FALSE claim — realistic frustrated-user exaggeration/misremembering.
    // Signal analysis should still land on frustration + escalation and a
    // Poor/Severe overall quality. Message text kept byte-identical: the
    // exact-count assertions depend on the precise phrasing.
    let start = Instant::now();
    let analyzer = TextBasedSignalAnalyzer::new();
    use hermesllm::apis::openai::{FunctionCall, ToolCall};
    // Helper to create a message with tool calls
    let create_assistant_with_tools =
        |content: &str, tool_id: &str, tool_name: &str, args: &str| -> Message {
            Message {
                role: Role::Assistant,
                content: Some(MessageContent::Text(content.to_string())),
                name: None,
                tool_calls: Some(vec![ToolCall {
                    id: tool_id.to_string(),
                    call_type: "function".to_string(),
                    function: FunctionCall {
                        name: tool_name.to_string(),
                        arguments: args.to_string(),
                    },
                }]),
                tool_call_id: None,
            }
        };
    // Helper to create a tool response message
    let create_tool_message = |tool_call_id: &str, content: &str| -> Message {
        Message {
            role: Role::Tool,
            content: Some(MessageContent::Text(content.to_string())),
            name: None,
            tool_calls: None,
            tool_call_id: Some(tool_call_id.to_string()),
        }
    };
    // Scenario: User NEVER mentions New York in first message but claims "I already told you"
    // This represents realistic frustrated user behavior - exaggeration/misremembering
    let messages = vec![
        create_message(
            Role::User,
            "I need to book a flight to Paris for December 20th",
        ),
        create_assistant_with_tools(
            "I'll help you search for flights to Paris.",
            "call_123",
            "search_flights",
            r#"{"destination": "Paris", "date": "2025-12-20"}"#,
        ),
        create_tool_message("call_123", r#"{"error": "origin required"}"#),
        create_message(
            Role::Assistant,
            "I couldn't find any flights. Could you provide your departure city?",
        ),
        create_message(Role::User, "I already told you, from New York!"), // False claim - never mentioned it
        create_assistant_with_tools(
            "Let me try again.",
            "call_456",
            "search_flights",
            r#"{"origin": "New York", "destination": "Paris", "date": "2025-12-20"}"#,
        ),
        create_tool_message("call_456", r#"{"flights": []}"#),
        create_message(
            Role::Assistant,
            "I'm still not finding results. Let me check the system.",
        ),
        create_message(
            Role::User,
            "THIS IS RIDICULOUS!!! The tool doesn't work at all. Why do you keep calling it?",
        ),
        create_message(
            Role::Assistant,
            "I sincerely apologize for the frustration with the search tool.",
        ),
        create_message(
            Role::User,
            "Forget it. I need to speak to a human agent. This is a waste of time.",
        ),
    ];
    let report = analyzer.analyze(&messages);
    // Tool messages should be filtered out, so we should only analyze text messages
    // That's 4 user messages + 5 assistant text messages = 9 turns
    assert_eq!(
        report.turn_count.total_turns, 9,
        "Should count 9 text messages (tool messages filtered out)"
    );
    assert!(
        report.turn_count.is_concerning,
        "Should flag concerning turn count"
    );
    // Should detect frustration (all caps, complaints, false claims)
    assert!(
        report.frustration.has_frustration,
        "Should detect frustration"
    );
    assert!(
        report.frustration.frustration_count >= 2,
        "Should detect multiple frustration indicators"
    );
    assert!(
        report.frustration.severity >= 2,
        "Should have moderate or higher frustration severity"
    );
    // Should detect escalation request
    assert!(
        report.escalation.escalation_requested,
        "Should detect escalation to human agent"
    );
    assert!(
        report.escalation.escalation_count >= 1,
        "Should detect at least one escalation"
    );
    // Note: May detect false positive "positive feedback" due to fuzzy matching
    // e.g., "I already told YOU" matches "you rock", "THIS is RIDICULOUS" matches "this helps"
    // However, the overall quality should still be Poor/Severe due to frustration+escalation
    // Overall quality should be Poor or Severe (frustration + escalation indicates poor interaction)
    assert!(
        matches!(
            report.overall_quality,
            InteractionQuality::Poor | InteractionQuality::Severe
        ),
        "Quality should be Poor or Severe for frustrated user with false claims, got {:?}",
        report.overall_quality
    );
    // Timing printed for visibility; no assertion since it is environment-dependent.
    println!(
        "test_frustrated_user_false_claim took: {:?}",
        start.elapsed()
    );
}
// False-negative regression tests: polite or low-key dissatisfaction phrasing
// that naive caps/profanity-only detectors tend to miss.
#[test]
fn test_dissatisfaction_polite_not_working_for_me() {
    let sut = TextBasedSignalAnalyzer::new();
    // Courteous phrasing ("Thanks, but...") still signals dissatisfaction.
    let convo = vec![
        create_message(Role::User, "Thanks, but this still isn't working for me."),
        create_message(Role::Assistant, "Sorry—what error do you see?"),
    ];
    let result = sut.analyze_frustration(&preprocess_messages(&convo));
    assert!(
        result.has_frustration,
        "Polite dissatisfaction should be detected"
    );
}
#[test]
fn test_dissatisfaction_giving_up_without_escalation() {
    let sut = TextBasedSignalAnalyzer::new();
    // No explicit "human agent" request — quitting alone should count.
    let convo = vec![create_message(
        Role::User,
        "Never mind, I'll figure it out myself.",
    )];
    let result = sut.analyze_escalation(&preprocess_messages(&convo));
    assert!(
        result.escalation_requested,
        "Giving up should count as escalation/quit intent"
    );
}
#[test]
fn test_dissatisfaction_same_problem_again() {
    let sut = TextBasedSignalAnalyzer::new();
    // Recurring-problem phrasing is a frustration signal even without anger.
    let convo = vec![create_message(
        Role::User,
        "I'm running into the same issue again.",
    )];
    let result = sut.analyze_frustration(&preprocess_messages(&convo));
    assert!(
        result.has_frustration,
        "'same issue again' should be detected"
    );
}
#[test]
fn test_unsatisfied_incomplete() {
    let sut = TextBasedSignalAnalyzer::new();
    let convo = vec![create_message(Role::User, "This feels incomplete.")];
    let result = sut.analyze_frustration(&preprocess_messages(&convo));
    assert!(
        result.has_frustration,
        "Should detect 'incomplete' dissatisfaction"
    );
}
#[test]
fn test_low_mood_overwhelming() {
    let sut = TextBasedSignalAnalyzer::new();
    let convo = vec![create_message(
        Role::User,
        "This is overwhelming and I'm not sure what to do.",
    )];
    let result = sut.analyze_frustration(&preprocess_messages(&convo));
    assert!(result.has_frustration, "Should detect overwhelmed language");
}
#[test]
fn test_low_mood_exhausted_trying() {
    let sut = TextBasedSignalAnalyzer::new();
    let convo = vec![create_message(
        Role::User,
        "I'm exhausted trying to get this working.",
    )];
    let result = sut.analyze_frustration(&preprocess_messages(&convo));
    assert!(
        result.has_frustration,
        "Should detect exhaustion/struggle language"
    );
}
#[test]
fn test_common_polite_unresolved_dissatisfaction() {
    let sut = TextBasedSignalAnalyzer::new();
    // Multi-turn SSH troubleshooting that never resolves; the user stays polite
    // throughout, which caps/profanity-only detectors would miss entirely.
    let convo = vec![
        create_message(Role::User, "I'm trying to set up SSH keys for GitHub."),
        create_message(
            Role::Assistant,
            "Sure. First generate a key using ssh-keygen.",
        ),
        create_message(Role::User, "I did that already."),
        create_message(
            Role::Assistant,
            "Then add the key to your GitHub account settings.",
        ),
        create_message(Role::User, "I've done that too."),
        create_message(
            Role::Assistant,
            "After that, make sure your SSH agent is running.",
        ),
        create_message(
            Role::User,
            "Okay, but this still doesn't seem to fix the issue.",
        ),
        create_message(Role::Assistant, "What error message are you seeing?"),
        create_message(Role::User, "It's just not connecting the way I expected."),
    ];
    let report = sut.analyze(&convo);
    // Either soft frustration or follow-up repair must fire, and the overall
    // rating must not come out as Excellent.
    assert!(
        report.frustration.has_frustration || report.follow_up.repair_count >= 1,
        "Should detect polite unresolved dissatisfaction via frustration or follow-up indicators"
    );
    assert!(
        !matches!(report.overall_quality, InteractionQuality::Excellent),
        "Should not classify unresolved dissatisfaction as Excellent"
    );
}
#[test]
fn test_common_resigned_giving_up_quietly() {
    let sut = TextBasedSignalAnalyzer::new();
    // Quiet resignation ("never mind", "try something else") without any
    // explicit "human agent" request — commonly missed by keyword systems.
    let convo = vec![
        create_message(
            Role::User,
            "Can you explain how to deploy this with Docker?",
        ),
        create_message(
            Role::Assistant,
            "You need to write a Dockerfile and build an image.",
        ),
        create_message(Role::User, "I tried that."),
        create_message(Role::Assistant, "Then you can run docker-compose up."),
        create_message(Role::User, "I did, but it didnt really help."),
        create_message(Role::Assistant, "What error are you getting?"),
        create_message(
            Role::User,
            "Honestly, never mind. Ill just try something else.",
        ),
    ];
    let report = sut.analyze(&convo);
    assert!(
        report.escalation.escalation_requested || report.frustration.has_frustration,
        "Resigned quitting language should trigger escalation or frustration"
    );
    assert!(
        matches!(
            report.overall_quality,
            InteractionQuality::Poor | InteractionQuality::Severe
        ) || report.escalation.escalation_requested
            || report.frustration.has_frustration,
        "Giving up should not be classified as a high-quality interaction"
    );
}
#[test]
fn test_common_discouraged_overwhelmed_low_mood() {
    let sut = TextBasedSignalAnalyzer::new();
    // Negative affect with zero caps/profanity; should still register as
    // frustration and keep the rating below Excellent.
    let convo = vec![
        create_message(Role::User, "I'm trying to understand backpropagation."),
        create_message(
            Role::Assistant,
            "It's a way to compute gradients efficiently.",
        ),
        create_message(Role::User, "Ive read that explanation already."),
        create_message(Role::Assistant, "Would you like a mathematical derivation?"),
        create_message(Role::User, "Maybe, but Im still having trouble following."),
        create_message(Role::Assistant, "I can walk through a simple example."),
        create_message(
            Role::User,
            "That might help, but honestly this is pretty overwhelming.",
        ),
        create_message(Role::Assistant, "Lets slow it down step by step."),
        create_message(
            Role::User,
            "Yeah… Im just feeling kind of discouraged right now.",
        ),
    ];
    let report = sut.analyze(&convo);
    assert!(
        report.frustration.has_frustration,
        "Overwhelmed/discouraged language should be detected as negative sentiment/frustration"
    );
    assert!(
        !matches!(report.overall_quality, InteractionQuality::Excellent),
        "Low-mood discouragement should not be classified as Excellent"
    );
}
#[test]
fn test_common_misalignment_not_what_i_asked() {
    let sut = TextBasedSignalAnalyzer::new();
    // The assistant keeps answering a different question; misalignment should
    // surface as follow-up repair or frustration.
    let convo = vec![
        create_message(Role::User, "How do I optimize this SQL query?"),
        create_message(
            Role::Assistant,
            "You can add indexes to improve performance.",
        ),
        create_message(Role::User, "I already have indexes."),
        create_message(Role::Assistant, "Then you could consider query caching."),
        create_message(Role::User, "Thats not really what I was asking about."),
        create_message(
            Role::Assistant,
            "What specifically are you trying to optimize?",
        ),
        create_message(
            Role::User,
            "The execution plan — this answer doesnt address that.",
        ),
    ];
    let report = sut.analyze(&convo);
    assert!(
        report.follow_up.repair_count >= 1 || report.frustration.has_frustration,
        "Misalignment ('not what I asked') should trigger repair or frustration signals"
    );
    assert!(
        !matches!(report.overall_quality, InteractionQuality::Excellent),
        "Misalignment should not be rated as Excellent"
    );
}
#[test]
fn test_common_false_negative_polite_disappointment_complexity() {
    let sut = TextBasedSignalAnalyzer::new();
    // "Not quite" / "more complicated than I expected" — mild, polite, and a
    // classic false negative for keyword-only detectors.
    let convo = vec![
        create_message(Role::User, "Can you help me write a regex for this?"),
        create_message(Role::Assistant, "Sure, try this pattern: ^[a-z]+$"),
        create_message(Role::User, "I tested it."),
        create_message(Role::Assistant, "Did it work?"),
        create_message(Role::User, "Not quite — it matches more than it should."),
        create_message(Role::Assistant, "You can refine it with a lookahead."),
        create_message(
            Role::User,
            "I see… this is more complicated than I expected.",
        ),
    ];
    let report = sut.analyze(&convo);
    assert!(
        report.frustration.has_frustration || report.follow_up.repair_count >= 1,
        "Polite dissatisfaction ('not quite', 'more complicated than expected') should trigger a negative signal"
    );
    assert!(
        !matches!(report.overall_quality, InteractionQuality::Excellent),
        "Polite disappointment should not be classified as Excellent"
    );
}
#[test]
fn test_catastrophic_failure_looping_assistant() {
    // Worst-case end-to-end scenario: the assistant loops on the same canned
    // replies while the user escalates from repair attempts to ALL-CAPS anger
    // to an explicit request for a human. Exercises every signal at once:
    // turn count, follow-up repairs, frustration, assistant repetition/looping,
    // escalation, and the Severe overall-quality classification.
    // Message text kept byte-identical: the exact-count assertions below
    // (3 repairs, >=4 repetitions, ratio 0.5, ...) depend on the precise
    // phrasing and on which responses are exact vs near-duplicate repeats.
    let analyzer = TextBasedSignalAnalyzer::new();
    // Catastrophic failure: assistant stuck in loop, user increasingly frustrated
    let messages = vec![
        // Turn 1: Initial request
        create_message(Role::User, "I need to cancel my subscription"),
        create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
        // Turn 3: First repair - repetition clarification
        create_message(Role::User, "I just said I want to cancel my subscription"),
        // Turn 4: Exact repetition from assistant
        create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
        // Turn 5: Repair (negation) + frustration (ALL CAPS)
        create_message(Role::User, "No, I meant CANCEL. My. Subscription."),
        // Turn 6: Near-duplicate repetition
        create_message(Role::Assistant, "I understand you need help with your account. Let me assist you with that."),
        // Turn 7: Severe frustration (complaint + excessive punctuation + ALL CAPS)
        create_message(Role::User, "This is not helpful!! I want to CANCEL my subscription!!!"),
        // Turn 8: Exact repetition again (looping)
        create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
        // Turn 9: Repair (explicit rephrase) + frustration (complaint)
        create_message(Role::User, "This doesnt work. Let me rephrase: I need to terminate my account and stop being charged"),
        // Turn 10: Near-duplicate repetition (looping continues)
        create_message(Role::Assistant, "I understand you need help with your account. Let me assist you with that."),
        // Turn 11: Escalation (multiple types) + frustration
        create_message(Role::User, "I give up. Can I speak to a real person? This is a waste of time"),
    ];
    let report = analyzer.analyze(&messages);
    // Validate turn count
    assert_eq!(
        report.turn_count.total_turns, 11,
        "Should have 11 total turns"
    );
    assert_eq!(report.turn_count.user_turns, 6, "Should have 6 user turns");
    assert_eq!(
        report.turn_count.assistant_turns, 5,
        "Should have 5 assistant turns"
    );
    assert!(
        report.turn_count.is_concerning,
        "11 turns should be concerning (>7)"
    );
    assert!(
        !report.turn_count.is_excessive,
        "11 turns should not be excessive (<=12)"
    );
    assert!(
        report.turn_count.efficiency_score < 0.5,
        "Efficiency should be low"
    );
    // Validate repair detection (USER signals - query reformulation)
    // Detected repairs:
    // 1. "I just said I want to cancel..." - pattern: "I just said"
    // 2. "No, I meant CANCEL..." - pattern: "No, I meant"
    // 3. "Let me rephrase: I need to terminate..." - pattern: "let me rephrase"
    // Note: "This is not helpful!!" is frustration (not repair)
    // Note: "I give up..." is escalation (not repair)
    assert_eq!(
        report.follow_up.repair_count, 3,
        "Should detect exactly 3 repair attempts from user messages"
    );
    assert_eq!(
        report.follow_up.repair_ratio, 0.5,
        "Repair ratio should be 0.5 (3 repairs / 6 user messages)"
    );
    assert!(
        report.follow_up.is_concerning,
        "50% repair ratio should be highly concerning (threshold is 30%)"
    );
    // Validate frustration detection
    assert!(
        report.frustration.has_frustration,
        "Should detect frustration"
    );
    assert!(
        report.frustration.frustration_count >= 4,
        "Should detect multiple frustration indicators: found {}",
        report.frustration.frustration_count
    );
    assert!(
        report.frustration.severity >= 2,
        "Should be at least moderate frustration"
    );
    // Validate repetition/looping detection (ASSISTANT signals - not following instructions)
    // The assistant repeats the same unhelpful responses multiple times:
    // 1. "I can help you with account management..." appears 3 times (exact repetition)
    // 2. "I understand you need help with your account..." appears 2 times (near-duplicate)
    assert!(
        report.repetition.repetition_count >= 4,
        "Should detect at least 4 assistant repetitions (exact + near-duplicates)"
    );
    assert!(
        report.repetition.has_looping,
        "Should detect looping (>2 repetitions indicates stuck agent)"
    );
    assert!(
        report.repetition.severity >= 2,
        "Should be moderate to severe looping (assistant not adapting)"
    );
    // Validate escalation detection
    assert!(
        report.escalation.escalation_requested,
        "Should detect escalation request"
    );
    assert!(
        report.escalation.escalation_count >= 2,
        "Should detect multiple escalation indicators: 'give up' + 'speak to a real person'"
    );
    // Validate overall quality
    assert_eq!(report.overall_quality, InteractionQuality::Severe, "Should be classified as Severe due to escalation + excessive frustration + looping + high repair ratio");
}
}