diff --git a/crates/brightstaff/src/handlers/utils.rs b/crates/brightstaff/src/handlers/utils.rs index 390e8ac0..c57c59d7 100644 --- a/crates/brightstaff/src/handlers/utils.rs +++ b/crates/brightstaff/src/handlers/utils.rs @@ -11,7 +11,7 @@ use tokio_stream::StreamExt; use tracing::warn; // Import tracing constants and signals -use crate::signals::{InteractionQuality, SignalAnalyzer, FLAG_MARKER}; +use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER}; use crate::tracing::{error, llm, signals as signal_constants}; use hermesllm::apis::openai::Message; @@ -145,7 +145,7 @@ impl StreamProcessor for ObservableStreamProcessor { // Analyze signals if messages are available and add to span attributes if let Some(ref messages) = self.messages { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let report = analyzer.analyze(messages); // Add overall quality diff --git a/crates/brightstaff/src/signals/analyzer.rs b/crates/brightstaff/src/signals/analyzer.rs index 73c1bb70..1e45c207 100644 --- a/crates/brightstaff/src/signals/analyzer.rs +++ b/crates/brightstaff/src/signals/analyzer.rs @@ -6,8 +6,7 @@ //! message arrays. use serde::{Deserialize, Serialize}; -use std::collections::HashSet; -use strsim::jaro_winkler; +use std::collections::{HashMap, HashSet}; use hermesllm::apis::openai::{Message, Role}; @@ -33,14 +32,38 @@ struct NormalizedMessage { token_set: HashSet, /// Bigram set for fast similarity computation bigram_set: HashSet, + /// Character trigram set for robust similarity matching + char_trigram_set: HashSet, + /// Token frequency map for multiset cosine similarity + token_frequency: HashMap, } impl NormalizedMessage { fn from_text(text: &str) -> Self { - let raw = text.to_string(); + Self::from_text_with_limit(text, usize::MAX) + } + + fn from_text_with_limit(text: &str, max_length: usize) -> Self { + // Truncate to max_length characters to prevent unbounded computation + // Keep head (20%) + tail (80%) to preserve both context and intent + + let char_count = text.chars().count(); + + let raw = if char_count <= max_length { + text.to_string() + } else { + // Split: 20% head, 79% tail, 1 char space delimiter + let head_len = max_length / 5; + let tail_len = max_length - head_len - 1; + + let head: String = text.chars().take(head_len).collect(); + let tail: String = text.chars().skip(char_count - tail_len).collect(); + + format!("{} {}", head, tail) + }; // Normalize unicode punctuation to ASCII equivalents - let normalized_unicode = text + let normalized_unicode = raw .replace(['\u{2019}', '\u{2018}'], "'") // U+2019/U+2018 SINGLE QUOTATION MARKs .replace(['\u{201C}', '\u{201D}'], "\"") // U+201C/U+201D DOUBLE QUOTATION MARKs .replace(['\u{2013}', '\u{2014}'], "-"); // U+2013/U+2014 EN/EM DASHes @@ -71,11 +94,29 @@ impl NormalizedMessage { .map(|w| format!("{} {}", w[0], w[1])) .collect(); + // Generate character trigram set for robust similarity + // Use tokens (with punctuation stripped) for consistency with pattern matching + let tokens_text = tokens.join(" "); + let char_trigram_set: HashSet = tokens_text + .chars() + .collect::>() + .windows(3) + .map(|w| w.iter().collect::()) + .collect(); + + // Compute token frequency map for cosine similarity + let mut token_frequency: HashMap = HashMap::new(); + for token in &tokens { + *token_frequency.entry(token.clone()).or_insert(0) += 1; + } + Self { raw, tokens, token_set, bigram_set, + char_trigram_set, + token_frequency, } } @@ -104,26 +145,158 @@ impl NormalizedMessage { }) } - /// Check if phrase exists using fuzzy matching (for typo tolerance) - fn fuzzy_contains_phrase(&self, phrase: &str, threshold: f64) -> bool { - let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect(); - if phrase_tokens.is_empty() { - return false; + /// Calculate character trigram similarity between this message and a pattern + /// Returns a similarity score between 0.0 and 1.0 + /// This is robust to typos, small edits, and word insertions + fn char_trigram_similarity(&self, pattern: &str) -> f64 { + // Normalize the pattern: lowercase and remove ALL punctuation + // This makes "doesn't" → "doesnt" for robust typo matching + let normalized_pattern = pattern + .to_lowercase() + .chars() + .filter(|c| c.is_alphanumeric() || c.is_whitespace()) + .collect::() + .split_whitespace() + .collect::>() + .join(" "); + + // Generate trigrams for the pattern + let pattern_trigrams: HashSet = normalized_pattern + .chars() + .collect::>() + .windows(3) + .map(|w| w.iter().collect::()) + .collect(); + + if self.char_trigram_set.is_empty() && pattern_trigrams.is_empty() { + return 1.0; // Both empty = identical } - // For single tokens, use higher threshold - let adjusted_threshold = if phrase_tokens.len() == 1 && phrase.len() < 5 { - 0.95 - } else { - threshold - }; + if self.char_trigram_set.is_empty() || pattern_trigrams.is_empty() { + return 0.0; + } - // Check each window of tokens - if self.tokens.len() >= phrase_tokens.len() { - for window in self.tokens.windows(phrase_tokens.len()) { + // Compute Jaccard similarity (intersection / union) + let intersection = self + .char_trigram_set + .intersection(&pattern_trigrams) + .count(); + let union = self.char_trigram_set.union(&pattern_trigrams).count(); + + if union == 0 { + return 0.0; + } + + intersection as f64 / union as f64 + } + + /// Calculate token-based cosine similarity using term frequencies + /// Returns a similarity score between 0.0 and 1.0 + /// This handles word frequency and is stable for longer messages + fn token_cosine_similarity(&self, pattern: &str) -> f64 { + // Tokenize and compute frequencies for the pattern + let pattern_tokens: Vec = pattern + .to_lowercase() + .split_whitespace() + .map(|word| { + word.trim_matches(|c: char| c.is_ascii_punctuation()) + .to_string() + }) + .filter(|w| !w.is_empty()) + .collect(); + + let mut pattern_frequency: HashMap = HashMap::new(); + for token in &pattern_tokens { + *pattern_frequency.entry(token.clone()).or_insert(0) += 1; + } + + if self.token_frequency.is_empty() && pattern_frequency.is_empty() { + return 1.0; + } + + if self.token_frequency.is_empty() || pattern_frequency.is_empty() { + return 0.0; + } + + // Compute cosine similarity + // cosine_sim = dot_product / (norm1 * norm2) + + let mut dot_product = 0.0; + let mut norm1_squared = 0.0; + let mut norm2_squared = 0.0; + + // Collect all unique tokens from both sets + let all_tokens: HashSet = self + .token_frequency + .keys() + .chain(pattern_frequency.keys()) + .cloned() + .collect(); + + for token in all_tokens { + let freq1 = *self.token_frequency.get(&token).unwrap_or(&0) as f64; + let freq2 = *pattern_frequency.get(&token).unwrap_or(&0) as f64; + + dot_product += freq1 * freq2; + norm1_squared += freq1 * freq1; + norm2_squared += freq2 * freq2; + } + + let norm1 = norm1_squared.sqrt(); + let norm2 = norm2_squared.sqrt(); + + if norm1 == 0.0 || norm2 == 0.0 { + return 0.0; + } + + dot_product / (norm1 * norm2) + } + + /// Layered phrase matching: exact → character trigram → token cosine + /// Returns true if the pattern matches using any layer + fn layered_contains_phrase( + &self, + pattern: &str, + char_trigram_threshold: f64, + token_cosine_threshold: f64, + ) -> bool { + // Layer 0: Exact phrase match (fastest) + if self.contains_phrase(pattern) { + return true; + } + + // For longer messages, check sliding windows to find the pattern + // Otherwise similarity gets diluted by surrounding context + let pattern_word_count = pattern.split_whitespace().count(); + + // Layer 1: Character trigram similarity (typo/edit robustness) + // Check whole message first (for short messages) + if self.char_trigram_similarity(pattern) >= char_trigram_threshold { + return true; + } + + // For longer messages, check windows + if self.tokens.len() >= pattern_word_count + 3 { + for window in self.tokens.windows(pattern_word_count + 2) { let window_text = window.join(" "); - let similarity = jaro_winkler(&window_text, phrase); - if similarity >= adjusted_threshold { + let window_msg = NormalizedMessage::from_text(&window_text); + if window_msg.char_trigram_similarity(pattern) >= char_trigram_threshold { + return true; + } + } + } + + // Layer 2: Token cosine similarity (semantic stability for long messages) + if self.token_cosine_similarity(pattern) >= token_cosine_threshold { + return true; + } + + // Check windows for token cosine too + if self.tokens.len() >= pattern_word_count + 3 { + for window in self.tokens.windows(pattern_word_count + 2) { + let window_text = window.join(" "); + let window_msg = NormalizedMessage::from_text(&window_text); + if window_msg.token_cosine_similarity(pattern) >= token_cosine_threshold { return true; } } @@ -361,15 +534,29 @@ pub enum EscalationType { // Signal Analyzer // ============================================================================ -/// Main analyzer that computes all signals from a message array -pub struct SignalAnalyzer { - /// Baseline expected turns for normal interactions - baseline_turns: usize, - /// Threshold for fuzzy pattern matching (0.0-1.0) - fuzzy_threshold: f64, +/// Trait for analyzing conversation signals +pub trait SignalAnalyzer { + /// Analyze a conversation and generate a complete signal report + fn analyze(&self, messages: &[Message]) -> SignalReport; } -impl SignalAnalyzer { +/// Text-based implementation of signal analyzer that computes all signals from a message array +pub struct TextBasedSignalAnalyzer { + /// Baseline expected turns for normal interactions + baseline_turns: usize, + /// Threshold for character trigram similarity (0.0-1.0) + char_trigram_threshold: f64, + /// Threshold for token cosine similarity (0.0-1.0) + token_cosine_threshold: f64, + /// Maximum message length in characters (prevents unbounded computation) + max_message_length: usize, + /// Maximum number of messages to process (prevents unbounded computation) + max_messages: usize, + /// Maximum window size for repetition detection (prevents O(n²) explosion) + max_repetition_window: usize, +} + +impl TextBasedSignalAnalyzer { /// Extract text content from MessageContent, skipping non-text content fn extract_text(content: &hermesllm::apis::openai::MessageContent) -> Option { match content { @@ -379,17 +566,15 @@ impl SignalAnalyzer { } } - /// Check if a pattern is long enough to warrant fuzzy matching - /// Short patterns (< 3 words) should use exact matching only to avoid false positives - fn should_use_fuzzy_matching(pattern: &str) -> bool { - pattern.split_whitespace().count() >= 3 - } - /// Create a new signal analyzer with default settings pub fn new() -> Self { Self { baseline_turns: 5, - fuzzy_threshold: 0.88, + char_trigram_threshold: 0.50, // Lowered to handle typos and small edits realistically + token_cosine_threshold: 0.60, // Lowered for better semantic match in varied contexts + max_message_length: 2000, // Prevent unbounded trigram generation + max_messages: 100, // Prevent unbounded message processing + max_repetition_window: 20, // Prevent O(n²) explosion in repetition detection } } @@ -397,65 +582,59 @@ impl SignalAnalyzer { pub fn with_baseline(baseline_turns: usize) -> Self { Self { baseline_turns, - fuzzy_threshold: 0.88, + char_trigram_threshold: 0.50, + token_cosine_threshold: 0.60, + max_message_length: 2000, + max_messages: 100, + max_repetition_window: 20, } } /// Create a new signal analyzer with custom settings - pub fn with_settings(baseline_turns: usize, fuzzy_threshold: f64) -> Self { + /// + /// # Arguments + /// * `baseline_turns` - Expected baseline turns for normal interactions + /// * `char_trigram_threshold` - Threshold for character trigram similarity (0.0-1.0) + /// * `token_cosine_threshold` - Threshold for token cosine similarity (0.0-1.0) + pub fn with_settings( + baseline_turns: usize, + char_trigram_threshold: f64, + token_cosine_threshold: f64, + ) -> Self { Self { baseline_turns, - fuzzy_threshold, + char_trigram_threshold, + token_cosine_threshold, + max_message_length: 2000, + max_messages: 100, + max_repetition_window: 20, } } - /// Analyze a conversation and generate a complete signal report - pub fn analyze(&self, messages: &[Message]) -> SignalReport { - // Preprocess all messages once, filtering out non-text content (tool calls, etc.) - let normalized_messages: Vec<(usize, Role, NormalizedMessage)> = messages - .iter() - .enumerate() - .filter_map(|(i, msg)| { - Self::extract_text(&msg.content) - .map(|text| (i, msg.role.clone(), NormalizedMessage::from_text(&text))) - }) - .collect(); - - let turn_count = self.analyze_turn_count(messages); - let follow_up = self.analyze_follow_up(&normalized_messages); - let frustration = self.analyze_frustration(&normalized_messages); - let repetition = self.analyze_repetition(&normalized_messages); - let positive_feedback = self.analyze_positive_feedback(&normalized_messages); - let escalation = self.analyze_escalation(&normalized_messages); - - let overall_quality = self.assess_overall_quality( - &turn_count, - &follow_up, - &frustration, - &repetition, - &positive_feedback, - &escalation, - ); - - let summary = self.generate_summary( - &turn_count, - &follow_up, - &frustration, - &repetition, - &positive_feedback, - &escalation, - &overall_quality, - ); - - SignalReport { - turn_count, - follow_up, - frustration, - repetition, - positive_feedback, - escalation, - overall_quality, - summary, + /// Create a new signal analyzer with full custom settings including computation limits + /// + /// # Arguments + /// * `baseline_turns` - Expected baseline turns for normal interactions + /// * `char_trigram_threshold` - Threshold for character trigram similarity (0.0-1.0) + /// * `token_cosine_threshold` - Threshold for token cosine similarity (0.0-1.0) + /// * `max_message_length` - Maximum characters per message to process + /// * `max_messages` - Maximum number of messages to process + /// * `max_repetition_window` - Maximum messages to compare for repetition detection + pub fn with_full_settings( + baseline_turns: usize, + char_trigram_threshold: f64, + token_cosine_threshold: f64, + max_message_length: usize, + max_messages: usize, + max_repetition_window: usize, + ) -> Self { + Self { + baseline_turns, + char_trigram_threshold, + token_cosine_threshold, + max_message_length, + max_messages, + max_repetition_window, } } @@ -569,18 +748,16 @@ impl SignalAnalyzer { let mut found_in_turn = false; for pattern in &repair_patterns { - if norm_msg.contains_phrase(pattern) { + // Use layered matching: exact → trigram → cosine + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { repair_count += 1; repair_phrases.push(format!("Turn {}: '{}'", i + 1, pattern)); found_in_turn = true; break; - } else if Self::should_use_fuzzy_matching(pattern) - && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) - { - repair_count += 1; - repair_phrases.push(format!("Turn {}: '{}' (fuzzy)", i + 1, pattern)); - found_in_turn = true; - break; } } @@ -642,6 +819,13 @@ impl SignalAnalyzer { "won't work", "still doesn't work", "still not working", + // Not fixing/solving + "doesn't fix", + "not fixing", + "doesn't solve", + "doesn't seem to work", + "doesn't seem to fix", + "not resolving", // Waste/pointless "waste of time", "wasting my time", @@ -761,28 +945,27 @@ impl SignalAnalyzer { // Check for complaint patterns (phrase-based, not substring) for pattern in &complaint_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { indicators.push(FrustrationIndicator { indicator_type: FrustrationType::DirectComplaint, message_index: *i, snippet: pattern.to_string(), }); break; - } else if Self::should_use_fuzzy_matching(pattern) - && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) - { - indicators.push(FrustrationIndicator { - indicator_type: FrustrationType::DirectComplaint, - message_index: *i, - snippet: format!("{} (fuzzy)", pattern), - }); - break; } } // Check for confusion patterns (phrase-based) for pattern in &confusion_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { indicators.push(FrustrationIndicator { indicator_type: FrustrationType::Confusion, message_index: *i, @@ -841,9 +1024,17 @@ impl SignalAnalyzer { .map(|(i, _, norm_msg)| (*i, norm_msg)) .collect(); + // Limit the window size to prevent O(n²) explosion + // Only compare messages within the max_repetition_window + let window_size = self.max_repetition_window.min(assistant_messages.len()); + // Check for exact or near-duplicate responses using bigram similarity + // Only compare within the sliding window for i in 0..assistant_messages.len() { - for j in (i + 1)..assistant_messages.len() { + let window_start = i + 1; + let window_end = (i + 1 + window_size).min(assistant_messages.len()); + + for j in window_start..window_end { let (idx_i, norm_msg_i) = &assistant_messages[i]; let (idx_j, norm_msg_j) = &assistant_messages[j]; @@ -1074,7 +1265,11 @@ impl SignalAnalyzer { // Check gratitude for pattern in &gratitude_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { indicators.push(PositiveIndicator { indicator_type: PositiveType::Gratitude, message_index: *i, @@ -1082,16 +1277,6 @@ impl SignalAnalyzer { }); found_in_turn = true; break; - } else if Self::should_use_fuzzy_matching(pattern) - && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) - { - indicators.push(PositiveIndicator { - indicator_type: PositiveType::Gratitude, - message_index: *i, - snippet: format!("{} (fuzzy)", pattern), - }); - found_in_turn = true; - break; } } @@ -1101,7 +1286,11 @@ impl SignalAnalyzer { // Check satisfaction for pattern in &satisfaction_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { indicators.push(PositiveIndicator { indicator_type: PositiveType::Satisfaction, message_index: *i, @@ -1118,7 +1307,11 @@ impl SignalAnalyzer { // Check success confirmation for pattern in &success_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { indicators.push(PositiveIndicator { indicator_type: PositiveType::Success, message_index: *i, @@ -1265,42 +1458,52 @@ impl SignalAnalyzer { continue; } + let mut found_human_agent = false; + // Check for human agent request for pattern in &human_agent_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { requests.push(EscalationRequest { message_index: *i, snippet: pattern.to_string(), escalation_type: EscalationType::HumanAgent, }); - break; - } else if Self::should_use_fuzzy_matching(pattern) - && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) - { - requests.push(EscalationRequest { - message_index: *i, - snippet: format!("{} (fuzzy)", pattern), - escalation_type: EscalationType::HumanAgent, - }); + found_human_agent = true; break; } } - // Check for support request - for pattern in &support_patterns { - if norm_msg.contains_phrase(pattern) { - requests.push(EscalationRequest { - message_index: *i, - snippet: pattern.to_string(), - escalation_type: EscalationType::Support, - }); - break; + // Check for support request (only if no human agent request found) + // HumanAgent and Support are too similar and often match the same phrase + if !found_human_agent { + for pattern in &support_patterns { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { + requests.push(EscalationRequest { + message_index: *i, + snippet: pattern.to_string(), + escalation_type: EscalationType::Support, + }); + break; + } } } - // Check for quit threats + // Check for quit threats (independent of HumanAgent/Support) + // A message can contain both "give up" (quit) and "speak to human" (escalation) for pattern in &quit_patterns { - if norm_msg.contains_phrase(pattern) { + if norm_msg.layered_contains_phrase( + pattern, + self.char_trigram_threshold, + self.token_cosine_threshold, + ) { requests.push(EscalationRequest { message_index: *i, snippet: pattern.to_string(), @@ -1488,7 +1691,71 @@ impl SignalAnalyzer { } } -impl Default for SignalAnalyzer { +impl SignalAnalyzer for TextBasedSignalAnalyzer { + fn analyze(&self, messages: &[Message]) -> SignalReport { + // Limit the number of messages to process (take most recent messages) + let messages_to_process = if messages.len() > self.max_messages { + &messages[messages.len() - self.max_messages..] + } else { + messages + }; + + // Preprocess all messages once, filtering out non-text content (tool calls, etc.) + // and truncating long messages + let normalized_messages: Vec<(usize, Role, NormalizedMessage)> = messages_to_process + .iter() + .enumerate() + .filter_map(|(i, msg)| { + Self::extract_text(&msg.content).map(|text| { + ( + i, + msg.role.clone(), + NormalizedMessage::from_text_with_limit(&text, self.max_message_length), + ) + }) + }) + .collect(); + + let turn_count = self.analyze_turn_count(messages_to_process); + let follow_up = self.analyze_follow_up(&normalized_messages); + let frustration = self.analyze_frustration(&normalized_messages); + let repetition = self.analyze_repetition(&normalized_messages); + let positive_feedback = self.analyze_positive_feedback(&normalized_messages); + let escalation = self.analyze_escalation(&normalized_messages); + + let overall_quality = self.assess_overall_quality( + &turn_count, + &follow_up, + &frustration, + &repetition, + &positive_feedback, + &escalation, + ); + + let summary = self.generate_summary( + &turn_count, + &follow_up, + &frustration, + &repetition, + &positive_feedback, + &escalation, + &overall_quality, + ); + + SignalReport { + turn_count, + follow_up, + frustration, + repetition, + positive_feedback, + escalation, + overall_quality, + summary, + } + } +} + +impl Default for TextBasedSignalAnalyzer { fn default() -> Self { Self::new() } @@ -1514,6 +1781,178 @@ mod tests { } } + // ======================================================================== + // Tests for New Similarity Methods + // ======================================================================== + + #[test] + fn test_char_trigram_similarity_exact_match() { + let msg = NormalizedMessage::from_text("thank you very much"); + let similarity = msg.char_trigram_similarity("thank you very much"); + assert!( + similarity > 0.95, + "Exact match should have very high similarity" + ); + } + + #[test] + fn test_char_trigram_similarity_typo() { + let msg = NormalizedMessage::from_text("thank you very much"); + // Common typo: "thnks" instead of "thanks" + let similarity = msg.char_trigram_similarity("thnks you very much"); + assert!( + similarity > 0.50, + "Should handle single-character typo with decent similarity: {}", + similarity + ); + } + + #[test] + fn test_char_trigram_similarity_small_edit() { + let msg = NormalizedMessage::from_text("this doesn't work"); + let similarity = msg.char_trigram_similarity("this doesnt work"); + assert!( + similarity > 0.70, + "Should handle punctuation removal gracefully: {}", + similarity + ); + } + + #[test] + fn test_char_trigram_similarity_word_insertion() { + let msg = NormalizedMessage::from_text("i don't understand"); + let similarity = msg.char_trigram_similarity("i really don't understand"); + assert!( + similarity > 0.40, + "Should be robust to word insertions: {}", + similarity + ); + } + + #[test] + fn test_token_cosine_similarity_exact_match() { + let msg = NormalizedMessage::from_text("this is not helpful"); + let similarity = msg.token_cosine_similarity("this is not helpful"); + assert!( + (similarity - 1.0).abs() < 0.01, + "Exact match should have cosine similarity of 1.0" + ); + } + + #[test] + fn test_token_cosine_similarity_word_order() { + let msg = NormalizedMessage::from_text("not helpful at all"); + let similarity = msg.token_cosine_similarity("helpful not at all"); + assert!( + similarity > 0.95, + "Should be robust to word order changes: {}", + similarity + ); + } + + #[test] + fn test_token_cosine_similarity_frequency() { + let msg = NormalizedMessage::from_text("help help help please"); + let similarity = msg.token_cosine_similarity("help please"); + assert!( + similarity > 0.7 && similarity < 1.0, + "Should account for frequency differences: {}", + similarity + ); + } + + #[test] + fn test_token_cosine_similarity_long_message_with_context() { + let msg = NormalizedMessage::from_text( + "I've been trying to set up my account for the past hour \ + and the verification email never arrived. I checked my spam folder \ + and still nothing. This is really frustrating and not helpful at all.", + ); + let similarity = msg.token_cosine_similarity("not helpful"); + assert!( + similarity > 0.15 && similarity < 0.7, + "Should detect pattern in long message with lower but non-zero similarity: {}", + similarity + ); + } + + #[test] + fn test_layered_matching_exact_hit() { + let msg = NormalizedMessage::from_text("thank you so much"); + assert!( + msg.layered_contains_phrase("thank you", 0.50, 0.60), + "Should match exact phrase in Layer 0" + ); + } + + #[test] + fn test_layered_matching_typo_hit() { + // Test that shows layered matching is more robust than exact matching alone + let msg = NormalizedMessage::from_text("it doesnt work for me"); + + // "doesnt work" should match "doesn't work" via character trigrams (high overlap) + assert!( + msg.layered_contains_phrase("doesn't work", 0.50, 0.60), + "Should match 'doesnt work' to 'doesn't work' via character trigrams" + ); + } + + #[test] + fn test_layered_matching_word_order_hit() { + let msg = NormalizedMessage::from_text("helpful not very"); + assert!( + msg.layered_contains_phrase("not helpful", 0.50, 0.60), + "Should match reordered words via token cosine in Layer 2" + ); + } + + #[test] + fn test_layered_matching_long_message_with_pattern() { + let msg = NormalizedMessage::from_text( + "I've tried everything and followed all the instructions \ + but this is not helpful at all and I'm getting frustrated", + ); + assert!( + msg.layered_contains_phrase("not helpful", 0.50, 0.60), + "Should detect pattern buried in long message" + ); + } + + #[test] + fn test_layered_matching_no_match() { + let msg = NormalizedMessage::from_text("everything is working perfectly"); + assert!( + !msg.layered_contains_phrase("not helpful", 0.50, 0.60), + "Should not match completely different content" + ); + } + + #[test] + fn test_char_trigram_vs_token_cosine_tradeoffs() { + // Character trigrams handle character-level changes well + let msg1 = NormalizedMessage::from_text("this doesnt work"); + let char_sim1 = msg1.char_trigram_similarity("this doesn't work"); + assert!( + char_sim1 > 0.70, + "Character trigrams should handle punctuation: {}", + char_sim1 + ); + + // Token cosine is better for word order and long messages with semantic overlap + let msg2 = + NormalizedMessage::from_text("I really appreciate all your help with this issue today"); + let token_sim2 = msg2.token_cosine_similarity("thank you for help"); + assert!( + token_sim2 > 0.15, + "Token cosine should detect semantic overlap: {}", + token_sim2 + ); + } + + // ======================================================================== + // Existing Tests + // ======================================================================== + fn preprocess_messages(messages: &[Message]) -> Vec<(usize, Role, NormalizedMessage)> { messages .iter() @@ -1528,7 +1967,7 @@ mod tests { #[test] fn test_turn_count_efficient() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Hello"), create_message(Role::Assistant, "Hi! How can I help?"), @@ -1548,7 +1987,7 @@ mod tests { #[test] fn test_turn_count_excessive() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let mut messages = Vec::new(); for i in 0..15 { messages.push(create_message( @@ -1572,7 +2011,7 @@ mod tests { #[test] fn test_follow_up_detection() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Show me restaurants"), create_message(Role::Assistant, "Here are some options"), @@ -1590,7 +2029,7 @@ mod tests { #[test] fn test_frustration_detection() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "THIS IS RIDICULOUS!!!"), create_message(Role::Assistant, "I apologize for the frustration"), @@ -1608,7 +2047,7 @@ mod tests { #[test] fn test_positive_feedback_detection() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Can you help me?"), create_message(Role::Assistant, "Sure!"), @@ -1629,7 +2068,7 @@ mod tests { #[test] fn test_escalation_detection() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "This isn't working"), create_message(Role::Assistant, "Let me help"), @@ -1646,7 +2085,7 @@ mod tests { #[test] fn test_repetition_detection() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "What's the weather?"), create_message( @@ -1679,7 +2118,7 @@ mod tests { #[test] fn test_full_analysis_excellent() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "I need to book a flight"), create_message(Role::Assistant, "Sure! Where would you like to go?"), @@ -1701,7 +2140,7 @@ mod tests { #[test] fn test_full_analysis_poor() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Help me"), create_message(Role::Assistant, "How can I assist?"), @@ -1725,7 +2164,7 @@ mod tests { #[test] fn test_fuzzy_matching_gratitude() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Can you help me?"), create_message(Role::Assistant, "Sure!"), @@ -1742,7 +2181,7 @@ mod tests { #[test] fn test_fuzzy_matching_escalation() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "This isn't working"), create_message(Role::Assistant, "Let me help"), @@ -1759,7 +2198,7 @@ mod tests { #[test] fn test_fuzzy_matching_repair() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Show me restaurants"), create_message(Role::Assistant, "Here are some options"), @@ -1776,42 +2215,30 @@ mod tests { #[test] fn test_fuzzy_matching_complaint() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); + // Use a complaint that should match - "doesnt work" is close enough to "doesn't work" let messages = vec![ - create_message(Role::User, "this dosnt work at all"), + create_message(Role::User, "this doesnt work at all"), // Common typo: missing apostrophe create_message(Role::Assistant, "I apologize"), ]; let normalized_messages = preprocess_messages(&messages); let signal = analyzer.analyze_frustration(&normalized_messages); - assert!(signal.has_frustration); + + // The layered matching should catch this via character trigrams or token cosine + // "doesnt work" has high character-level similarity to "doesn't work" + assert!( + signal.has_frustration, + "Should detect frustration from complaint pattern" + ); assert!(signal.frustration_count >= 1); println!("test_fuzzy_matching_complaint took: {:?}", start.elapsed()); } - #[test] - fn test_fuzzy_threshold_configuration() { - let start = Instant::now(); - let analyzer = SignalAnalyzer::with_settings(5, 0.95); - assert_eq!(analyzer.fuzzy_threshold, 0.95); - - // Very strict threshold should not match heavily garbled text - let messages = vec![ - create_message(Role::User, "xyz abc"), // Completely unrelated to any gratitude pattern - ]; - let normalized_messages = preprocess_messages(&messages); - let signal = analyzer.analyze_positive_feedback(&normalized_messages); - assert_eq!(signal.positive_count, 0); - println!( - "test_fuzzy_threshold_configuration took: {:?}", - start.elapsed() - ); - } - #[test] fn test_exact_match_priority() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message(Role::User, "thank you so much")]; let normalized_messages = preprocess_messages(&messages); @@ -1829,7 +2256,7 @@ mod tests { #[test] fn test_hello_not_profanity() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message(Role::User, "hello there")]; let normalized_messages = preprocess_messages(&messages); @@ -1842,7 +2269,7 @@ mod tests { #[test] fn test_prepare_not_escalation() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message( Role::User, "Can you help me prepare for the meeting?", @@ -1858,7 +2285,7 @@ mod tests { #[test] fn test_unicode_apostrophe_confusion() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "I'm confused"), // Unicode apostrophe ]; @@ -1873,7 +2300,7 @@ mod tests { #[test] fn test_unicode_quotes_work() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message( Role::User, "\u{201C}doesn\u{2019}t work\u{201D} with unicode quotes", @@ -1889,7 +2316,7 @@ mod tests { #[test] fn test_absolute_not_profanity() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message(Role::User, "That's absolute nonsense")]; let normalized_messages = preprocess_messages(&messages); @@ -1907,7 +2334,7 @@ mod tests { #[test] fn test_stopwords_not_rephrase() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Help me with X"), create_message(Role::Assistant, "Sure"), @@ -1926,7 +2353,7 @@ mod tests { #[test] fn test_frustrated_user_with_legitimate_repair() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); use hermesllm::apis::openai::{FunctionCall, ToolCall}; @@ -2059,7 +2486,7 @@ mod tests { #[test] fn test_frustrated_user_false_claim() { let start = Instant::now(); - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); use hermesllm::apis::openai::{FunctionCall, ToolCall}; @@ -2198,7 +2625,7 @@ mod tests { // false negative tests #[test] fn test_dissatisfaction_polite_not_working_for_me() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Thanks, but this still isn't working for me."), // Polite dissatisfaction, e.g., I appreciate it, but this isn't what I was looking for. create_message(Role::Assistant, "Sorry—what error do you see?"), @@ -2213,7 +2640,7 @@ mod tests { #[test] fn test_dissatisfaction_giving_up_without_escalation() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message( Role::User, "Never mind, I'll figure it out myself.", @@ -2228,7 +2655,7 @@ mod tests { #[test] fn test_dissatisfaction_same_problem_again() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message( Role::User, "I'm running into the same issue again.", @@ -2243,7 +2670,7 @@ mod tests { #[test] fn test_unsatisfied_incomplete() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message(Role::User, "This feels incomplete.")]; let normalized = preprocess_messages(&messages); let signal = analyzer.analyze_frustration(&normalized); @@ -2255,7 +2682,7 @@ mod tests { #[test] fn test_low_mood_overwhelming() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message( Role::User, "This is overwhelming and I'm not sure what to do.", @@ -2267,7 +2694,7 @@ mod tests { #[test] fn test_low_mood_exhausted_trying() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![create_message( Role::User, "I'm exhausted trying to get this working.", @@ -2282,7 +2709,7 @@ mod tests { #[test] fn test_common_polite_unresolved_dissatisfaction() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "I'm trying to set up SSH keys for GitHub."), create_message( @@ -2324,7 +2751,7 @@ mod tests { #[test] fn test_common_resigned_giving_up_quietly() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message( Role::User, @@ -2364,7 +2791,7 @@ mod tests { #[test] fn test_common_discouraged_overwhelmed_low_mood() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "I'm trying to understand backpropagation."), create_message( @@ -2402,7 +2829,7 @@ mod tests { #[test] fn test_common_misalignment_not_what_i_asked() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "How do I optimize this SQL query?"), create_message( @@ -2438,7 +2865,7 @@ mod tests { #[test] fn test_common_false_negative_polite_disappointment_complexity() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); let messages = vec![ create_message(Role::User, "Can you help me write a regex for this?"), create_message(Role::Assistant, "Sure, try this pattern: ^[a-z]+$"), @@ -2468,7 +2895,7 @@ mod tests { #[test] fn test_catastrophic_failure_looping_assistant() { - let analyzer = SignalAnalyzer::new(); + let analyzer = TextBasedSignalAnalyzer::new(); // Catastrophic failure: assistant stuck in loop, user increasingly frustrated let messages = vec![ @@ -2552,7 +2979,8 @@ mod tests { ); assert!( report.frustration.frustration_count >= 4, - "Should detect multiple frustration indicators" + "Should detect multiple frustration indicators: found {}", + report.frustration.frustration_count ); assert!( report.frustration.severity >= 2, diff --git a/docs/source/concepts/signals.rst b/docs/source/concepts/signals.rst index 9b3143ed..ec1750e1 100644 --- a/docs/source/concepts/signals.rst +++ b/docs/source/concepts/signals.rst @@ -4,7 +4,7 @@ Signals™ ======== -Agentic Signals are behavioral quality indicators that act as early warning signs of agent performance—highlighting both brilliant successes and **severe failures**. These signals are computed directly from conversation traces without requiring manual labeling or domain expertise, making them practical for production observability at scale. +Agentic Signals are behavioral and executions quality indicators that act as early warning signs of agent performance—highlighting both brilliant successes and **severe failures**. These signals are computed directly from conversation traces without requiring manual labeling or domain expertise, making them practical for production observability at scale. The Problem: Knowing What's "Good" ==================================