From dfd58867433b2800f83bb9835af3180da7a75961 Mon Sep 17 00:00:00 2001
From: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
Date: Wed, 17 Dec 2025 14:13:30 -0800
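Subject: [PATCH] reducing false positives for signals like positive
 interaction

Guard the fuzzy-match fallback for repair, complaint, gratitude, and
escalation patterns so it only runs for patterns of three or more words;
shorter patterns must match exactly. Also rename
InteractionQuality::Critical to Severe, change FLAG_MARKER from U+2691 to
U+1F6A9, and add two frustrated-user tests.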
---
 crates/brightstaff/src/handlers/utils.rs  |   2 +-
 crates/brightstaff/src/signals/signals.rs | 210 +++++++++++++++++++++-
 2 files changed, 202 insertions(+), 10 deletions(-)

diff --git a/crates/brightstaff/src/handlers/utils.rs b/crates/brightstaff/src/handlers/utils.rs
index 5d8612f5..d0060090 100644
--- a/crates/brightstaff/src/handlers/utils.rs
+++ b/crates/brightstaff/src/handlers/utils.rs
@@ -165,7 +165,7 @@ impl StreamProcessor for ObservableStreamProcessor {
                 || report.escalation.escalation_requested
                 || matches!(
                     report.overall_quality,
-                    InteractionQuality::Poor | InteractionQuality::Critical
+                    InteractionQuality::Poor | InteractionQuality::Severe
                 );
 
             if should_flag {
diff --git a/crates/brightstaff/src/signals/signals.rs b/crates/brightstaff/src/signals/signals.rs
index 0893afe7..656702ac 100644
--- a/crates/brightstaff/src/signals/signals.rs
+++ b/crates/brightstaff/src/signals/signals.rs
@@ -16,7 +16,7 @@ use hermesllm::apis::openai::{Message, Role};
 // ============================================================================
 
 /// Flag emoji for marking spans/operations worth investigating
-pub const FLAG_MARKER: &str = "\u{2691}";
+pub const FLAG_MARKER: &str = "\u{1F6A9}";
 
 // ============================================================================
 // Normalized Message Processing
@@ -152,7 +152,7 @@ pub enum InteractionQuality {
     /// Poor interaction with concerning signals
     Poor,
-    /// Critical interaction with severe negative signals
+    /// Severe interaction with strongly negative signals
-    Critical,
+    Severe,
 }
 
 /// Container for all computed signals for a conversation
@@ -382,6 +382,12 @@ impl SignalAnalyzer {
         }
     }
 
+    /// Returns `true` when `pattern` contains at least three whitespace-separated words.
+    /// Shorter patterns (< 3 words) should use exact matching only to avoid false positives.
+    fn should_use_fuzzy_matching(pattern: &str) -> bool {
+        pattern.split_whitespace().count() >= 3
+    }
+
     /// Create a new signal analyzer with default settings
     pub fn new() -> Self {
         Self {
@@ -571,7 +577,7 @@ impl SignalAnalyzer {
                     repair_phrases.push(format!("Turn {}: '{}'", i + 1, pattern));
                     found_in_turn = true;
                     break;
-                } else if norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
+                } else if Self::should_use_fuzzy_matching(pattern) && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
                     repair_count += 1;
                     repair_phrases.push(format!("Turn {}: '{}' (fuzzy)", i + 1, pattern));
                     found_in_turn = true;
@@ -754,7 +760,7 @@ impl SignalAnalyzer {
                         snippet: pattern.to_string(),
                     });
                     break;
-                } else if norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
+                } else if Self::should_use_fuzzy_matching(pattern) && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
                     indicators.push(FrustrationIndicator {
                         indicator_type: FrustrationType::DirectComplaint,
                         message_index: *i,
@@ -1056,7 +1062,7 @@ impl SignalAnalyzer {
                     });
                     found_in_turn = true;
                     break;
-                } else if norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
+                } else if Self::should_use_fuzzy_matching(pattern) && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
                     indicators.push(PositiveIndicator {
                         indicator_type: PositiveType::Gratitude,
                         message_index: *i,
@@ -1243,7 +1249,7 @@ impl SignalAnalyzer {
                         escalation_type: EscalationType::HumanAgent,
                     });
                     break;
-                } else if norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
+                } else if Self::should_use_fuzzy_matching(pattern) && norm_msg.fuzzy_contains_phrase(pattern, self.fuzzy_threshold) {
                     requests.push(EscalationRequest {
                         message_index: *i,
                         snippet: format!("{} (fuzzy)", pattern),
@@ -1343,7 +1349,7 @@ impl SignalAnalyzer {
             || repetition.severity >= 3
             || turn_count.is_excessive
         {
-            return InteractionQuality::Critical;
+            return InteractionQuality::Severe;
         }
 
         // Calculate quality score
@@ -1379,7 +1385,7 @@ impl SignalAnalyzer {
         } else if score >= 25.0 {
             InteractionQuality::Poor
         } else {
-            InteractionQuality::Critical
+            InteractionQuality::Severe
         }
     }
 
@@ -1661,7 +1667,7 @@ mod tests {
         let report = analyzer.analyze(&messages);
         assert!(matches!(
             report.overall_quality,
-            InteractionQuality::Poor | InteractionQuality::Critical
+            InteractionQuality::Poor | InteractionQuality::Severe
         ));
         assert!(report.frustration.has_frustration);
         assert!(report.escalation.escalation_requested);
@@ -1848,4 +1854,190 @@ mod tests {
         // Should not detect as rephrase since only stopwords overlap
         assert_eq!(signal.repair_count, 0, "Messages with only stopword overlap should not be rephrases");
     }
+
+    #[test]
+    fn test_frustrated_user_with_legitimate_repair() {
+        let start = Instant::now();
+        let analyzer = SignalAnalyzer::new();
+
+        use hermesllm::apis::openai::{ToolCall, FunctionCall};
+
+        // Helper: build an assistant message that carries text plus one "function" tool call
+        let create_assistant_with_tools = |content: &str, tool_id: &str, tool_name: &str, args: &str| -> Message {
+            Message {
+                role: Role::Assistant,
+                content: MessageContent::Text(content.to_string()),
+                name: None,
+                tool_calls: Some(vec![ToolCall {
+                    id: tool_id.to_string(),
+                    call_type: "function".to_string(),
+                    function: FunctionCall {
+                        name: tool_name.to_string(),
+                        arguments: args.to_string(),
+                    },
+                }]),
+                tool_call_id: None,
+            }
+        };
+
+        // Helper: build a Role::Tool message that answers the given tool_call_id
+        let create_tool_message = |tool_call_id: &str, content: &str| -> Message {
+            Message {
+                role: Role::Tool,
+                content: MessageContent::Text(content.to_string()),
+                name: None,
+                tool_calls: None,
+                tool_call_id: Some(tool_call_id.to_string()),
+            }
+        };
+
+        // Scenario: the user DOES mention New York in the first message, so the later "I already told you" is legitimate
+        let messages = vec![
+            create_message(Role::User, "I need to book a flight from New York to Paris for December 20th"),
+            create_assistant_with_tools(
+                "I'll help you search for flights to Paris.",
+                "call_123",
+                "search_flights",
+                r#"{"origin": "NYC", "destination": "Paris", "date": "2025-12-20"}"#
+            ),
+            create_tool_message("call_123", r#"{"flights": []}"#),
+            create_message(Role::Assistant, "I couldn't find any flights. Could you provide your departure city?"),
+            create_message(Role::User, "I already told you, from New York!"),
+            create_assistant_with_tools(
+                "Let me try again.",
+                "call_456",
+                "search_flights",
+                r#"{"origin": "New York", "destination": "Paris", "date": "2025-12-20"}"#
+            ),
+            create_tool_message("call_456", r#"{"flights": []}"#),
+            create_message(Role::Assistant, "I'm still not finding results. Let me check the system."),
+            create_message(Role::User, "THIS IS RIDICULOUS!!! The tool doesn't work at all. Why do you keep calling it?"),
+            create_message(Role::Assistant, "I sincerely apologize for the frustration with the search tool."),
+            create_message(Role::User, "Forget it. I need to speak to a human agent. This is a waste of time."),
+        ];
+
+        let report = analyzer.analyze(&messages);
+
+        // The two Role::Tool messages should be filtered out, leaving only messages with text to analyze:
+        // 4 user messages + 5 assistant messages (two of which also carry a tool call) = 9 turns
+        assert_eq!(report.turn_count.total_turns, 9, "Should count 9 text messages (tool messages filtered out)");
+        assert!(report.turn_count.is_concerning, "Should flag concerning turn count");
+
+        // Should detect frustration (all caps, complaints)
+        assert!(report.frustration.has_frustration, "Should detect frustration");
+        assert!(report.frustration.frustration_count >= 2, "Should detect multiple frustration indicators");
+        assert!(report.frustration.severity >= 2, "Should have moderate or higher frustration severity");
+
+        // Should detect escalation request
+        assert!(report.escalation.escalation_requested, "Should detect escalation to human agent");
+        assert!(report.escalation.escalation_count >= 1, "Should detect at least one escalation");
+
+        // Overall quality should be Poor or Severe
+        assert!(
+            matches!(
+                report.overall_quality,
+                InteractionQuality::Poor | InteractionQuality::Severe
+            ),
+            "Quality should be Poor or Severe, got {:?}",
+            report.overall_quality
+        );
+
+        println!("test_frustrated_user_with_legitimate_repair took: {:?}", start.elapsed());
+    }
+
+    #[test]
+    fn test_frustrated_user_false_claim() {
+        let start = Instant::now();
+        let analyzer = SignalAnalyzer::new();
+
+        use hermesllm::apis::openai::{ToolCall, FunctionCall};
+
+        // Helper: build an assistant message that carries text plus one "function" tool call
+        let create_assistant_with_tools = |content: &str, tool_id: &str, tool_name: &str, args: &str| -> Message {
+            Message {
+                role: Role::Assistant,
+                content: MessageContent::Text(content.to_string()),
+                name: None,
+                tool_calls: Some(vec![ToolCall {
+                    id: tool_id.to_string(),
+                    call_type: "function".to_string(),
+                    function: FunctionCall {
+                        name: tool_name.to_string(),
+                        arguments: args.to_string(),
+                    },
+                }]),
+                tool_call_id: None,
+            }
+        };
+
+        // Helper: build a Role::Tool message that answers the given tool_call_id
+        let create_tool_message = |tool_call_id: &str, content: &str| -> Message {
+            Message {
+                role: Role::Tool,
+                content: MessageContent::Text(content.to_string()),
+                name: None,
+                tool_calls: None,
+                tool_call_id: Some(tool_call_id.to_string()),
+            }
+        };
+
+        // Scenario: the user NEVER mentions New York in the first message but still claims "I already told you".
+        // This represents realistic frustrated user behavior - exaggeration/misremembering
+        let messages = vec![
+            create_message(Role::User, "I need to book a flight to Paris for December 20th"),
+            create_assistant_with_tools(
+                "I'll help you search for flights to Paris.",
+                "call_123",
+                "search_flights",
+                r#"{"destination": "Paris", "date": "2025-12-20"}"#
+            ),
+            create_tool_message("call_123", r#"{"error": "origin required"}"#),
+            create_message(Role::Assistant, "I couldn't find any flights. Could you provide your departure city?"),
+            create_message(Role::User, "I already told you, from New York!"),  // False claim - never mentioned it
+            create_assistant_with_tools(
+                "Let me try again.",
+                "call_456",
+                "search_flights",
+                r#"{"origin": "New York", "destination": "Paris", "date": "2025-12-20"}"#
+            ),
+            create_tool_message("call_456", r#"{"flights": []}"#),
+            create_message(Role::Assistant, "I'm still not finding results. Let me check the system."),
+            create_message(Role::User, "THIS IS RIDICULOUS!!! The tool doesn't work at all. Why do you keep calling it?"),
+            create_message(Role::Assistant, "I sincerely apologize for the frustration with the search tool."),
+            create_message(Role::User, "Forget it. I need to speak to a human agent. This is a waste of time."),
+        ];
+
+        let report = analyzer.analyze(&messages);
+
+        // The two Role::Tool messages should be filtered out, leaving only messages with text to analyze:
+        // 4 user messages + 5 assistant messages (two of which also carry a tool call) = 9 turns
+        assert_eq!(report.turn_count.total_turns, 9, "Should count 9 text messages (tool messages filtered out)");
+        assert!(report.turn_count.is_concerning, "Should flag concerning turn count");
+
+        // Should detect frustration (all caps, complaints, false claims)
+        assert!(report.frustration.has_frustration, "Should detect frustration");
+        assert!(report.frustration.frustration_count >= 2, "Should detect multiple frustration indicators");
+        assert!(report.frustration.severity >= 2, "Should have moderate or higher frustration severity");
+
+        // Should detect escalation request
+        assert!(report.escalation.escalation_requested, "Should detect escalation to human agent");
+        assert!(report.escalation.escalation_count >= 1, "Should detect at least one escalation");
+
+        // Note: short patterns could fuzzy-match as false "positive feedback" here, e.g.
+        // "I already told YOU" vs "you rock". NOTE(review): presumably no longer the case for < 3-word patterns - confirm.
+        // Either way, the overall quality should still be Poor/Severe due to frustration+escalation
+
+        // Overall quality should be Poor or Severe (frustration + escalation indicates poor interaction)
+        assert!(
+            matches!(
+                report.overall_quality,
+                InteractionQuality::Poor | InteractionQuality::Severe
+            ),
+            "Quality should be Poor or Severe for frustrated user with false claims, got {:?}",
+            report.overall_quality
+        );
+
+        println!("test_frustrated_user_false_claim took: {:?}", start.elapsed());
+        println!("Full signal analysis completed in {:?}", start.elapsed()); // NOTE(review): second elapsed-time print; looks redundant with the line above
+    }
 }