added more tests and fixed docuemtnation. PR 100% ready

2026-06-17 15:25:17 +02:00 · 2026-01-03 16:44:39 -08:00 · 2026-01-03 16:44:39 -08:00 · 7d517ace77
commit 7d517ace77
parent c2cfd358f5
3 changed files with 134 additions and 5 deletions
--- a/crates/brightstaff/src/signals/analyzer.rs
+++ b/crates/brightstaff/src/signals/analyzer.rs
@ -2465,4 +2465,128 @@ mod tests {
            "Polite disappointment should not be classified as Excellent"
        );
    }
+
+    #[test]
+    fn test_catastrophic_failure_looping_assistant() {
+        let analyzer = SignalAnalyzer::new();
+
+        // Catastrophic failure: assistant stuck in loop, user increasingly frustrated
+        let messages = vec![
+            // Turn 1: Initial request
+            create_message(Role::User, "I need to cancel my subscription"),
+            create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
+
+            // Turn 3: First repair - repetition clarification
+            create_message(Role::User, "I just said I want to cancel my subscription"),
+            // Turn 4: Exact repetition from assistant
+            create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
+
+            // Turn 5: Repair (negation) + frustration (ALL CAPS)
+            create_message(Role::User, "No, I meant CANCEL. My. Subscription."),
+            // Turn 6: Near-duplicate repetition
+            create_message(Role::Assistant, "I understand you need help with your account. Let me assist you with that."),
+
+            // Turn 7: Severe frustration (complaint + excessive punctuation + ALL CAPS)
+            create_message(Role::User, "This is not helpful!! I want to CANCEL my subscription!!!"),
+            // Turn 8: Exact repetition again (looping)
+            create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
+
+            // Turn 9: Repair (explicit rephrase) + frustration (complaint)
+            create_message(Role::User, "This doesnt work. Let me rephrase: I need to terminate my account and stop being charged"),
+            // Turn 10: Near-duplicate repetition (looping continues)
+            create_message(Role::Assistant, "I understand you need help with your account. Let me assist you with that."),
+
+            // Turn 11: Escalation (multiple types) + frustration
+            create_message(Role::User, "I give up. Can I speak to a real person? This is a waste of time"),
+        ];
+
+        let report = analyzer.analyze(&messages);
+
+        // Validate turn count
+        assert_eq!(
+            report.turn_count.total_turns, 11,
+            "Should have 11 total turns"
+        );
+        assert_eq!(report.turn_count.user_turns, 6, "Should have 6 user turns");
+        assert_eq!(
+            report.turn_count.assistant_turns, 5,
+            "Should have 5 assistant turns"
+        );
+        assert!(
+            report.turn_count.is_concerning,
+            "11 turns should be concerning (>7)"
+        );
+        assert!(
+            !report.turn_count.is_excessive,
+            "11 turns should not be excessive (<=12)"
+        );
+        assert!(
+            report.turn_count.efficiency_score < 0.5,
+            "Efficiency should be low"
+        );
+
+        // Validate repair detection (USER signals - query reformulation)
+        // Detected repairs:
+        // 1. "I just said I want to cancel..." - pattern: "I just said"
+        // 2. "No, I meant CANCEL..." - pattern: "No, I meant"
+        // 3. "Let me rephrase: I need to terminate..." - pattern: "let me rephrase"
+        // Note: "This is not helpful!!" is frustration (not repair)
+        // Note: "I give up..." is escalation (not repair)
+        assert_eq!(
+            report.follow_up.repair_count, 3,
+            "Should detect exactly 3 repair attempts from user messages"
+        );
+        assert_eq!(
+            report.follow_up.repair_ratio, 0.5,
+            "Repair ratio should be 0.5 (3 repairs / 6 user messages)"
+        );
+        assert!(
+            report.follow_up.is_concerning,
+            "50% repair ratio should be highly concerning (threshold is 30%)"
+        );
+
+        // Validate frustration detection
+        assert!(
+            report.frustration.has_frustration,
+            "Should detect frustration"
+        );
+        assert!(
+            report.frustration.frustration_count >= 4,
+            "Should detect multiple frustration indicators"
+        );
+        assert!(
+            report.frustration.severity >= 2,
+            "Should be at least moderate frustration"
+        );
+
+        // Validate repetition/looping detection (ASSISTANT signals - not following instructions)
+        // The assistant repeats the same unhelpful responses multiple times:
+        // 1. "I can help you with account management..." appears 3 times (exact repetition)
+        // 2. "I understand you need help with your account..." appears 2 times (near-duplicate)
+        assert!(
+            report.repetition.repetition_count >= 4,
+            "Should detect at least 4 assistant repetitions (exact + near-duplicates)"
+        );
+        assert!(
+            report.repetition.has_looping,
+            "Should detect looping (>2 repetitions indicates stuck agent)"
+        );
+        assert!(
+            report.repetition.severity >= 2,
+            "Should be moderate to severe looping (assistant not adapting)"
+        );
+
+        // Validate escalation detection
+        assert!(
+            report.escalation.escalation_requested,
+            "Should detect escalation request"
+        );
+        assert!(
+            report.escalation.escalation_count >= 2,
+            "Should detect multiple escalation indicators: 'give up' + 'speak to a real person'"
+        );
+
+        // Validate overall quality
+        assert_eq!(report.overall_quality, InteractionQuality::Severe, "Should be classified as Severe due to escalation + excessive frustration + looping + high repair ratio");
+    }
 }
--- a/docs/source/_static/img/signals_trace.png
+++ b/docs/source/_static/img/signals_trace.png
--- a/docs/source/concepts/signals.rst
+++ b/docs/source/concepts/signals.rst
@ -86,6 +86,11 @@ Example queries:
 - Find positive interactions: ``signals.positive_feedback.count >= 2``
 - Find escalations: ``signals.escalation.requested = "true"``

+.. image:: /_static/img/signals_trace.png
+   :width: 100%
+   :align: center
+
+
 Core Signal Types
 =================

@ -115,10 +120,10 @@ Follow-Up & Repair Frequency
 ----------------------------

 **What it measures**
-    How often users clarify, correct, or rephrase requests.
+    How often users clarify, correct, or rephrase requests. This is a **user signal** tracking query reformulation behavior—when users must repair or rephrase their requests because the agent didn't understand or respond appropriately.

 **Why it matters**
-    High repair frequency is a proxy for misunderstanding or intent drift.
+    High repair frequency is a proxy for misunderstanding or intent drift. When users repeatedly rephrase the same request, it indicates the agent is failing to grasp or act on the user's intent.

 **Key metrics**

@ -165,10 +170,10 @@ Repetition & Looping
 --------------------

 **What it measures**
-    Assistant repetition / degenerative loops.
+    Assistant repetition / degenerative loops. This is an **assistant signal** tracking when the agent repeats itself, fails to follow instructions, or gets stuck in loops—indicating the agent is not making progress or adapting its responses.

 **Why it matters**
-    Often indicates missing state tracking, broken tool integration, or prompt issues.
+    Often indicates missing state tracking, broken tool integration, prompt issues, or the agent ignoring user corrections. High repetition means the agent is not learning from the conversation context.

 **Detection method**

@ -176,7 +181,7 @@ Repetition & Looping
 - Classify:

  - **Exact**: similarity >= 0.85
-  - **Near-duplicate**: similarity >= 0.60
+  - **Near-duplicate**: similarity >= 0.50

 - Looping is flagged when repetition instances exceed 2 in a session.