added more tests and fixed documentation. PR 100% ready

This commit is contained in:
Salman Paracha 2026-01-03 16:44:39 -08:00
parent c2cfd358f5
commit 7d517ace77
3 changed files with 134 additions and 5 deletions

View file

@ -2465,4 +2465,128 @@ mod tests {
"Polite disappointment should not be classified as Excellent"
);
}
#[test]
fn test_catastrophic_failure_looping_assistant() {
    // The assistant only ever produces these two replies in this scenario;
    // naming them makes the repetition pattern in the transcript explicit.
    const MENU_REPLY: &str =
        "I can help you with account management. What would you like to do today?";
    const ACK_REPLY: &str =
        "I understand you need help with your account. Let me assist you with that.";

    let analyzer = SignalAnalyzer::new();

    // An 11-message conversation: the user asks to cancel six times in
    // increasingly exasperated ways while the assistant alternates between
    // the two canned replies above.
    let conversation = vec![
        // 1: initial request
        create_message(Role::User, "I need to cancel my subscription"),
        // 2: first canned reply
        create_message(Role::Assistant, MENU_REPLY),
        // 3: user points out they already said it
        create_message(Role::User, "I just said I want to cancel my subscription"),
        // 4: identical reply to message 2
        create_message(Role::Assistant, MENU_REPLY),
        // 5: user negates and restates, with capitals
        create_message(Role::User, "No, I meant CANCEL. My. Subscription."),
        // 6: second canned reply, similar in meaning to the first
        create_message(Role::Assistant, ACK_REPLY),
        // 7: user complains, with capitals and repeated exclamation marks
        create_message(Role::User, "This is not helpful!! I want to CANCEL my subscription!!!"),
        // 8: identical reply to messages 2 and 4
        create_message(Role::Assistant, MENU_REPLY),
        // 9: user complains and explicitly rephrases
        create_message(Role::User, "This doesnt work. Let me rephrase: I need to terminate my account and stop being charged"),
        // 10: identical reply to message 6
        create_message(Role::Assistant, ACK_REPLY),
        // 11: user gives up and asks for a human
        create_message(Role::User, "I give up. Can I speak to a real person? This is a waste of time"),
    ];

    let report = analyzer.analyze(&conversation);

    // --- Turn counting ---
    let turns = &report.turn_count;
    assert_eq!(turns.total_turns, 11, "Should have 11 total turns");
    assert_eq!(turns.user_turns, 6, "Should have 6 user turns");
    assert_eq!(turns.assistant_turns, 5, "Should have 5 assistant turns");
    assert!(turns.is_concerning, "11 turns should be concerning (>7)");
    assert!(!turns.is_excessive, "11 turns should not be excessive (<=12)");
    assert!(turns.efficiency_score < 0.5, "Efficiency should be low");

    // --- Follow-up / repair (user-side signal) ---
    // Three of the six user messages are expected to count as repairs:
    //   message 3  ("I just said ...")
    //   message 5  ("No, I meant ...")
    //   message 9  ("Let me rephrase: ...")
    // Messages 7 and 11 are expected to register as frustration and
    // escalation respectively, not as repairs.
    let follow_up = &report.follow_up;
    assert_eq!(
        follow_up.repair_count, 3,
        "Should detect exactly 3 repair attempts from user messages"
    );
    assert_eq!(
        follow_up.repair_ratio, 0.5,
        "Repair ratio should be 0.5 (3 repairs / 6 user messages)"
    );
    assert!(
        follow_up.is_concerning,
        "50% repair ratio should be highly concerning (threshold is 30%)"
    );

    // --- Frustration ---
    let frustration = &report.frustration;
    assert!(frustration.has_frustration, "Should detect frustration");
    assert!(
        frustration.frustration_count >= 4,
        "Should detect multiple frustration indicators"
    );
    assert!(
        frustration.severity >= 2,
        "Should be at least moderate frustration"
    );

    // --- Repetition / looping (assistant-side signal) ---
    // MENU_REPLY is sent three times and ACK_REPLY twice, so the assistant
    // never says anything outside those two texts.
    let repetition = &report.repetition;
    assert!(
        repetition.repetition_count >= 4,
        "Should detect at least 4 assistant repetitions (exact + near-duplicates)"
    );
    assert!(
        repetition.has_looping,
        "Should detect looping (>2 repetitions indicates stuck agent)"
    );
    assert!(
        repetition.severity >= 2,
        "Should be moderate to severe looping (assistant not adapting)"
    );

    // --- Escalation ---
    let escalation = &report.escalation;
    assert!(
        escalation.escalation_requested,
        "Should detect escalation request"
    );
    assert!(
        escalation.escalation_count >= 2,
        "Should detect multiple escalation indicators: 'give up' + 'speak to a real person'"
    );

    // --- Overall verdict ---
    assert_eq!(
        report.overall_quality,
        InteractionQuality::Severe,
        "Should be classified as Severe due to escalation + excessive frustration + looping + high repair ratio"
    );
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 MiB

View file

@ -86,6 +86,11 @@ Example queries:
- Find positive interactions: ``signals.positive_feedback.count >= 2``
- Find escalations: ``signals.escalation.requested = "true"``
.. image:: /_static/img/signals_trace.png
   :width: 100%
   :align: center
Core Signal Types
=================
@ -115,10 +120,10 @@ Follow-Up & Repair Frequency
----------------------------
**What it measures**
How often users clarify, correct, or rephrase requests.
How often users clarify, correct, or rephrase requests. This is a **user signal** tracking query reformulation behavior—when users must repair or rephrase their requests because the agent didn't understand or respond appropriately.
**Why it matters**
High repair frequency is a proxy for misunderstanding or intent drift.
High repair frequency is a proxy for misunderstanding or intent drift. When users repeatedly rephrase the same request, it indicates the agent is failing to grasp or act on the user's intent.
**Key metrics**
@ -165,10 +170,10 @@ Repetition & Looping
--------------------
**What it measures**
Assistant repetition / degenerative loops.
Assistant repetition / degenerative loops. This is an **assistant signal** tracking when the agent repeats itself, fails to follow instructions, or gets stuck in loops—indicating the agent is not making progress or adapting its responses.
**Why it matters**
Often indicates missing state tracking, broken tool integration, or prompt issues.
Often indicates missing state tracking, broken tool integration, prompt issues, or the agent ignoring user corrections. High repetition means the agent is not learning from the conversation context.
**Detection method**
@ -176,7 +181,7 @@ Repetition & Looping
- Classify:
- **Exact**: similarity >= 0.85
- **Near-duplicate**: similarity >= 0.60
- **Near-duplicate**: similarity >= 0.50
- Looping is flagged when repetition instances exceed 2 in a session.