fix: prevent index-out-of-bounds panic in signal analyzer follow-up (#896)

This commit is contained in:
Adil Hafeez 2026-04-18 16:24:02 -07:00 committed by GitHub
parent e7464b817a
commit ffea891dba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1250,7 +1250,7 @@ impl TextBasedSignalAnalyzer {
let mut repair_phrases = Vec::new(); let mut repair_phrases = Vec::new();
let mut user_turn_count = 0; let mut user_turn_count = 0;
for (i, role, norm_msg) in normalized_messages { for (pos, (i, role, norm_msg)) in normalized_messages.iter().enumerate() {
if *role != Role::User { if *role != Role::User {
continue; continue;
} }
@ -1274,10 +1274,13 @@ impl TextBasedSignalAnalyzer {
} }
} }
// Only check for semantic similarity if no pattern matched // Only check for semantic similarity if no pattern matched. Walk
if !found_in_turn && *i >= 2 { // backwards through the *normalized* list (not the original
// Find previous user message // conversation indices, which may be non-contiguous because
for j in (0..*i).rev() { // messages without extractable text are filtered out) to find the
// most recent prior user message.
if !found_in_turn && pos >= 1 {
for j in (0..pos).rev() {
let (_, prev_role, prev_norm_msg) = &normalized_messages[j]; let (_, prev_role, prev_norm_msg) = &normalized_messages[j];
if *prev_role == Role::User { if *prev_role == Role::User {
if self.is_similar_rephrase(norm_msg, prev_norm_msg) { if self.is_similar_rephrase(norm_msg, prev_norm_msg) {
@ -2199,6 +2202,68 @@ mod tests {
println!("test_follow_up_detection took: {:?}", start.elapsed()); println!("test_follow_up_detection took: {:?}", start.elapsed());
} }
#[test]
fn test_follow_up_does_not_panic_with_filtered_messages() {
// Regression test: the preprocessing pipeline filters out messages
// without extractable text (tool calls, tool results, empty content).
// The stored tuple index `i` is the ORIGINAL-conversation index, so
// once anything is filtered out, `i` no longer matches the position
// inside `normalized_messages`. The old code used `*i` to index into
// `normalized_messages`, which panicked with "index out of bounds"
// when a user message appeared after filtered entries.
let analyzer = TextBasedSignalAnalyzer::new();
let messages = vec![
Message {
role: Role::User,
content: Some(hermesllm::apis::openai::MessageContent::Text(
"first question".to_string(),
)),
name: None,
tool_calls: None,
tool_call_id: None,
},
// Assistant message with no text content (e.g. tool call) — filtered out.
Message {
role: Role::Assistant,
content: None,
name: None,
tool_calls: None,
tool_call_id: None,
},
// Tool-role message with no extractable text — filtered out.
Message {
role: Role::Tool,
content: None,
name: None,
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: Some(hermesllm::apis::openai::MessageContent::Text(
"some answer".to_string(),
)),
name: None,
tool_calls: None,
tool_call_id: None,
},
// Rephrased user turn — original index 4, but after filtering
// only 3 messages remain in `normalized_messages` before it.
Message {
role: Role::User,
content: Some(hermesllm::apis::openai::MessageContent::Text(
"first question please".to_string(),
)),
name: None,
tool_calls: None,
tool_call_id: None,
},
];
// Must not panic — exercises the full analyze pipeline.
let _report = analyzer.analyze(&messages);
}
#[test] #[test]
fn test_frustration_detection() { fn test_frustration_detection() {
let start = Instant::now(); let start = Instant::now();