fix: prevent index-out-of-bounds panic in signal analyzer follow-up

The signals analyzer preprocesses messages by filtering out any message
without extractable text (tool calls, tool results, empty content), but
it stores each surviving entry's ORIGINAL conversation index in the
tuple. `analyze_follow_up` then used that stored index to look up
previous user messages inside `normalized_messages`, which panicked
("index out of bounds: the len is N but the index is M") whenever any
message was filtered out before the current turn — a common pattern for
Anthropic conversations containing <tool_use_error> blocks.

The panic is caught at the tokio task boundary so requests still
complete with the default model, but every affected request logs a
noisy stack trace and silently skips signal analysis, which can degrade
routing decisions.

Fix by walking the normalized list with `iter().enumerate()` and using
the inner position for index lookups. The stored original index is kept
only for display ("Turn N") strings.

Includes a regression test that reproduces the exact panic shape via the
public `analyze` entry point.
This commit is contained in:
Adil Hafeez 2026-04-17 18:21:57 -07:00
parent 37600fd07a
commit 421bff03ac

View file

@@ -1250,7 +1250,7 @@ impl TextBasedSignalAnalyzer {
let mut repair_phrases = Vec::new();
let mut user_turn_count = 0;
for (i, role, norm_msg) in normalized_messages {
for (pos, (i, role, norm_msg)) in normalized_messages.iter().enumerate() {
if *role != Role::User {
continue;
}
@@ -1274,10 +1274,13 @@ impl TextBasedSignalAnalyzer {
}
}
// Only check for semantic similarity if no pattern matched
if !found_in_turn && *i >= 2 {
// Find previous user message
for j in (0..*i).rev() {
// Only check for semantic similarity if no pattern matched. Walk
// backwards through the *normalized* list (not the original
// conversation indices, which may be non-contiguous because
// messages without extractable text are filtered out) to find the
// most recent prior user message.
if !found_in_turn && pos >= 1 {
for j in (0..pos).rev() {
let (_, prev_role, prev_norm_msg) = &normalized_messages[j];
if *prev_role == Role::User {
if self.is_similar_rephrase(norm_msg, prev_norm_msg) {
@@ -2199,6 +2202,68 @@ mod tests {
println!("test_follow_up_detection took: {:?}", start.elapsed());
}
#[test]
fn test_follow_up_does_not_panic_with_filtered_messages() {
    // Regression test: preprocessing drops messages with no extractable
    // text (tool calls, tool results, empty content), but each surviving
    // tuple keeps its ORIGINAL conversation index `i`. Once anything is
    // filtered, `i` no longer matches the position inside
    // `normalized_messages`; the old code indexed with `*i` and panicked
    // ("index out of bounds") when a user turn followed filtered entries.
    let with_text = |role: Role, text: &str| Message {
        role,
        content: Some(hermesllm::apis::openai::MessageContent::Text(
            text.to_string(),
        )),
        name: None,
        tool_calls: None,
        tool_call_id: None,
    };
    // A message carrying no text content at all — gets filtered out.
    let without_text = |role: Role| Message {
        role,
        content: None,
        name: None,
        tool_calls: None,
        tool_call_id: None,
    };
    let messages = vec![
        with_text(Role::User, "first question"),
        // Assistant message with no text content (e.g. tool call) — filtered out.
        without_text(Role::Assistant),
        // Tool-role message with no extractable text — filtered out.
        without_text(Role::Tool),
        with_text(Role::Assistant, "some answer"),
        // Rephrased user turn — original index 4, but after filtering
        // only 3 messages remain in `normalized_messages` before it.
        with_text(Role::User, "first question please"),
    ];
    // Must not panic — exercises the full analyze pipeline.
    let _report = TextBasedSignalAnalyzer::new().analyze(&messages);
}
#[test]
fn test_frustration_detection() {
let start = Instant::now();