Introduce signals change (#655)

* adding support for signals

* reducing false positives for signals like positive interaction

* adding docs. Still need to fix the messages list, but waiting on PR #621

* Improve frustration detection: normalize contractions and refine punctuation

* Further refine test cases with longer messages

* minor doc changes

* fixing echo statement for build

* fixing the messages construction and using the trait for signals

* update signals docs

* fixed some minor doc changes

* added more tests and fixed documentation. PR 100% ready

* made fixes based on PR comments

* Optimize latency

1. replace sliding window approach with trigram containment check
2. add code to pre-compute ngrams for patterns

* removed some debug statements to make tests easier to read

* PR comments to make ObservableStreamProcessor accept optional Vec<Message>

* fixed PR comments

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
Co-authored-by: MeiyuZhong <mariazhong9612@gmail.com>
Co-authored-by: nehcgs <54548843+nehcgs@users.noreply.github.com>
This commit is contained in:
Salman Paracha 2026-01-07 11:20:44 -08:00 committed by GitHub
parent 57327ba667
commit b4543ba56c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 3972 additions and 191 deletions

1
crates/Cargo.lock generated
View file

@ -335,6 +335,7 @@ dependencies = [
"serde_json",
"serde_with",
"serde_yaml",
"strsim",
"thiserror 2.0.12",
"time",
"tokio",

View file

@ -30,6 +30,7 @@ reqwest = { version = "0.12.15", features = ["stream"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
serde_with = "3.13.0"
strsim = "0.11"
serde_yaml = "0.9.34"
thiserror = "2.0.12"
tokio = { version = "1.44.2", features = ["full"] }

View file

@ -111,6 +111,9 @@ pub async fn llm_chat(
.get_recent_user_message()
.map(|msg| truncate_message(&msg, 50));
// Extract messages for signal analysis (clone before moving client_request)
let messages_for_signals = client_request.get_messages();
client_request.set_model(resolved_model.clone());
if client_request.remove_metadata_key("archgw_preference_config") {
debug!(
@ -292,6 +295,7 @@ pub async fn llm_chat(
operation_component::LLM,
llm_span,
request_start_time,
Some(messages_for_signals),
);
// === v1/responses state management: Wrap with ResponsesStateProcessor ===

View file

@ -10,8 +10,10 @@ use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tracing::warn;
// Import tracing constants
use crate::tracing::{error, llm};
// Import tracing constants and signals
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
use crate::tracing::{error, llm, signals as signal_constants};
use hermesllm::apis::openai::Message;
/// Trait for processing streaming chunks
/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
@ -38,6 +40,7 @@ pub struct ObservableStreamProcessor {
chunk_count: usize,
start_time: Instant,
time_to_first_token: Option<u128>,
messages: Option<Vec<Message>>,
}
impl ObservableStreamProcessor {
@ -48,11 +51,13 @@ impl ObservableStreamProcessor {
/// * `service_name` - The service name for this span (e.g., "archgw(llm)")
/// * `span` - The span to finalize after streaming completes
/// * `start_time` - When the request started (for duration calculation)
/// * `messages` - Optional conversation messages for signal analysis
pub fn new(
collector: Arc<TraceCollector>,
service_name: impl Into<String>,
span: Span,
start_time: Instant,
messages: Option<Vec<Message>>,
) -> Self {
Self {
collector,
@ -62,6 +67,7 @@ impl ObservableStreamProcessor {
chunk_count: 0,
start_time,
time_to_first_token: None,
messages,
}
}
}
@ -133,6 +139,94 @@ impl StreamProcessor for ObservableStreamProcessor {
}
}
// Analyze signals if messages are available and add to span attributes
if let Some(ref messages) = self.messages {
let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
let report = analyzer.analyze(messages);
// Add overall quality
self.span.attributes.push(Attribute {
key: signal_constants::QUALITY.to_string(),
value: AttributeValue {
string_value: Some(format!("{:?}", report.overall_quality)),
},
});
// Add repair/follow-up metrics if concerning
if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
self.span.attributes.push(Attribute {
key: signal_constants::REPAIR_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.follow_up.repair_count.to_string()),
},
});
self.span.attributes.push(Attribute {
key: signal_constants::REPAIR_RATIO.to_string(),
value: AttributeValue {
string_value: Some(format!("{:.3}", report.follow_up.repair_ratio)),
},
});
}
// Add flag marker to operation name if any concerning signal is detected
let should_flag = report.frustration.has_frustration
|| report.repetition.has_looping
|| report.escalation.escalation_requested
|| matches!(
report.overall_quality,
InteractionQuality::Poor | InteractionQuality::Severe
);
if should_flag {
// Append flag marker to the operation name
self.span.name = format!("{} {}", self.span.name, FLAG_MARKER);
}
// Add key signal metrics
if report.frustration.has_frustration {
self.span.attributes.push(Attribute {
key: signal_constants::FRUSTRATION_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.frustration.frustration_count.to_string()),
},
});
self.span.attributes.push(Attribute {
key: signal_constants::FRUSTRATION_SEVERITY.to_string(),
value: AttributeValue {
string_value: Some(report.frustration.severity.to_string()),
},
});
}
if report.repetition.has_looping {
self.span.attributes.push(Attribute {
key: signal_constants::REPETITION_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.repetition.repetition_count.to_string()),
},
});
}
if report.escalation.escalation_requested {
self.span.attributes.push(Attribute {
key: signal_constants::ESCALATION_REQUESTED.to_string(),
value: AttributeValue {
string_value: Some("true".to_string()),
},
});
}
if report.positive_feedback.has_positive_feedback {
self.span.attributes.push(Attribute {
key: signal_constants::POSITIVE_FEEDBACK_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.positive_feedback.positive_count.to_string()),
},
});
}
}
// Record the finalized span
self.collector
.record_span(&self.service_name, self.span.clone());

View file

@ -1,5 +1,6 @@
pub mod handlers;
pub mod router;
pub mod signals;
pub mod state;
pub mod tracing;
pub mod utils;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
mod analyzer;
pub use analyzer::*;

View file

@ -139,6 +139,45 @@ pub mod error {
pub const STACK_TRACE: &str = "error.stack_trace";
}
// =============================================================================
// Span Attributes - Agentic Signals
// =============================================================================
/// Span attribute keys for behavioral quality indicators of agent interactions.
///
/// These signals are computed automatically from conversation patterns.
/// Every key is a dotted string under the `signals.` prefix.
pub mod signals {
/// Overall quality assessment.
///
/// Values: "Excellent", "Good", "Neutral", "Poor", "Severe"
pub const QUALITY: &str = "signals.quality";
/// Total number of turns in the conversation.
pub const TURN_COUNT: &str = "signals.turn_count";
/// Efficiency score (0.0-1.0).
pub const EFFICIENCY_SCORE: &str = "signals.efficiency_score";
/// Number of repair attempts detected (follow-up signal group).
pub const REPAIR_COUNT: &str = "signals.follow_up.repair.count";
/// Ratio of repairs to user turns (follow-up signal group).
pub const REPAIR_RATIO: &str = "signals.follow_up.repair.ratio";
/// Number of frustration indicators detected.
pub const FRUSTRATION_COUNT: &str = "signals.frustration.count";
/// Frustration severity level (0-3).
pub const FRUSTRATION_SEVERITY: &str = "signals.frustration.severity";
/// Number of repetition instances detected.
pub const REPETITION_COUNT: &str = "signals.repetition.count";
/// Whether escalation was requested (user asked for human help).
pub const ESCALATION_REQUESTED: &str = "signals.escalation.requested";
/// Number of positive feedback indicators detected.
pub const POSITIVE_FEEDBACK_COUNT: &str = "signals.positive_feedback.count";
}
// =============================================================================
// Operation Names
// =============================================================================

View file

@ -1,3 +1,5 @@
mod constants;
pub use constants::{error, http, llm, operation_component, routing, OperationNameBuilder};
pub use constants::{
error, http, llm, operation_component, routing, signals, OperationNameBuilder,
};

View file

@ -1127,82 +1127,16 @@ impl ProviderRequest for ResponsesAPIRequest {
}
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
use crate::apis::openai::{Message, MessageContent, Role};
use crate::transforms::request::from_openai::ResponsesInputConverter;
let mut openai_messages = Vec::new();
// Use the shared converter to get the full conversion with image support
let converter = ResponsesInputConverter {
input: self.input.clone(),
instructions: self.instructions.clone(),
};
// Add instructions as system message if present
if let Some(instructions) = &self.instructions {
openai_messages.push(Message {
role: Role::System,
content: MessageContent::Text(instructions.clone()),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
// Convert input to messages
match &self.input {
InputParam::Text(text) => {
openai_messages.push(Message {
role: Role::User,
content: MessageContent::Text(text.clone()),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
InputParam::Items(items) => {
for item in items {
match item {
InputItem::Message(msg) => {
// Convert message role
let role = match msg.role {
MessageRole::User => Role::User,
MessageRole::Assistant => Role::Assistant,
MessageRole::System => Role::System,
MessageRole::Developer => Role::System, // Map developer to system
};
// Extract text from message content
let content = match &msg.content {
crate::apis::openai_responses::MessageContent::Text(text) => {
text.clone()
}
crate::apis::openai_responses::MessageContent::Items(items) => {
items
.iter()
.filter_map(|c| {
if let InputContent::InputText { text } = c {
Some(text.clone())
} else {
None
}
})
.collect::<Vec<_>>()
.join("\n")
}
};
openai_messages.push(Message {
role,
content: MessageContent::Text(content),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
// Skip other input item types for now
InputItem::ItemReference { .. } | InputItem::FunctionCallOutput { .. } => {
// These are not yet supported in agent framework
}
}
}
}
}
openai_messages
// Convert and return, falling back to empty vec on error
converter.try_into().unwrap_or_else(|_| Vec::new())
}
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {

View file

@ -24,6 +24,150 @@ use crate::transforms::*;
type AnthropicMessagesRequest = MessagesRequest;
// ============================================================================
// RESPONSES API INPUT CONVERSION
// ============================================================================
/// Helper struct for converting ResponsesAPI input to OpenAI messages.
///
/// Bundles the two request fields the conversion needs so that the
/// `TryFrom<ResponsesInputConverter> for Vec<Message>` impl can consume them
/// by value.
pub struct ResponsesInputConverter {
/// The request input: either plain text or a list of input items.
pub input: InputParam,
/// Optional instructions; when present they are emitted as a leading
/// system message ahead of the converted input.
pub instructions: Option<String>,
}
impl TryFrom<ResponsesInputConverter> for Vec<Message> {
type Error = TransformError;
fn try_from(converter: ResponsesInputConverter) -> Result<Self, Self::Error> {
// Convert input to messages
match converter.input {
InputParam::Text(text) => {
// Simple text input becomes a user message
let mut messages = Vec::new();
// Add instructions as system message if present
if let Some(instructions) = converter.instructions {
messages.push(Message {
role: Role::System,
content: MessageContent::Text(instructions),
name: None,
tool_call_id: None,
tool_calls: None,
});
}
// Add the user message
messages.push(Message {
role: Role::User,
content: MessageContent::Text(text),
name: None,
tool_call_id: None,
tool_calls: None,
});
Ok(messages)
}
InputParam::Items(items) => {
// Convert input items to messages
let mut converted_messages = Vec::new();
// Add instructions as system message if present
if let Some(instructions) = converter.instructions {
converted_messages.push(Message {
role: Role::System,
content: MessageContent::Text(instructions),
name: None,
tool_call_id: None,
tool_calls: None,
});
}
// Convert each input item
for item in items {
if let InputItem::Message(input_msg) = item {
let role = match input_msg.role {
MessageRole::User => Role::User,
MessageRole::Assistant => Role::Assistant,
MessageRole::System => Role::System,
MessageRole::Developer => Role::System, // Map developer to system
};
// Convert content based on MessageContent type
let content = match &input_msg.content {
crate::apis::openai_responses::MessageContent::Text(text) => {
// Simple text content
MessageContent::Text(text.clone())
}
crate::apis::openai_responses::MessageContent::Items(content_items) => {
// Check if it's a single text item (can use simple text format)
if content_items.len() == 1 {
if let InputContent::InputText { text } = &content_items[0] {
MessageContent::Text(text.clone())
} else {
// Single non-text item - use parts format
MessageContent::Parts(
content_items.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
}
InputContent::InputImage { image_url, .. } => {
Some(crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
}
})
}
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect()
)
}
} else {
// Multiple content items - convert to parts
MessageContent::Parts(
content_items
.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text {
text: text.clone(),
})
}
InputContent::InputImage { image_url, .. } => Some(
crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
},
},
),
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect(),
)
}
}
};
converted_messages.push(Message {
role,
content,
name: None,
tool_call_id: None,
tool_calls: None,
});
}
}
Ok(converted_messages)
}
}
}
}
// ============================================================================
// MAIN REQUEST TRANSFORMATIONS
// ============================================================================
@ -253,117 +397,12 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
type Error = TransformError;
fn try_from(req: ResponsesAPIRequest) -> Result<Self, Self::Error> {
// Convert input to messages
let messages = match req.input {
InputParam::Text(text) => {
// Simple text input becomes a user message
vec![Message {
role: Role::User,
content: MessageContent::Text(text),
name: None,
tool_call_id: None,
tool_calls: None,
}]
}
InputParam::Items(items) => {
// Convert input items to messages
let mut converted_messages = Vec::new();
// Add instructions as system message if present
if let Some(instructions) = &req.instructions {
converted_messages.push(Message {
role: Role::System,
content: MessageContent::Text(instructions.clone()),
name: None,
tool_call_id: None,
tool_calls: None,
});
}
// Convert each input item
for item in items {
if let InputItem::Message(input_msg) = item {
let role = match input_msg.role {
MessageRole::User => Role::User,
MessageRole::Assistant => Role::Assistant,
MessageRole::System => Role::System,
MessageRole::Developer => Role::System, // Map developer to system
};
// Convert content based on MessageContent type
let content = match &input_msg.content {
crate::apis::openai_responses::MessageContent::Text(text) => {
// Simple text content
MessageContent::Text(text.clone())
}
crate::apis::openai_responses::MessageContent::Items(content_items) => {
// Check if it's a single text item (can use simple text format)
if content_items.len() == 1 {
if let InputContent::InputText { text } = &content_items[0] {
MessageContent::Text(text.clone())
} else {
// Single non-text item - use parts format
MessageContent::Parts(
content_items.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
}
InputContent::InputImage { image_url, .. } => {
Some(crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
}
})
}
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect()
)
}
} else {
// Multiple content items - convert to parts
MessageContent::Parts(
content_items
.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text {
text: text.clone(),
})
}
InputContent::InputImage { image_url, .. } => Some(
crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
},
},
),
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect(),
)
}
}
};
converted_messages.push(Message {
role,
content,
name: None,
tool_call_id: None,
tool_calls: None,
});
}
}
converted_messages
}
// Convert input to messages using the shared converter
let converter = ResponsesInputConverter {
input: req.input,
instructions: req.instructions.clone(),
};
let messages: Vec<Message> = converter.try_into()?;
// Build the ChatCompletionsRequest
Ok(ChatCompletionsRequest {