Mirror of https://github.com/katanemo/plano.git (synced 2026-05-08 07:12:42 +02:00)

use standard tracing and logging in brightstaff (#721)

Parent: 4d9ed74b68 · Commit: 46de89590b
55 changed files with 1494 additions and 2432 deletions
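The diff below replaces hand-built SpanBuilder/TraceCollector spans with the standard `tracing` crate: each handler creates an `info_span!` carrying the request id and runs its async body under it with `.instrument()`. A minimal sketch of that pattern, assuming the `tracing`, `tracing-subscriber`, `tokio`, and `uuid` crates; the handler name and fields here are illustrative, not the commit's exact code:

use tracing::{info, info_span, Instrument};

// Illustrative handler: any log emitted inside inherits the span's fields.
async fn handle_request(request_id: String) {
    info!(%request_id, "handling request");
}

#[tokio::main]
async fn main() {
    // Emit spans/events to stdout; the real services add an OpenTelemetry layer.
    tracing_subscriber::fmt::init();

    let request_id = uuid::Uuid::new_v4().to_string();
    // Span fields (request_id, method, path in the real handlers) tag every log line.
    let request_span = info_span!("request", request_id = %request_id);

    // `.instrument()` keeps the span entered across await points for the whole future.
    handle_request(request_id.clone())
        .instrument(request_span)
        .await;
}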
@ -1,9 +1,7 @@
use std::sync::Arc;
use std::time::{Instant, SystemTime};
use std::time::Instant;

use bytes::Bytes;
use common::consts::TRACE_PARENT_HEADER;
use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind};
use hermesllm::apis::OpenAIMessage;
use hermesllm::clients::SupportedAPIsFromClient;
use hermesllm::providers::request::ProviderRequest;
@ -11,14 +9,15 @@ use hermesllm::ProviderRequestType;
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::{Request, Response};
use opentelemetry::trace::get_active_span;
use serde::ser::Error as SerError;
use tracing::{debug, info, warn};
use tracing::{debug, info, info_span, warn, Instrument};

use super::agent_selector::{AgentSelectionError, AgentSelector};
use super::pipeline_processor::{PipelineError, PipelineProcessor};
use super::response_handler::ResponseHandler;
use crate::router::plano_orchestrator::OrchestratorService;
use crate::tracing::{http, operation_component, OperationNameBuilder};
use crate::tracing::{operation_component, set_service_name};

/// Main errors for agent chat completions
#[derive(Debug, thiserror::Error)]
@ -41,92 +40,122 @@ pub async fn agent_chat(
|
|||
_: String,
|
||||
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
|
||||
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
|
||||
trace_collector: Arc<common::traces::TraceCollector>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
match handle_agent_chat(
|
||||
request,
|
||||
orchestrator_service,
|
||||
agents_list,
|
||||
listeners,
|
||||
trace_collector,
|
||||
)
|
||||
.await
|
||||
// Extract request_id from headers or generate a new one
|
||||
let request_id: String = match request
|
||||
.headers()
|
||||
.get(common::consts::REQUEST_ID_HEADER)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|s| s.to_string())
|
||||
{
|
||||
Ok(response) => Ok(response),
|
||||
Err(err) => {
|
||||
// Check if this is a client error from the pipeline that should be cascaded
|
||||
if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
|
||||
agent,
|
||||
status,
|
||||
body,
|
||||
}) = &err
|
||||
{
|
||||
warn!(
|
||||
"Client error from agent '{}' (HTTP {}): {}",
|
||||
agent, status, body
|
||||
);
|
||||
Some(id) => id,
|
||||
None => uuid::Uuid::new_v4().to_string(),
|
||||
};
|
||||
|
||||
// Create error response with the original status code and body
|
||||
// Create a span with request_id that will be included in all log lines
|
||||
let request_span = info_span!(
|
||||
"(orchestrator)",
|
||||
component = "orchestrator",
|
||||
request_id = %request_id,
|
||||
http.method = %request.method(),
|
||||
http.path = %request.uri().path()
|
||||
);
|
||||
|
||||
// Execute the handler inside the span
|
||||
async {
|
||||
// Set service name for orchestrator operations
|
||||
set_service_name(operation_component::ORCHESTRATOR);
|
||||
|
||||
match handle_agent_chat_inner(
|
||||
request,
|
||||
orchestrator_service,
|
||||
agents_list,
|
||||
listeners,
|
||||
request_id,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => Ok(response),
|
||||
Err(err) => {
|
||||
// Check if this is a client error from the pipeline that should be cascaded
|
||||
if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
|
||||
agent,
|
||||
status,
|
||||
body,
|
||||
}) = &err
|
||||
{
|
||||
warn!(
|
||||
agent = %agent,
|
||||
status = %status,
|
||||
body = %body,
|
||||
"client error from agent"
|
||||
);
|
||||
|
||||
// Create error response with the original status code and body
|
||||
let error_json = serde_json::json!({
|
||||
"error": "ClientError",
|
||||
"agent": agent,
|
||||
"status": status,
|
||||
"agent_response": body
|
||||
});
|
||||
|
||||
let json_string = error_json.to_string();
|
||||
let mut response =
|
||||
Response::new(ResponseHandler::create_full_body(json_string));
|
||||
*response.status_mut() = hyper::StatusCode::from_u16(*status)
|
||||
.unwrap_or(hyper::StatusCode::BAD_REQUEST);
|
||||
response.headers_mut().insert(
|
||||
hyper::header::CONTENT_TYPE,
|
||||
"application/json".parse().unwrap(),
|
||||
);
|
||||
return Ok(response);
|
||||
}
|
||||
|
||||
// Print detailed error information with full error chain for other errors
|
||||
let mut error_chain = Vec::new();
|
||||
let mut current_error: &dyn std::error::Error = &err;
|
||||
|
||||
// Collect the full error chain
|
||||
loop {
|
||||
error_chain.push(current_error.to_string());
|
||||
match current_error.source() {
|
||||
Some(source) => current_error = source,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Log the complete error chain
|
||||
warn!(error_chain = ?error_chain, "agent chat error chain");
|
||||
warn!(root_error = ?err, "root error");
|
||||
|
||||
// Create structured error response as JSON
|
||||
let error_json = serde_json::json!({
|
||||
"error": "ClientError",
|
||||
"agent": agent,
|
||||
"status": status,
|
||||
"agent_response": body
|
||||
"error": {
|
||||
"type": "AgentFilterChainError",
|
||||
"message": err.to_string(),
|
||||
"error_chain": error_chain,
|
||||
"debug_info": format!("{:?}", err)
|
||||
}
|
||||
});
|
||||
|
||||
let json_string = error_json.to_string();
|
||||
let mut response = Response::new(ResponseHandler::create_full_body(json_string));
|
||||
*response.status_mut() =
|
||||
hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST);
|
||||
response.headers_mut().insert(
|
||||
hyper::header::CONTENT_TYPE,
|
||||
"application/json".parse().unwrap(),
|
||||
);
|
||||
return Ok(response);
|
||||
// Log the error for debugging
|
||||
info!(error = %error_json, "structured error info");
|
||||
|
||||
// Return JSON error response
|
||||
Ok(ResponseHandler::create_json_error_response(&error_json))
|
||||
}
|
||||
|
||||
// Print detailed error information with full error chain for other errors
|
||||
let mut error_chain = Vec::new();
|
||||
let mut current_error: &dyn std::error::Error = &err;
|
||||
|
||||
// Collect the full error chain
|
||||
loop {
|
||||
error_chain.push(current_error.to_string());
|
||||
match current_error.source() {
|
||||
Some(source) => current_error = source,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Log the complete error chain
|
||||
warn!("Agent chat error chain: {:#?}", error_chain);
|
||||
warn!("Root error: {:?}", err);
|
||||
|
||||
// Create structured error response as JSON
|
||||
let error_json = serde_json::json!({
|
||||
"error": {
|
||||
"type": "AgentFilterChainError",
|
||||
"message": err.to_string(),
|
||||
"error_chain": error_chain,
|
||||
"debug_info": format!("{:?}", err)
|
||||
}
|
||||
});
|
||||
|
||||
// Log the error for debugging
|
||||
info!("Structured error info: {}", error_json);
|
||||
|
||||
// Return JSON error response
|
||||
Ok(ResponseHandler::create_json_error_response(&error_json))
|
||||
}
|
||||
}
|
||||
.instrument(request_span)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn handle_agent_chat(
|
||||
async fn handle_agent_chat_inner(
|
||||
request: Request<hyper::body::Incoming>,
|
||||
orchestrator_service: Arc<OrchestratorService>,
|
||||
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
|
||||
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
|
||||
trace_collector: Arc<common::traces::TraceCollector>,
|
||||
request_id: String,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
|
||||
// Initialize services
|
||||
let agent_selector = AgentSelector::new(orchestrator_service);
|
||||
|
|
@ -140,14 +169,18 @@ async fn handle_agent_chat(
|
|||
.and_then(|name| name.to_str().ok());
|
||||
|
||||
// Find the appropriate listener
|
||||
let listener = {
|
||||
let listener: common::configuration::Listener = {
|
||||
let listeners = listeners.read().await;
|
||||
agent_selector
|
||||
.find_listener(listener_name, &listeners)
|
||||
.await?
|
||||
};
|
||||
|
||||
info!("Handling request for listener: {}", listener.name);
|
||||
get_active_span(|span| {
|
||||
span.update_name(listener.name.to_string());
|
||||
});
|
||||
|
||||
info!(listener = %listener.name, "handling request");
|
||||
|
||||
// Parse request body
|
||||
let request_path = request
|
||||
|
|
@ -162,12 +195,8 @@ async fn handle_agent_chat(
|
|||
let mut headers = request.headers().clone();
|
||||
headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER);
|
||||
|
||||
// Set the request_id in headers if not already present
|
||||
if !headers.contains_key(common::consts::REQUEST_ID_HEADER) {
|
||||
let request_id = uuid::Uuid::new_v4().to_string();
|
||||
info!(
|
||||
"Request id not found in headers, generated new request id: {}",
|
||||
request_id
|
||||
);
|
||||
headers.insert(
|
||||
common::consts::REQUEST_ID_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&request_id).unwrap(),
|
||||
|
|
@ -180,8 +209,8 @@ async fn handle_agent_chat(
|
|||
let chat_request_bytes = request.collect().await?.to_bytes();
|
||||
|
||||
debug!(
|
||||
"Received request body (raw utf8): {}",
|
||||
String::from_utf8_lossy(&chat_request_bytes)
|
||||
body = %String::from_utf8_lossy(&chat_request_bytes),
|
||||
"received request body"
|
||||
);
|
||||
|
||||
// Determine the API type from the endpoint
|
||||
|
|
@ -195,7 +224,7 @@ async fn handle_agent_chat(
|
|||
let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
|
||||
Ok(request) => request,
|
||||
Err(err) => {
|
||||
warn!("Failed to parse request as ProviderRequestType: {}", err);
|
||||
warn!("failed to parse request as ProviderRequestType: {}", err);
|
||||
let err_msg = format!("Failed to parse request: {}", err);
|
||||
return Err(AgentFilterChainError::RequestParsing(
|
||||
serde_json::Error::custom(err_msg),
|
||||
|
|
@ -205,12 +234,6 @@ async fn handle_agent_chat(
|
|||
|
||||
let message: Vec<OpenAIMessage> = client_request.get_messages();
|
||||
|
||||
// Extract trace parent for routing
|
||||
let traceparent = request_headers
|
||||
.iter()
|
||||
.find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
|
||||
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
|
||||
|
||||
let request_id = request_headers
|
||||
.get(common::consts::REQUEST_ID_HEADER)
|
||||
.and_then(|val| val.to_str().ok())
|
||||
|
|
@ -223,87 +246,58 @@ async fn handle_agent_chat(
|
|||
agent_selector.create_agent_map(agents)
|
||||
};
|
||||
|
||||
// Parse trace parent to get trace_id and parent_span_id
|
||||
let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent {
|
||||
parse_traceparent(tp)
|
||||
} else {
|
||||
(String::new(), None)
|
||||
};
|
||||
|
||||
// Select appropriate agents using arch orchestrator llm model
|
||||
let selection_span_id = generate_random_span_id();
|
||||
let selection_start_time = SystemTime::now();
|
||||
let selection_start_instant = Instant::now();
|
||||
|
||||
let selection_start = Instant::now();
|
||||
let selected_agents = agent_selector
|
||||
.select_agents(&message, &listener, traceparent.clone(), request_id.clone())
|
||||
.select_agents(&message, &listener, request_id.clone())
|
||||
.await?;
|
||||
|
||||
// Record agent selection span
|
||||
let selection_end_time = SystemTime::now();
|
||||
let selection_elapsed = selection_start_instant.elapsed();
|
||||
let selection_operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path("/agents/select")
|
||||
.with_target(&listener.name)
|
||||
.build();
|
||||
|
||||
let mut selection_span_builder = SpanBuilder::new(&selection_operation_name)
|
||||
.with_span_id(selection_span_id)
|
||||
.with_kind(SpanKind::Internal)
|
||||
.with_start_time(selection_start_time)
|
||||
.with_end_time(selection_end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, "/agents/select")
|
||||
.with_attribute("selection.listener", listener.name.clone())
|
||||
.with_attribute("selection.agent_count", selected_agents.len().to_string())
|
||||
.with_attribute(
|
||||
// Record selection attributes on the current orchestrator span
|
||||
let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0;
|
||||
get_active_span(|span| {
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
"selection.listener",
|
||||
listener.name.clone(),
|
||||
));
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
"selection.agent_count",
|
||||
selected_agents.len() as i64,
|
||||
));
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
"selection.agents",
|
||||
selected_agents
|
||||
.iter()
|
||||
.map(|a| a.id.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(","),
|
||||
)
|
||||
.with_attribute(
|
||||
"duration_ms",
|
||||
format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0),
|
||||
);
|
||||
));
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
"selection.determination_ms",
|
||||
format!("{:.2}", selection_elapsed_ms),
|
||||
));
|
||||
});
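Instead of constructing a separate selection span with SpanBuilder and recording it on the TraceCollector, the new code attaches the selection attributes to whatever span is already active. A self-contained sketch of that call using the `opentelemetry` crate's `get_active_span`; the attribute names match the ones above, the wrapper function is illustrative:

use opentelemetry::trace::get_active_span;
use opentelemetry::KeyValue;

// Attach attributes to the currently active span rather than emitting a new one.
fn record_selection_attributes(listener: &str, agent_count: usize, elapsed_ms: f64) {
    get_active_span(|span| {
        span.set_attribute(KeyValue::new("selection.listener", listener.to_string()));
        span.set_attribute(KeyValue::new("selection.agent_count", agent_count as i64));
        span.set_attribute(KeyValue::new("selection.determination_ms", elapsed_ms));
    });
}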
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone());
|
||||
}
|
||||
if let Some(parent_id) = parent_span_id.clone() {
|
||||
selection_span_builder = selection_span_builder.with_parent_span_id(parent_id);
|
||||
}
|
||||
|
||||
let selection_span = selection_span_builder.build();
|
||||
trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span);
|
||||
|
||||
info!("Selected {} agent(s) for execution", selected_agents.len());
|
||||
info!(
|
||||
count = selected_agents.len(),
|
||||
"selected agents for execution"
|
||||
);
|
||||
|
||||
// Execute agents sequentially, passing output from one to the next
|
||||
let mut current_messages = message.clone();
|
||||
let agent_count = selected_agents.len();
|
||||
|
||||
for (agent_index, selected_agent) in selected_agents.iter().enumerate() {
|
||||
// Get agent name
|
||||
let agent_name = selected_agent.id.clone();
|
||||
let is_last_agent = agent_index == agent_count - 1;
|
||||
|
||||
debug!(
|
||||
"Processing agent {}/{}: {}",
|
||||
agent_index + 1,
|
||||
agent_count,
|
||||
selected_agent.id
|
||||
agent_index = agent_index + 1,
|
||||
total = agent_count,
|
||||
agent = %agent_name,
|
||||
"processing agent"
|
||||
);
|
||||
|
||||
// Record the start time for agent span
|
||||
let agent_start_time = SystemTime::now();
|
||||
let agent_start_instant = Instant::now();
|
||||
let span_id = generate_random_span_id();
|
||||
|
||||
// Get agent name
|
||||
let agent_name = selected_agent.id.clone();
|
||||
|
||||
// Process the filter chain
|
||||
let chat_history = pipeline_processor
|
||||
.process_filter_chain(
|
||||
|
|
@ -311,88 +305,71 @@ async fn handle_agent_chat(
|
|||
selected_agent,
|
||||
&agent_map,
|
||||
&request_headers,
|
||||
Some(&trace_collector),
|
||||
trace_id.clone(),
|
||||
span_id.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Get agent details and invoke
|
||||
let agent = agent_map.get(&agent_name).unwrap();
|
||||
|
||||
debug!("Invoking agent: {}", agent_name);
|
||||
debug!(agent = %agent_name, "invoking agent");
|
||||
|
||||
let llm_response = pipeline_processor
|
||||
.invoke_agent(
|
||||
&chat_history,
|
||||
client_request.clone(),
|
||||
agent,
|
||||
&request_headers,
|
||||
trace_id.clone(),
|
||||
span_id.clone(),
|
||||
)
|
||||
.await?;
|
||||
let agent_span = info_span!(
|
||||
"agent",
|
||||
agent_id = %agent_name,
|
||||
message_count = chat_history.len(),
|
||||
);
|
||||
|
||||
// Record agent span
|
||||
let agent_end_time = SystemTime::now();
|
||||
let agent_elapsed = agent_start_instant.elapsed();
|
||||
let full_path = format!("/agents{}", request_path);
|
||||
let operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path(&full_path)
|
||||
.with_target(&agent_name)
|
||||
.build();
|
||||
let llm_response = async {
|
||||
set_service_name(operation_component::AGENT);
|
||||
get_active_span(|span| {
|
||||
span.update_name(format!("{} /v1/chat/completions", agent_name));
|
||||
});
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_span_id(span_id)
|
||||
.with_kind(SpanKind::Internal)
|
||||
.with_start_time(agent_start_time)
|
||||
.with_end_time(agent_end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, full_path)
|
||||
.with_attribute("agent.name", agent_name.clone())
|
||||
.with_attribute(
|
||||
"agent.sequence",
|
||||
format!("{}/{}", agent_index + 1, agent_count),
|
||||
)
|
||||
.with_attribute(
|
||||
"duration_ms",
|
||||
format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0),
|
||||
);
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
span_builder = span_builder.with_trace_id(trace_id.clone());
|
||||
pipeline_processor
|
||||
.invoke_agent(
|
||||
&chat_history,
|
||||
client_request.clone(),
|
||||
agent,
|
||||
&request_headers,
|
||||
)
|
||||
.await
|
||||
}
|
||||
if let Some(parent_id) = parent_span_id.clone() {
|
||||
span_builder = span_builder.with_parent_span_id(parent_id);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
trace_collector.record_span(operation_component::AGENT, span);
|
||||
.instrument(agent_span.clone())
|
||||
.await?;
|
||||
|
||||
// If this is the last agent, return the streaming response
|
||||
if is_last_agent {
|
||||
info!(
|
||||
"Completed agent chain, returning response from last agent: {}",
|
||||
agent_name
|
||||
agent = %agent_name,
|
||||
"completed agent chain, returning response"
|
||||
);
|
||||
return response_handler
|
||||
.create_streaming_response(llm_response)
|
||||
.await
|
||||
.map_err(AgentFilterChainError::from);
|
||||
// Capture the orchestrator span (parent of the agent span) so it
|
||||
// stays open for the full streaming duration alongside the agent span.
|
||||
let orchestrator_span = tracing::Span::current();
|
||||
return async {
|
||||
response_handler
|
||||
.create_streaming_response(
|
||||
llm_response,
|
||||
tracing::Span::current(), // agent span (inner)
|
||||
orchestrator_span, // orchestrator span (outer)
|
||||
)
|
||||
.await
|
||||
.map_err(AgentFilterChainError::from)
|
||||
}
|
||||
.instrument(agent_span)
|
||||
.await;
|
||||
}
|
||||
|
||||
// For intermediate agents, collect the full response and pass to next agent
|
||||
debug!(
|
||||
"Collecting response from intermediate agent: {}",
|
||||
agent_name
|
||||
);
|
||||
let response_text = response_handler.collect_full_response(llm_response).await?;
|
||||
debug!(agent = %agent_name, "collecting response from intermediate agent");
|
||||
let response_text = async { response_handler.collect_full_response(llm_response).await }
|
||||
.instrument(agent_span)
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Agent {} completed, passing {} character response to next agent",
|
||||
agent_name,
|
||||
response_text.len()
|
||||
agent = %agent_name,
|
||||
response_len = response_text.len(),
|
||||
"agent completed, passing response to next agent"
|
||||
);
|
||||
|
||||
// remove last message and add new one at the end
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ impl AgentSelector {
|
|||
.cloned()
|
||||
.or_else(|| {
|
||||
warn!(
|
||||
"No default agent found, routing request to first agent: {}",
|
||||
"no default agent found, routing request to first agent: {}",
|
||||
agents[0].id
|
||||
);
|
||||
Some(agents[0].clone())
|
||||
|
|
@ -108,7 +108,6 @@ impl AgentSelector {
|
|||
&self,
|
||||
messages: &[Message],
|
||||
listener: &Listener,
|
||||
trace_parent: Option<String>,
|
||||
request_id: Option<String>,
|
||||
) -> Result<Vec<AgentFilterChain>, AgentSelectionError> {
|
||||
let agents = listener
|
||||
|
|
@ -118,7 +117,7 @@ impl AgentSelector {
|
|||
|
||||
// If only one agent, skip orchestration
|
||||
if agents.len() == 1 {
|
||||
debug!("Only one agent available, skipping orchestration");
|
||||
debug!("only one agent available, skipping orchestration");
|
||||
return Ok(vec![agents[0].clone()]);
|
||||
}
|
||||
|
||||
|
|
@ -132,15 +131,15 @@ impl AgentSelector {
|
|||
|
||||
match self
|
||||
.orchestrator_service
|
||||
.determine_orchestration(messages, trace_parent, Some(usage_preferences), request_id)
|
||||
.determine_orchestration(messages, Some(usage_preferences), request_id)
|
||||
.await
|
||||
{
|
||||
Ok(Some(routes)) => {
|
||||
debug!("Determined {} agent(s) via orchestration", routes.len());
|
||||
debug!(count = routes.len(), "determined agents via orchestration");
|
||||
let mut selected_agents = Vec::new();
|
||||
|
||||
for (route_name, agent_name) in routes {
|
||||
debug!("Processing route: {}, agent: {}", route_name, agent_name);
|
||||
debug!(route = %route_name, agent = %agent_name, "processing route");
|
||||
let selected_agent = agents
|
||||
.iter()
|
||||
.find(|a| a.id == agent_name)
|
||||
|
|
@ -155,14 +154,14 @@ impl AgentSelector {
|
|||
}
|
||||
|
||||
if selected_agents.is_empty() {
|
||||
debug!("No agents determined using orchestration, using default agent");
|
||||
debug!("no agents determined via orchestration, using default");
|
||||
Ok(vec![self.get_default_agent(agents, &listener.name)?])
|
||||
} else {
|
||||
Ok(selected_agents)
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
debug!("No agents determined using orchestration, using default agent");
|
||||
debug!("no agents determined using orchestration, using default agent");
|
||||
Ok(vec![self.get_default_agent(agents, &listener.name)?])
|
||||
}
|
||||
Err(err) => Err(AgentSelectionError::OrchestrationError(err.to_string())),
|
||||
|
|
|
|||
|
|
@ -944,7 +944,7 @@ impl ArchFunctionHandler {
|
|||
) -> Result<ChatCompletionsResponse> {
|
||||
use tracing::{error, info};
|
||||
|
||||
info!("[Arch-Function] - ChatCompletion");
|
||||
info!("processing chat completion request");
|
||||
|
||||
let messages = self.process_messages(
|
||||
&request.messages,
|
||||
|
|
@ -955,9 +955,9 @@ impl ArchFunctionHandler {
|
|||
)?;
|
||||
|
||||
info!(
|
||||
"[request to arch-fc]: model: {}, messages count: {}",
|
||||
self.model_name,
|
||||
messages.len()
|
||||
model = %self.model_name,
|
||||
message_count = messages.len(),
|
||||
"sending request to arch-fc"
|
||||
);
|
||||
|
||||
let use_agent_orchestrator = request
|
||||
|
|
@ -991,7 +991,7 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
}
|
||||
}
|
||||
info!("[Agent Orchestrator]: response received");
|
||||
info!("agent orchestrator response received");
|
||||
} else if let Some(tools) = request.tools.as_ref() {
|
||||
let mut hallucination_state = HallucinationState::new(tools);
|
||||
let mut has_tool_calls = None;
|
||||
|
|
@ -1040,7 +1040,10 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
|
||||
if has_tool_calls == Some(true) && has_hallucination {
|
||||
info!("[Hallucination]: {}", hallucination_state.error_message);
|
||||
info!(
|
||||
"detected hallucination: {}",
|
||||
hallucination_state.error_message
|
||||
);
|
||||
|
||||
let clarify_messages = self.prefill_message(messages.clone(), &self.clarify_prefix);
|
||||
let clarify_request = self.create_request_with_extra_body(clarify_messages, false);
|
||||
|
|
@ -1075,8 +1078,8 @@ impl ArchFunctionHandler {
|
|||
let response_dict = self.parse_model_response(&model_response);
|
||||
|
||||
info!(
|
||||
"[arch-fc]: raw model response: {}",
|
||||
response_dict.raw_response
|
||||
raw_response = %response_dict.raw_response,
|
||||
"arch-fc model response"
|
||||
);
|
||||
|
||||
// General model response (no intent matched - should route to default target)
|
||||
|
|
@ -1126,7 +1129,7 @@ impl ArchFunctionHandler {
|
|||
|
||||
if verification.is_valid {
|
||||
info!(
|
||||
"[Tool calls]: {:?}",
|
||||
"tool calls extracted: {:?}",
|
||||
response_dict
|
||||
.tool_calls
|
||||
.iter()
|
||||
|
|
@ -1143,7 +1146,7 @@ impl ArchFunctionHandler {
|
|||
tool_calls: Some(response_dict.tool_calls.clone()),
|
||||
}
|
||||
} else {
|
||||
error!("Invalid tool call - {}", verification.error_message);
|
||||
error!(error = %verification.error_message, "invalid tool call");
|
||||
ResponseMessage {
|
||||
role: Role::Assistant,
|
||||
content: Some(String::new()),
|
||||
|
|
@ -1155,7 +1158,7 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
error!("Tool calls present but no tools provided in request");
|
||||
error!("tool calls present but no tools provided in request");
|
||||
ResponseMessage {
|
||||
role: Role::Assistant,
|
||||
content: Some(String::new()),
|
||||
|
|
@ -1168,7 +1171,7 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
} else {
|
||||
info!(
|
||||
"[Tool calls]: {:?}",
|
||||
"tool calls extracted: {:?}",
|
||||
response_dict
|
||||
.tool_calls
|
||||
.iter()
|
||||
|
|
@ -1187,8 +1190,8 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
} else {
|
||||
error!(
|
||||
"Invalid tool calls in response: {}",
|
||||
response_dict.error_message
|
||||
error = %response_dict.error_message,
|
||||
"invalid tool calls in response"
|
||||
);
|
||||
ResponseMessage {
|
||||
role: Role::Assistant,
|
||||
|
|
@ -1201,7 +1204,7 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
error!("Invalid model response - {}", model_response);
|
||||
error!(response = %model_response, "invalid model response");
|
||||
ResponseMessage {
|
||||
role: Role::Assistant,
|
||||
content: Some(String::new()),
|
||||
|
|
@ -1244,7 +1247,7 @@ impl ArchFunctionHandler {
|
|||
metadata: Some(metadata),
|
||||
};
|
||||
|
||||
info!("[response arch-fc]: {:?}", chat_completion_response);
|
||||
info!(response = ?chat_completion_response, "arch-fc response");
|
||||
|
||||
Ok(chat_completion_response)
|
||||
}
|
||||
|
|
@ -1331,7 +1334,7 @@ pub async fn function_calling_chat_handler(
|
|||
let mut body_json: Value = match serde_json::from_slice(&whole_body) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
error!("Failed to parse request body as JSON: {}", e);
|
||||
error!(error = %e, "failed to parse request body as json");
|
||||
let mut response = Response::new(full(
|
||||
serde_json::json!({
|
||||
"error": format!("Invalid request body: {}", e)
|
||||
|
|
@ -1355,13 +1358,13 @@ pub async fn function_calling_chat_handler(
|
|||
let chat_request: ChatCompletionsRequest = match serde_json::from_value(body_json) {
|
||||
Ok(req) => {
|
||||
info!(
|
||||
"[request body]: {}",
|
||||
serde_json::to_string(&req).unwrap_or_default()
|
||||
request_body = %serde_json::to_string(&req).unwrap_or_default(),
|
||||
"received request"
|
||||
);
|
||||
req
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to parse request body: {}", e);
|
||||
error!(error = %e, "failed to parse request body");
|
||||
let mut response = Response::new(full(
|
||||
serde_json::json!({
|
||||
"error": format!("Invalid request body: {}", e)
|
||||
|
|
@ -1384,7 +1387,10 @@ pub async fn function_calling_chat_handler(
|
|||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
info!("Use agent orchestrator: {}", use_agent_orchestrator);
|
||||
info!(
|
||||
use_agent_orchestrator = use_agent_orchestrator,
|
||||
"handler mode"
|
||||
);
|
||||
|
||||
// Create the appropriate handler
|
||||
let handler_name = if use_agent_orchestrator {
|
||||
|
|
@ -1415,7 +1421,7 @@ pub async fn function_calling_chat_handler(
|
|||
match final_response {
|
||||
Ok(response_data) => {
|
||||
let response_json = serde_json::to_string(&response_data).unwrap_or_else(|e| {
|
||||
error!("Failed to serialize response: {}", e);
|
||||
error!(error = %e, "failed to serialize response");
|
||||
serde_json::json!({"error": "Failed to serialize response"}).to_string()
|
||||
});
|
||||
|
||||
|
|
@ -1428,7 +1434,7 @@ pub async fn function_calling_chat_handler(
|
|||
Ok(response)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("[{}] - Error in function calling: {}", handler_name, e);
|
||||
error!(handler = handler_name, error = %e, "error in function calling");
|
||||
|
||||
let error_response = serde_json::json!({
|
||||
"error": format!("[{}] - Error in function calling: {}", handler_name, e)
|
||||
|
|
|
|||
|
|
@ -112,15 +112,7 @@ mod tests {
|
|||
|
||||
let headers = HeaderMap::new();
|
||||
let result = pipeline_processor
|
||||
.process_filter_chain(
|
||||
&request.messages,
|
||||
&test_pipeline,
|
||||
&agent_map,
|
||||
&headers,
|
||||
None,
|
||||
String::new(),
|
||||
String::new(),
|
||||
)
|
||||
.process_filter_chain(&request.messages, &test_pipeline, &agent_map, &headers)
|
||||
.await;
|
||||
|
||||
println!("Pipeline processing result: {:?}", result);
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ use common::consts::{
|
|||
ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
|
||||
};
|
||||
use common::llm_providers::LlmProviders;
|
||||
use common::traces::TraceCollector;
|
||||
use hermesllm::apis::openai_responses::InputParam;
|
||||
use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
|
||||
use hermesllm::{ProviderRequest, ProviderRequestType};
|
||||
|
|
@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody;
|
|||
use http_body_util::{BodyExt, Full};
|
||||
use hyper::header::{self};
|
||||
use hyper::{Request, Response, StatusCode};
|
||||
use opentelemetry::global;
|
||||
use opentelemetry::trace::get_active_span;
|
||||
use opentelemetry_http::HeaderInjector;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{debug, info, warn};
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
|
||||
use crate::handlers::router_chat::router_chat_get_upstream_model;
|
||||
use crate::handlers::utils::{
|
||||
|
|
@ -26,7 +28,7 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
|
|||
use crate::state::{
|
||||
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
||||
};
|
||||
use crate::tracing::operation_component;
|
||||
use crate::tracing::{operation_component, set_service_name};
|
||||
|
||||
fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
|
||||
Full::new(chunk.into())
|
||||
|
|
@ -40,7 +42,6 @@ pub async fn llm_chat(
|
|||
full_qualified_llm_provider_url: String,
|
||||
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
|
||||
llm_providers: Arc<RwLock<LlmProviders>>,
|
||||
trace_collector: Arc<TraceCollector>,
|
||||
state_storage: Option<Arc<dyn StateStorage>>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let request_path = request.uri().path().to_string();
|
||||
|
|
@ -51,16 +52,49 @@ pub async fn llm_chat(
|
|||
.map(|s| s.to_string())
|
||||
{
|
||||
Some(id) => id,
|
||||
None => {
|
||||
let generated_id = uuid::Uuid::new_v4().to_string();
|
||||
warn!(
|
||||
"[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID",
|
||||
generated_id
|
||||
);
|
||||
generated_id
|
||||
}
|
||||
None => uuid::Uuid::new_v4().to_string(),
|
||||
};
|
||||
|
||||
// Create a span with request_id that will be included in all log lines
|
||||
let request_span = info_span!(
|
||||
"llm",
|
||||
component = "llm",
|
||||
request_id = %request_id,
|
||||
http.method = %request.method(),
|
||||
http.path = %request_path,
|
||||
);
|
||||
|
||||
// Execute the rest of the handler inside the span
|
||||
llm_chat_inner(
|
||||
request,
|
||||
router_service,
|
||||
full_qualified_llm_provider_url,
|
||||
model_aliases,
|
||||
llm_providers,
|
||||
state_storage,
|
||||
request_id,
|
||||
request_path,
|
||||
request_headers,
|
||||
)
|
||||
.instrument(request_span)
|
||||
.await
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn llm_chat_inner(
|
||||
request: Request<hyper::body::Incoming>,
|
||||
router_service: Arc<RouterService>,
|
||||
full_qualified_llm_provider_url: String,
|
||||
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
|
||||
llm_providers: Arc<RwLock<LlmProviders>>,
|
||||
state_storage: Option<Arc<dyn StateStorage>>,
|
||||
request_id: String,
|
||||
request_path: String,
|
||||
mut request_headers: hyper::HeaderMap,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
// Set service name for LLM operations
|
||||
set_service_name(operation_component::LLM);
|
||||
|
||||
// Extract or generate traceparent - this establishes the trace context for all spans
|
||||
let traceparent: String = match request_headers
|
||||
.get(TRACE_PARENT_HEADER)
|
||||
|
|
@ -73,20 +107,18 @@ pub async fn llm_chat(
|
|||
let trace_id = Uuid::new_v4().to_string().replace("-", "");
|
||||
let generated_tp = format!("00-{}-0000000000000000-01", trace_id);
|
||||
warn!(
|
||||
"[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}",
|
||||
request_id, generated_tp
|
||||
generated_traceparent = %generated_tp,
|
||||
"TRACE_PARENT header missing, generated new traceparent"
|
||||
);
|
||||
generated_tp
|
||||
}
|
||||
};
|
||||
|
||||
let mut request_headers = request_headers;
|
||||
let chat_request_bytes = request.collect().await?.to_bytes();
|
||||
|
||||
debug!(
|
||||
"[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}",
|
||||
request_id,
|
||||
String::from_utf8_lossy(&chat_request_bytes)
|
||||
body = %String::from_utf8_lossy(&chat_request_bytes),
|
||||
"request body received"
|
||||
);
|
||||
|
||||
let mut client_request = match ProviderRequestType::try_from((
|
||||
|
|
@ -96,13 +128,10 @@ pub async fn llm_chat(
|
|||
Ok(request) => request,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}",
|
||||
request_id, err
|
||||
);
|
||||
let err_msg = format!(
|
||||
"[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}",
|
||||
request_id, err
|
||||
error = %err,
|
||||
"failed to parse request as ProviderRequestType"
|
||||
);
|
||||
let err_msg = format!("Failed to parse request: {}", err);
|
||||
let mut bad_request = Response::new(full(err_msg));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
|
|
@ -120,18 +149,23 @@ pub async fn llm_chat(
|
|||
// Model alias resolution: update model field in client_request immediately
|
||||
// This ensures all downstream objects use the resolved model
|
||||
let model_from_request = client_request.model().to_string();
|
||||
let temperature = client_request.get_temperature();
|
||||
let _temperature = client_request.get_temperature();
|
||||
let is_streaming_request = client_request.is_streaming();
|
||||
let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
|
||||
let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
|
||||
|
||||
// Validate that the requested model exists in configuration
|
||||
// This matches the validation in llm_gateway routing.rs
|
||||
if llm_providers.read().await.get(&resolved_model).is_none() {
|
||||
if llm_providers
|
||||
.read()
|
||||
.await
|
||||
.get(&alias_resolved_model)
|
||||
.is_none()
|
||||
{
|
||||
let err_msg = format!(
|
||||
"Model '{}' not found in configured providers",
|
||||
resolved_model
|
||||
alias_resolved_model
|
||||
);
|
||||
warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg);
|
||||
warn!(model = %alias_resolved_model, "model not found in configured providers");
|
||||
let mut bad_request = Response::new(full(err_msg));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
|
|
@ -139,29 +173,26 @@ pub async fn llm_chat(
|
|||
|
||||
// Handle provider/model slug format (e.g., "openai/gpt-4")
|
||||
// Extract just the model name for upstream (providers don't understand the slug)
|
||||
let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') {
|
||||
let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') {
|
||||
model.to_string()
|
||||
} else {
|
||||
resolved_model.clone()
|
||||
alias_resolved_model.clone()
|
||||
};
|
||||
|
||||
// Extract tool names and user message preview for span attributes
|
||||
let tool_names = client_request.get_tool_names();
|
||||
let user_message_preview = client_request
|
||||
let _tool_names = client_request.get_tool_names();
|
||||
let _user_message_preview = client_request
|
||||
.get_recent_user_message()
|
||||
.map(|msg| truncate_message(&msg, 50));
|
||||
|
||||
// Extract messages for signal analysis (clone before moving client_request)
|
||||
let messages_for_signals = client_request.get_messages();
|
||||
let messages_for_signals = Some(client_request.get_messages());
|
||||
|
||||
// Set the model to just the model name (without provider prefix)
|
||||
// This ensures upstream receives "gpt-4" not "openai/gpt-4"
|
||||
client_request.set_model(model_name_only.clone());
|
||||
if client_request.remove_metadata_key("archgw_preference_config") {
|
||||
debug!(
|
||||
"[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata",
|
||||
request_id
|
||||
);
|
||||
debug!("removed archgw_preference_config from metadata");
|
||||
}
|
||||
|
||||
// === v1/responses state management: Determine upstream API and combine input if needed ===
|
||||
|
|
@ -180,9 +211,9 @@ pub async fn llm_chat(
|
|||
// Get the upstream path and check if it's ResponsesAPI
|
||||
let upstream_path = get_upstream_path(
|
||||
&llm_providers,
|
||||
&resolved_model,
|
||||
&alias_resolved_model,
|
||||
&request_path,
|
||||
&resolved_model,
|
||||
&alias_resolved_model,
|
||||
is_streaming_request,
|
||||
)
|
||||
.await;
|
||||
|
|
@ -209,14 +240,17 @@ pub async fn llm_chat(
|
|||
// Update both the request and original_input_items
|
||||
responses_req.input = InputParam::Items(combined_input.clone());
|
||||
original_input_items = combined_input;
|
||||
info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len());
|
||||
info!(
|
||||
items = original_input_items.len(),
|
||||
"updated request with conversation history"
|
||||
);
|
||||
}
|
||||
Err(StateStorageError::NotFound(_)) => {
|
||||
// Return 409 Conflict when previous_response_id not found
|
||||
warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id);
|
||||
warn!(previous_response_id = %prev_resp_id, "previous response_id not found");
|
||||
let err_msg = format!(
|
||||
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}",
|
||||
request_id, prev_resp_id
|
||||
"Conversation state not found for previous_response_id: {}",
|
||||
prev_resp_id
|
||||
);
|
||||
let mut conflict_response = Response::new(full(err_msg));
|
||||
*conflict_response.status_mut() = StatusCode::CONFLICT;
|
||||
|
|
@ -225,8 +259,9 @@ pub async fn llm_chat(
|
|||
Err(e) => {
|
||||
// Log warning but continue on other storage errors
|
||||
warn!(
|
||||
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}",
|
||||
request_id, prev_resp_id, e
|
||||
previous_response_id = %prev_resp_id,
|
||||
error = %e,
|
||||
"failed to retrieve conversation state"
|
||||
);
|
||||
// Restore original_input_items since we passed ownership
|
||||
original_input_items = extract_input_items(&responses_req.input);
|
||||
|
|
@ -234,10 +269,7 @@ pub async fn llm_chat(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
debug!(
|
||||
"[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.",
|
||||
request_id
|
||||
);
|
||||
debug!("upstream supports ResponsesAPI natively");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -246,14 +278,29 @@ pub async fn llm_chat(
|
|||
let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();
|
||||
|
||||
// Determine routing using the dedicated router_chat module
|
||||
let routing_result = match router_chat_get_upstream_model(
|
||||
router_service,
|
||||
client_request, // Pass the original request - router_chat will convert it
|
||||
trace_collector.clone(),
|
||||
&traceparent,
|
||||
&request_path,
|
||||
&request_id,
|
||||
)
|
||||
// This gets its own span for latency and error tracking
|
||||
let routing_span = info_span!(
|
||||
"routing",
|
||||
component = "routing",
|
||||
http.method = "POST",
|
||||
http.target = %request_path,
|
||||
model.requested = %model_from_request,
|
||||
model.alias_resolved = %alias_resolved_model,
|
||||
route.selected_model = tracing::field::Empty,
|
||||
routing.determination_ms = tracing::field::Empty,
|
||||
);
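The routing span above declares `route.selected_model` and `routing.determination_ms` as `tracing::field::Empty`, which reserves the fields so they can be filled in once routing finishes. A minimal sketch of how such reserved fields are typically recorded later; the surrounding function and values are hypothetical, and this hunk does not show where the commit actually records them:

use tracing::{field::Empty, info_span};

fn routing_sketch() {
    let routing_span = info_span!(
        "routing",
        route.selected_model = Empty,
        routing.determination_ms = Empty,
    );
    let _enter = routing_span.enter();

    // ... determine the route ...
    let selected_model = "gpt-4o"; // hypothetical result
    let determination_ms = 12.3_f64;

    // Fill in the reserved fields on the same span.
    routing_span.record("route.selected_model", selected_model);
    routing_span.record("routing.determination_ms", determination_ms);
}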
|
||||
let routing_result = match async {
|
||||
set_service_name(operation_component::ROUTING);
|
||||
router_chat_get_upstream_model(
|
||||
router_service,
|
||||
client_request, // Pass the original request - router_chat will convert it
|
||||
&traceparent,
|
||||
&request_path,
|
||||
&request_id,
|
||||
)
|
||||
.await
|
||||
}
|
||||
.instrument(routing_span)
|
||||
.await
|
||||
{
|
||||
Ok(result) => result,
|
||||
|
|
@ -267,22 +314,36 @@ pub async fn llm_chat(
|
|||
// Determine final model to use
|
||||
// Router returns "none" as a sentinel value when it doesn't select a specific model
|
||||
let router_selected_model = routing_result.model_name;
|
||||
let model_name = if router_selected_model != "none" {
|
||||
let resolved_model = if router_selected_model != "none" {
|
||||
// Router selected a specific model via routing preferences
|
||||
router_selected_model
|
||||
} else {
|
||||
// Router returned "none" sentinel, use validated resolved_model from request
|
||||
resolved_model.clone()
|
||||
alias_resolved_model.clone()
|
||||
};
|
||||
|
||||
let span_name = if model_from_request == resolved_model {
|
||||
format!("POST {} {}", request_path, resolved_model)
|
||||
} else {
|
||||
format!(
|
||||
"POST {} {} -> {}",
|
||||
request_path, model_from_request, resolved_model
|
||||
)
|
||||
};
|
||||
get_active_span(|span| {
|
||||
span.update_name(span_name.clone());
|
||||
});
|
||||
|
||||
debug!(
|
||||
"[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}",
|
||||
request_id, full_qualified_llm_provider_url, model_name, model_name_only
|
||||
url = %full_qualified_llm_provider_url,
|
||||
provider_hint = %resolved_model,
|
||||
upstream_model = %model_name_only,
|
||||
"Routing to upstream"
|
||||
);
|
||||
|
||||
request_headers.insert(
|
||||
ARCH_PROVIDER_HINT_HEADER,
|
||||
header::HeaderValue::from_str(&model_name).unwrap(),
|
||||
header::HeaderValue::from_str(&resolved_model).unwrap(),
|
||||
);
|
||||
|
||||
request_headers.insert(
|
||||
|
|
@ -292,12 +353,18 @@ pub async fn llm_chat(
|
|||
// remove content-length header if it exists
|
||||
request_headers.remove(header::CONTENT_LENGTH);
|
||||
|
||||
// Inject current LLM span's trace context so upstream spans are children of plano(llm)
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
|
||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
|
||||
});
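Outbound trace propagation also changes: rather than hand-crafting a traceparent string, the handler injects the current span's OpenTelemetry context into the upstream request headers through the global propagator. A standalone sketch of that injection, assuming the `opentelemetry`, `opentelemetry-http`, and `tracing-opentelemetry` crates (the helper function name is made up):

use opentelemetry::global;
use opentelemetry_http::HeaderInjector;
use tracing_opentelemetry::OpenTelemetrySpanExt;

// Inject the active tracing span's trace context (traceparent/tracestate)
// into outgoing headers so the upstream continues the same trace.
fn inject_trace_context(headers: &mut hyper::HeaderMap) {
    global::get_text_map_propagator(|propagator| {
        let cx = tracing::Span::current().context();
        propagator.inject_context(&cx, &mut HeaderInjector(headers));
    });
}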
|
||||
|
||||
// Capture start time right before sending request to upstream
|
||||
let request_start_time = std::time::Instant::now();
|
||||
let request_start_system_time = std::time::SystemTime::now();
|
||||
let _request_start_system_time = std::time::SystemTime::now();
|
||||
|
||||
let llm_response = match reqwest::Client::new()
|
||||
.post(full_qualified_llm_provider_url)
|
||||
.post(&full_qualified_llm_provider_url)
|
||||
.headers(request_headers)
|
||||
.body(client_request_bytes_for_upstream)
|
||||
.send()
|
||||
|
|
@ -324,29 +391,12 @@ pub async fn llm_chat(
|
|||
// Build LLM span with actual status code using constants
|
||||
let byte_stream = llm_response.bytes_stream();
|
||||
|
||||
// Build the LLM span (will be finalized after streaming completes)
|
||||
let llm_span = build_llm_span(
|
||||
&traceparent,
|
||||
&request_path,
|
||||
&resolved_model,
|
||||
&model_name,
|
||||
upstream_status.as_u16(),
|
||||
is_streaming_request,
|
||||
request_start_system_time,
|
||||
tool_names,
|
||||
user_message_preview,
|
||||
temperature,
|
||||
&llm_providers,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Create base processor for metrics and tracing
|
||||
let base_processor = ObservableStreamProcessor::new(
|
||||
trace_collector,
|
||||
operation_component::LLM,
|
||||
llm_span,
|
||||
span_name,
|
||||
request_start_time,
|
||||
Some(messages_for_signals),
|
||||
messages_for_signals,
|
||||
);
|
||||
|
||||
// === v1/responses state management: Wrap with ResponsesStateProcessor ===
|
||||
|
|
@ -367,8 +417,8 @@ pub async fn llm_chat(
|
|||
base_processor,
|
||||
state_store,
|
||||
original_input_items,
|
||||
alias_resolved_model.clone(),
|
||||
resolved_model.clone(),
|
||||
model_name.clone(),
|
||||
is_streaming_request,
|
||||
false, // Not OpenAI upstream since should_manage_state is true
|
||||
content_encoding,
|
||||
|
|
@ -409,88 +459,6 @@ fn resolve_model_alias(
|
|||
model_from_request.to_string()
|
||||
}
|
||||
|
||||
/// Builds the LLM span with all required and optional attributes.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn build_llm_span(
|
||||
traceparent: &str,
|
||||
request_path: &str,
|
||||
resolved_model: &str,
|
||||
model_name: &str,
|
||||
status_code: u16,
|
||||
is_streaming: bool,
|
||||
start_time: std::time::SystemTime,
|
||||
tool_names: Option<Vec<String>>,
|
||||
user_message_preview: Option<String>,
|
||||
temperature: Option<f32>,
|
||||
llm_providers: &Arc<RwLock<LlmProviders>>,
|
||||
) -> common::traces::Span {
|
||||
use crate::tracing::{http, llm, OperationNameBuilder};
|
||||
use common::traces::{parse_traceparent, SpanBuilder, SpanKind};
|
||||
|
||||
// Calculate the upstream path based on provider configuration
|
||||
let upstream_path = get_upstream_path(
|
||||
llm_providers,
|
||||
model_name,
|
||||
request_path,
|
||||
resolved_model,
|
||||
is_streaming,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Build operation name showing path transformation if different
|
||||
let operation_name = if request_path != upstream_path {
|
||||
OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path(format!("{} >> {}", request_path, upstream_path))
|
||||
.with_target(resolved_model)
|
||||
.build()
|
||||
} else {
|
||||
OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path(request_path)
|
||||
.with_target(resolved_model)
|
||||
.build()
|
||||
};
|
||||
|
||||
let (trace_id, parent_span_id) = parse_traceparent(traceparent);
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_trace_id(&trace_id)
|
||||
.with_kind(SpanKind::Client)
|
||||
.with_start_time(start_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::STATUS_CODE, status_code.to_string())
|
||||
.with_attribute(http::TARGET, request_path.to_string())
|
||||
.with_attribute(http::UPSTREAM_TARGET, upstream_path)
|
||||
.with_attribute(llm::MODEL_NAME, resolved_model.to_string())
|
||||
.with_attribute(llm::IS_STREAMING, is_streaming.to_string());
|
||||
|
||||
// Only set parent span ID if it exists (not a root span)
|
||||
if let Some(parent) = parent_span_id {
|
||||
span_builder = span_builder.with_parent_span_id(&parent);
|
||||
}
|
||||
|
||||
// Add optional attributes
|
||||
if let Some(temp) = temperature {
|
||||
span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string());
|
||||
}
|
||||
|
||||
if let Some(tools) = tool_names {
|
||||
let formatted_tools = tools
|
||||
.iter()
|
||||
.map(|name| format!("{}(...)", name))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools);
|
||||
}
|
||||
|
||||
if let Some(preview) = user_message_preview {
|
||||
span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview);
|
||||
}
|
||||
|
||||
span_builder.build()
|
||||
}
|
||||
|
||||
/// Calculates the upstream path for the provider based on the model name.
|
||||
/// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
|
||||
/// then uses target_endpoint_for_provider to calculate the correct upstream path.
|
||||
|
|
|
|||
|
|
@ -4,20 +4,18 @@ use common::configuration::{Agent, AgentFilterChain};
|
|||
use common::consts::{
|
||||
ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER,
|
||||
};
|
||||
use common::traces::{generate_random_span_id, SpanBuilder, SpanKind};
|
||||
use hermesllm::apis::openai::Message;
|
||||
use hermesllm::{ProviderRequest, ProviderRequestType};
|
||||
use hyper::header::HeaderMap;
|
||||
use std::time::{Instant, SystemTime};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::tracing::operation_component::{self};
|
||||
use crate::tracing::{http, OperationNameBuilder};
|
||||
use opentelemetry::global;
|
||||
use opentelemetry_http::HeaderInjector;
|
||||
use tracing::{debug, info, instrument, warn};
|
||||
|
||||
use crate::handlers::jsonrpc::{
|
||||
JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION,
|
||||
MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD,
|
||||
};
|
||||
use crate::tracing::{operation_component, set_service_name};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Errors that can occur during pipeline processing
|
||||
|
|
@ -81,115 +79,14 @@ impl PipelineProcessor {
|
|||
}
|
||||
}
|
||||
|
||||
/// Record a span for filter execution
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn record_filter_span(
|
||||
&self,
|
||||
collector: &std::sync::Arc<common::traces::TraceCollector>,
|
||||
agent_name: &str,
|
||||
tool_name: &str,
|
||||
start_time: SystemTime,
|
||||
end_time: SystemTime,
|
||||
elapsed: std::time::Duration,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
span_id: String,
|
||||
) -> String {
|
||||
// let (trace_id, parent_span_id) = self.extract_trace_context();
|
||||
|
||||
// Build operation name: POST /agents/* {filter_name}
|
||||
// Using generic path since we don't have access to specific endpoint here
|
||||
let operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path("/agents/*")
|
||||
.with_target(agent_name)
|
||||
.build();
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_span_id(span_id.clone())
|
||||
.with_kind(SpanKind::Client)
|
||||
.with_start_time(start_time)
|
||||
.with_end_time(end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, "/agents/*")
|
||||
.with_attribute("filter.name", agent_name.to_string())
|
||||
.with_attribute("filter.tool_name", tool_name.to_string())
|
||||
.with_attribute(
|
||||
"duration_ms",
|
||||
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
|
||||
);
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
span_builder = span_builder.with_trace_id(trace_id);
|
||||
}
|
||||
if !parent_span_id.is_empty() {
|
||||
span_builder = span_builder.with_parent_span_id(parent_span_id);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
// Use plano(filter) as service name for filter execution spans
|
||||
collector.record_span(operation_component::AGENT_FILTER, span);
|
||||
span_id.clone()
|
||||
}
|
||||
|
||||
/// Record a span for MCP protocol interactions
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn record_agent_filter_span(
|
||||
&self,
|
||||
collector: &std::sync::Arc<common::traces::TraceCollector>,
|
||||
operation: &str,
|
||||
agent_id: &str,
|
||||
start_time: SystemTime,
|
||||
end_time: SystemTime,
|
||||
elapsed: std::time::Duration,
|
||||
additional_attrs: Option<HashMap<&str, String>>,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
span_id: Option<String>,
|
||||
) {
|
||||
// let (trace_id, parent_span_id) = self.extract_trace_context();
|
||||
|
||||
// Build operation name: POST /mcp {agent_id}
|
||||
let operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path("/mcp")
|
||||
.with_operation(operation)
|
||||
.with_target(agent_id)
|
||||
.build();
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_span_id(span_id.unwrap_or_else(generate_random_span_id))
|
||||
.with_kind(SpanKind::Client)
|
||||
.with_start_time(start_time)
|
||||
.with_end_time(end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, format!("/mcp ({})", operation))
|
||||
.with_attribute("mcp.operation", operation.to_string())
|
||||
.with_attribute("mcp.agent_id", agent_id.to_string())
|
||||
.with_attribute(
|
||||
"duration_ms",
|
||||
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
|
||||
);
|
||||
|
||||
if let Some(attrs) = additional_attrs {
|
||||
for (key, value) in attrs {
|
||||
span_builder = span_builder.with_attribute(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
span_builder = span_builder.with_trace_id(trace_id);
|
||||
}
|
||||
if !parent_span_id.is_empty() {
|
||||
span_builder = span_builder.with_parent_span_id(parent_span_id);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
// MCP spans also use plano(filter) service name as they are part of filter operations
|
||||
collector.record_span(operation_component::AGENT_FILTER, span);
|
||||
}
|
||||
|
||||
/// Process the filter chain of agents (all except the terminal agent)
|
||||
// /// Process the filter chain of agents (all except the terminal agent)
|
||||
// #[instrument(
|
||||
// skip(self, chat_history, agent_filter_chain, agent_map, request_headers),
|
||||
// fields(
|
||||
// filter_count = agent_filter_chain.filter_chain.as_ref().map(|fc| fc.len()).unwrap_or(0),
|
||||
// message_count = chat_history.len()
|
||||
// )
|
||||
// )]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn process_filter_chain(
|
||||
&mut self,
|
||||
|
|
@ -197,9 +94,6 @@ impl PipelineProcessor {
|
|||
agent_filter_chain: &AgentFilterChain,
|
||||
agent_map: &HashMap<String, Agent>,
|
||||
request_headers: &HeaderMap,
|
||||
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
let mut chat_history_updated = chat_history.to_vec();
|
||||
|
||||
|
|
@ -210,7 +104,7 @@ impl PipelineProcessor {
|
|||
};
|
||||
|
||||
for agent_name in filter_chain {
|
||||
debug!("Processing filter agent: {}", agent_name);
|
||||
debug!(agent = %agent_name, "processing filter agent");
|
||||
|
||||
let agent = agent_map
|
||||
.get(agent_name)
|
||||
|
|
@ -219,68 +113,29 @@ impl PipelineProcessor {
|
|||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
|
||||
info!(
|
||||
"executing filter: {}/{}, url: {}, type: {}, conversation length: {}",
|
||||
agent_name,
|
||||
tool_name,
|
||||
agent.url,
|
||||
agent.agent_type.as_deref().unwrap_or("mcp"),
|
||||
chat_history.len()
|
||||
agent = %agent_name,
|
||||
tool = %tool_name,
|
||||
url = %agent.url,
|
||||
agent_type = %agent.agent_type.as_deref().unwrap_or("mcp"),
|
||||
conversation_len = chat_history.len(),
|
||||
"executing filter"
|
||||
);
|
||||
|
||||
let start_time = SystemTime::now();
|
||||
let start_instant = Instant::now();
|
||||
|
||||
// Generate filter span ID before execution so MCP spans can use it as parent
|
||||
let filter_span_id = generate_random_span_id();
|
||||
|
||||
if agent.agent_type.as_deref().unwrap_or("mcp") == "mcp" {
|
||||
chat_history_updated = self
|
||||
.execute_mcp_filter(
|
||||
&chat_history_updated,
|
||||
agent,
|
||||
request_headers,
|
||||
trace_collector,
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
)
|
||||
.execute_mcp_filter(&chat_history_updated, agent, request_headers)
|
||||
.await?;
|
||||
} else {
|
||||
chat_history_updated = self
|
||||
.execute_http_filter(
|
||||
&chat_history_updated,
|
||||
agent,
|
||||
request_headers,
|
||||
trace_collector,
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
)
|
||||
.execute_http_filter(&chat_history_updated, agent, request_headers)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let end_time = SystemTime::now();
|
||||
let elapsed = start_instant.elapsed();
|
||||
|
||||
info!(
|
||||
"Filter '{}' completed in {:.2}ms, updated conversation length: {}",
|
||||
agent_name,
|
||||
elapsed.as_secs_f64() * 1000.0,
|
||||
chat_history_updated.len()
|
||||
agent = %agent_name,
|
||||
updated_len = chat_history_updated.len(),
|
||||
"filter completed"
|
||||
);
|
||||
|
||||
// Record span for this filter execution
|
||||
if let Some(collector) = trace_collector {
|
||||
self.record_filter_span(
|
||||
collector,
|
||||
agent_name,
|
||||
tool_name,
|
||||
start_time,
|
||||
end_time,
|
||||
elapsed,
|
||||
trace_id.clone(),
|
||||
parent_span_id.clone(),
|
||||
filter_span_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(chat_history_updated)
|
||||
|
|
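Editor's note: the hunk above replaces the paired SystemTime readings with a single monotonic Instant and switches the completion log to structured tracing fields. A minimal sketch of that pattern, outside this diff, with illustrative names:

use std::time::Instant;
use tracing::info;

// Sketch: time a unit of work with a monotonic clock and emit the result as
// structured tracing fields rather than a formatted string.
fn run_filter(agent_name: &str) {
    let start = Instant::now();
    // ... execute the filter ...
    info!(
        agent = %agent_name,
        duration_ms = start.elapsed().as_millis() as u64,
        "filter completed"
    );
}
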
@ -292,18 +147,17 @@ impl PipelineProcessor {
|
|||
request_headers: &HeaderMap,
|
||||
agent_id: &str,
|
||||
session_id: Option<&str>,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
) -> Result<HeaderMap, PipelineError> {
|
||||
let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id);
|
||||
let mut headers = request_headers.clone();
|
||||
headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
|
||||
// Inject OpenTelemetry trace context automatically
|
||||
headers.remove(TRACE_PARENT_HEADER);
|
||||
headers.insert(
|
||||
TRACE_PARENT_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
|
||||
);
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
let cx =
|
||||
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
|
||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut headers));
|
||||
});
|
||||
|
||||
headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
|
|
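Editor's note: the hunk above drops the hand-built traceparent header in favor of injecting the current span's context through the globally registered propagator. A minimal sketch of that injection, assuming the opentelemetry, opentelemetry-http and tracing-opentelemetry crates are already configured elsewhere:

use opentelemetry::global;
use opentelemetry_http::HeaderInjector;
use tracing_opentelemetry::OpenTelemetrySpanExt;

// Sketch: copy the current tracing span's OpenTelemetry context onto outgoing
// request headers (the W3C propagator writes `traceparent`/`tracestate`).
fn inject_trace_context(headers: &mut hyper::HeaderMap) {
    global::get_text_map_propagator(|propagator| {
        let cx = tracing::Span::current().context();
        propagator.inject_context(&cx, &mut HeaderInjector(headers));
    });
}
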
@ -348,9 +202,9 @@ impl PipelineProcessor {
|
|||
// Validate SSE format: first line should be "event: message"
|
||||
if lines.is_empty() || lines[0] != "event: message" {
|
||||
warn!(
|
||||
"Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}",
|
||||
agent_id,
|
||||
lines.first()
|
||||
agent = %agent_id,
|
||||
first_line = ?lines.first(),
|
||||
"invalid SSE response format"
|
||||
);
|
||||
return Err(PipelineError::NoContentInResponse(format!(
|
||||
"Invalid SSE response format from agent {}: expected 'event: message' as first line",
|
||||
|
|
@ -367,9 +221,9 @@ impl PipelineProcessor {
|
|||
|
||||
if data_lines.len() != 1 {
|
||||
warn!(
|
||||
"Expected exactly one 'data:' line from agent {}, found {}",
|
||||
agent_id,
|
||||
data_lines.len()
|
||||
agent = %agent_id,
|
||||
found = data_lines.len(),
|
||||
"expected exactly one 'data:' line"
|
||||
);
|
||||
return Err(PipelineError::NoContentInResponse(format!(
|
||||
"Expected exactly one 'data:' line from agent {}, found {}",
|
||||
|
|
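Editor's note: the two hunks above validate that an agent's SSE reply carries an `event: message` line followed by exactly one `data:` line. A standalone sketch of that check (error type and helper name are illustrative):

// Sketch: validate a single-event SSE body of the form
//   event: message
//   data: {...json...}
// and return the JSON payload.
fn extract_sse_payload(body: &str) -> Result<&str, String> {
    let lines: Vec<&str> = body.lines().filter(|l| !l.trim().is_empty()).collect();
    if lines.first() != Some(&"event: message") {
        return Err(format!("expected 'event: message', got {:?}", lines.first()));
    }
    let data_lines: Vec<&str> = lines
        .iter()
        .filter_map(|l| l.strip_prefix("data: "))
        .collect();
    if data_lines.len() != 1 {
        return Err(format!(
            "expected exactly one 'data:' line, found {}",
            data_lines.len()
        ));
    }
    Ok(data_lines[0])
}
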
@ -429,27 +283,34 @@ impl PipelineProcessor {
|
|||
}
|
||||
|
||||
/// Send request to a specific agent and return the response content
|
||||
#[instrument(
|
||||
skip(self, messages, agent, request_headers),
|
||||
fields(
|
||||
agent_id = %agent.id,
|
||||
filter_name = %agent.id,
|
||||
message_count = messages.len()
|
||||
)
|
||||
)]
|
||||
async fn execute_mcp_filter(
|
||||
&mut self,
|
||||
messages: &[Message],
|
||||
agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
|
||||
trace_id: String,
|
||||
filter_span_id: String,
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
// Set service name for this filter span
|
||||
set_service_name(operation_component::AGENT_FILTER);
|
||||
|
||||
// Update current span name to include filter name
|
||||
use opentelemetry::trace::get_active_span;
|
||||
get_active_span(|span| {
|
||||
span.update_name(format!("execute_mcp_filter ({})", agent.id));
|
||||
});
|
||||
|
||||
// Get or create MCP session
|
||||
let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
|
||||
session_id.clone()
|
||||
} else {
|
||||
let session_id = self
|
||||
.get_new_session_id(
|
||||
&agent.id,
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
request_headers,
|
||||
)
|
||||
.await;
|
||||
let session_id = self.get_new_session_id(&agent.id, request_headers).await;
|
||||
self.agent_id_session_map
|
||||
.insert(agent.id.clone(), session_id.clone());
|
||||
session_id
|
||||
|
|
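Editor's note: instead of threading explicit span IDs through the call, the replacement above renames the active OpenTelemetry span once the filter name is known. A minimal sketch of that renaming:

use opentelemetry::trace::get_active_span;

// Sketch: rename whatever OpenTelemetry span is active in the current context,
// e.g. to include the filter being executed.
fn rename_active_span(filter_name: &str) {
    get_active_span(|span| {
        span.update_name(format!("execute_filter ({})", filter_name));
    });
}
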
@ -464,21 +325,9 @@ impl PipelineProcessor {
|
|||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
let json_rpc_request = self.build_tool_call_request(tool_name, messages)?;
|
||||
|
||||
// Generate span ID for this MCP tool call (child of filter span)
|
||||
let mcp_span_id = generate_random_span_id();
|
||||
|
||||
// Build headers
|
||||
let agent_headers = self.build_mcp_headers(
|
||||
request_headers,
|
||||
&agent.id,
|
||||
Some(&mcp_session_id),
|
||||
trace_id.clone(),
|
||||
mcp_span_id.clone(),
|
||||
)?;
|
||||
|
||||
// Send request with tracing
|
||||
let start_time = SystemTime::now();
|
||||
let start_instant = Instant::now();
|
||||
let agent_headers =
|
||||
self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id))?;
|
||||
|
||||
let response = self
|
||||
.send_mcp_request(&json_rpc_request, &agent_headers, &agent.id)
|
||||
|
|
@ -486,31 +335,6 @@ impl PipelineProcessor {
|
|||
let http_status = response.status();
|
||||
let response_bytes = response.bytes().await?;
|
||||
|
||||
let end_time = SystemTime::now();
|
||||
let elapsed = start_instant.elapsed();
|
||||
|
||||
// Record MCP tool call span
|
||||
if let Some(collector) = trace_collector {
|
||||
let mut attrs = HashMap::new();
|
||||
attrs.insert("mcp.method", "tools/call".to_string());
|
||||
attrs.insert("mcp.tool_name", tool_name.to_string());
|
||||
attrs.insert("mcp.session_id", mcp_session_id.clone());
|
||||
attrs.insert("http.status_code", http_status.as_u16().to_string());
|
||||
|
||||
self.record_agent_filter_span(
|
||||
collector,
|
||||
"tool_call",
|
||||
&agent.id,
|
||||
start_time,
|
||||
end_time,
|
||||
elapsed,
|
||||
Some(attrs),
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
Some(mcp_span_id),
|
||||
);
|
||||
}
|
||||
|
||||
// Handle HTTP errors
|
||||
if !http_status.is_success() {
|
||||
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
|
||||
|
|
@ -611,8 +435,6 @@ impl PipelineProcessor {
|
|||
&self,
|
||||
agent_id: &str,
|
||||
session_id: &str,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
request_headers: &HeaderMap,
|
||||
) -> Result<(), PipelineError> {
|
||||
let initialized_notification = JsonRpcNotification {
|
||||
|
|
@ -622,15 +444,9 @@ impl PipelineProcessor {
|
|||
};
|
||||
|
||||
let notification_body = serde_json::to_string(&initialized_notification)?;
|
||||
debug!("Sending initialized notification for agent {}", agent_id);
|
||||
debug!("sending initialized notification for agent {}", agent_id);
|
||||
|
||||
let headers = self.build_mcp_headers(
|
||||
request_headers,
|
||||
agent_id,
|
||||
Some(session_id),
|
||||
trace_id.clone(),
|
||||
parent_span_id.clone(),
|
||||
)?;
|
||||
let headers = self.build_mcp_headers(request_headers, agent_id, Some(session_id))?;
|
||||
|
||||
let response = self
|
||||
.client
|
||||
|
|
@ -641,31 +457,19 @@ impl PipelineProcessor {
|
|||
.await?;
|
||||
|
||||
info!(
|
||||
"Initialized notification response status: {}",
|
||||
"initialized notification response status: {}",
|
||||
response.status()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_new_session_id(
|
||||
&self,
|
||||
agent_id: &str,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
request_headers: &HeaderMap,
|
||||
) -> String {
|
||||
info!("Initializing MCP session for agent {}", agent_id);
|
||||
async fn get_new_session_id(&self, agent_id: &str, request_headers: &HeaderMap) -> String {
|
||||
info!("initializing MCP session for agent {}", agent_id);
|
||||
|
||||
let initialize_request = self.build_initialize_request();
|
||||
let headers = self
|
||||
.build_mcp_headers(
|
||||
request_headers,
|
||||
agent_id,
|
||||
None,
|
||||
trace_id.clone(),
|
||||
parent_span_id.clone(),
|
||||
)
|
||||
.build_mcp_headers(request_headers, agent_id, None)
|
||||
.expect("Failed to build headers for initialization");
|
||||
|
||||
let response = self
|
||||
|
|
@ -673,7 +477,7 @@ impl PipelineProcessor {
|
|||
.await
|
||||
.expect("Failed to initialize MCP session");
|
||||
|
||||
info!("Initialize response status: {}", response.status());
|
||||
info!("initialize response status: {}", response.status());
|
||||
|
||||
let session_id = response
|
||||
.headers()
|
||||
|
|
@ -683,49 +487,54 @@ impl PipelineProcessor {
|
|||
.to_string();
|
||||
|
||||
info!(
|
||||
"Created new MCP session for agent {}: {}",
|
||||
"created new MCP session for agent {}: {}",
|
||||
agent_id, session_id
|
||||
);
|
||||
|
||||
// Send initialized notification
|
||||
self.send_initialized_notification(
|
||||
agent_id,
|
||||
&session_id,
|
||||
trace_id.clone(),
|
||||
parent_span_id.clone(),
|
||||
&headers,
|
||||
)
|
||||
.await
|
||||
.expect("Failed to send initialized notification");
|
||||
self.send_initialized_notification(agent_id, &session_id, &headers)
|
||||
.await
|
||||
.expect("Failed to send initialized notification");
|
||||
|
||||
session_id
|
||||
}
|
||||
|
||||
/// Execute an HTTP-based filter agent
|
||||
#[instrument(
|
||||
skip(self, messages, agent, request_headers),
|
||||
fields(
|
||||
agent_id = %agent.id,
|
||||
agent_url = %agent.url,
|
||||
filter_name = %agent.id,
|
||||
message_count = messages.len()
|
||||
)
|
||||
)]
|
||||
async fn execute_http_filter(
|
||||
&mut self,
|
||||
messages: &[Message],
|
||||
agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
|
||||
trace_id: String,
|
||||
filter_span_id: String,
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
// Set service name for this filter span
|
||||
set_service_name(operation_component::AGENT_FILTER);
|
||||
|
||||
// Generate span ID for this HTTP call (child of filter span)
|
||||
let http_span_id = generate_random_span_id();
|
||||
// Update current span name to include filter name
|
||||
use opentelemetry::trace::get_active_span;
|
||||
get_active_span(|span| {
|
||||
span.update_name(format!("execute_http_filter ({})", agent.id));
|
||||
});
|
||||
|
||||
// Build headers
|
||||
let trace_parent = format!("00-{}-{}-01", trace_id, http_span_id);
|
||||
let mut agent_headers = request_headers.clone();
|
||||
agent_headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
|
||||
// Inject OpenTelemetry trace context automatically
|
||||
agent_headers.remove(TRACE_PARENT_HEADER);
|
||||
agent_headers.insert(
|
||||
TRACE_PARENT_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
|
||||
);
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
let cx =
|
||||
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
|
||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
|
||||
});
|
||||
|
||||
agent_headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
|
|
@ -748,10 +557,6 @@ impl PipelineProcessor {
|
|||
hyper::header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
|
||||
// Send request with tracing
|
||||
let start_time = SystemTime::now();
|
||||
let start_instant = Instant::now();
|
||||
|
||||
debug!(
|
||||
"Sending HTTP request to agent {} at URL: {}",
|
||||
agent.id, agent.url
|
||||
|
|
@ -769,30 +574,6 @@ impl PipelineProcessor {
|
|||
let http_status = response.status();
|
||||
let response_bytes = response.bytes().await?;
|
||||
|
||||
let end_time = SystemTime::now();
|
||||
let elapsed = start_instant.elapsed();
|
||||
|
||||
// Record HTTP call span
|
||||
if let Some(collector) = trace_collector {
|
||||
let mut attrs = HashMap::new();
|
||||
attrs.insert("http.tool_name", tool_name.to_string());
|
||||
attrs.insert("http.url", agent.url.clone());
|
||||
attrs.insert("http.status_code", http_status.as_u16().to_string());
|
||||
|
||||
self.record_agent_filter_span(
|
||||
collector,
|
||||
"http_call",
|
||||
&agent.id,
|
||||
start_time,
|
||||
end_time,
|
||||
elapsed,
|
||||
Some(attrs),
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
Some(http_span_id),
|
||||
);
|
||||
}
|
||||
|
||||
// Handle HTTP errors
|
||||
if !http_status.is_success() {
|
||||
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
|
||||
|
|
@ -825,34 +606,34 @@ impl PipelineProcessor {
|
|||
}
|
||||
|
||||
/// Send request to terminal agent and return the raw response for streaming
|
||||
/// Note: The caller is responsible for creating the plano(agent) span that wraps
|
||||
/// both this call and the subsequent response consumption.
|
||||
pub async fn invoke_agent(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
mut original_request: ProviderRequestType,
|
||||
terminal_agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
trace_id: String,
|
||||
agent_span_id: String,
|
||||
) -> Result<reqwest::Response, PipelineError> {
|
||||
// let mut request = original_request.clone();
|
||||
original_request.set_messages(messages);
|
||||
|
||||
let request_url = "/v1/chat/completions";
|
||||
|
||||
let request_body = ProviderRequestType::to_bytes(&original_request).unwrap();
|
||||
// let request_body = serde_json::to_string(&request)?;
|
||||
debug!("Sending request to terminal agent {}", terminal_agent.id);
|
||||
debug!("sending request to terminal agent {}", terminal_agent.id);
|
||||
|
||||
let mut agent_headers = request_headers.clone();
|
||||
agent_headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
|
||||
// Set traceparent header to make the egress span a child of the agent span
|
||||
if !trace_id.is_empty() && !agent_span_id.is_empty() {
|
||||
let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id);
|
||||
agent_headers.remove(TRACE_PARENT_HEADER);
|
||||
agent_headers.insert(
|
||||
TRACE_PARENT_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
|
||||
);
|
||||
}
|
||||
// Inject OpenTelemetry trace context automatically
|
||||
agent_headers.remove(TRACE_PARENT_HEADER);
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
let cx =
|
||||
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
|
||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
|
||||
});
|
||||
|
||||
agent_headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
|
|
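Editor's note: the removed lines above built the traceparent header by hand; the replacement lets the propagator write it. For reference, a sketch of the W3C traceparent shape the old format string produced (the IDs below are illustrative values):

// version "00", 32 hex chars of trace id, 16 hex chars of parent span id,
// flags "01" (sampled) -- 55 characters in total.
let trace_id = "4bf92f3577b34da6a3ce929d0e0e4736";
let parent_span_id = "00f067aa0ba902b7";
let traceparent = format!("00-{}-{}-01", trace_id, parent_span_id);
assert_eq!(traceparent.len(), 55);
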
@ -867,7 +648,7 @@ impl PipelineProcessor {
|
|||
|
||||
let response = self
|
||||
.client
|
||||
.post(format!("{}/v1/chat/completions", self.url))
|
||||
.post(format!("{}{}", self.url, request_url))
|
||||
.headers(agent_headers)
|
||||
.body(request_body)
|
||||
.send()
|
||||
|
|
@ -914,15 +695,7 @@ mod tests {
|
|||
let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]);
|
||||
|
||||
let result = processor
|
||||
.process_filter_chain(
|
||||
&messages,
|
||||
&pipeline,
|
||||
&agent_map,
|
||||
&request_headers,
|
||||
None,
|
||||
String::new(),
|
||||
String::new(),
|
||||
)
|
||||
.process_filter_chain(&messages, &pipeline, &agent_map, &request_headers)
|
||||
.await;
|
||||
|
||||
assert!(result.is_err());
|
||||
|
|
@ -956,14 +729,7 @@ mod tests {
|
|||
let request_headers = HeaderMap::new();
|
||||
|
||||
let result = processor
|
||||
.execute_mcp_filter(
|
||||
&messages,
|
||||
&agent,
|
||||
&request_headers,
|
||||
None,
|
||||
"trace-123".to_string(),
|
||||
"span-123".to_string(),
|
||||
)
|
||||
.execute_mcp_filter(&messages, &agent, &request_headers)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
|
|
@ -1002,14 +768,7 @@ mod tests {
|
|||
let request_headers = HeaderMap::new();
|
||||
|
||||
let result = processor
|
||||
.execute_mcp_filter(
|
||||
&messages,
|
||||
&agent,
|
||||
&request_headers,
|
||||
None,
|
||||
"trace-456".to_string(),
|
||||
"span-456".to_string(),
|
||||
)
|
||||
.execute_mcp_filter(&messages, &agent, &request_headers)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
|
|
@ -1061,14 +820,7 @@ mod tests {
|
|||
let request_headers = HeaderMap::new();
|
||||
|
||||
let result = processor
|
||||
.execute_mcp_filter(
|
||||
&messages,
|
||||
&agent,
|
||||
&request_headers,
|
||||
None,
|
||||
"trace-789".to_string(),
|
||||
"span-789".to_string(),
|
||||
)
|
||||
.execute_mcp_filter(&messages, &agent, &request_headers)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use hyper::{Response, StatusCode};
|
|||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::{info, warn};
|
||||
use tracing::{info, warn, Instrument};
|
||||
|
||||
/// Errors that can occur during response handling
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
@ -69,10 +69,14 @@ impl ResponseHandler {
|
|||
response
|
||||
}
|
||||
|
||||
/// Create a streaming response from a reqwest response
|
||||
/// Create a streaming response from a reqwest response.
|
||||
/// The spawned streaming task is instrumented with both `agent_span` and `orchestrator_span`
|
||||
/// so their durations reflect the actual time spent streaming to the client.
|
||||
pub async fn create_streaming_response(
|
||||
&self,
|
||||
llm_response: reqwest::Response,
|
||||
agent_span: tracing::Span,
|
||||
orchestrator_span: tracing::Span,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ResponseError> {
|
||||
// Copy headers from the original response
|
||||
let response_headers = llm_response.headers();
|
||||
|
|
@ -89,25 +93,30 @@ impl ResponseHandler {
|
|||
// Create channel for async streaming
|
||||
let (tx, rx) = mpsc::channel::<Bytes>(16);
|
||||
|
||||
// Spawn task to stream data
|
||||
tokio::spawn(async move {
|
||||
let mut byte_stream = llm_response.bytes_stream();
|
||||
// Spawn streaming task instrumented with both spans (nested) so both
|
||||
// remain entered for the full streaming duration.
|
||||
tokio::spawn(
|
||||
async move {
|
||||
let mut byte_stream = llm_response.bytes_stream();
|
||||
|
||||
while let Some(item) = byte_stream.next().await {
|
||||
let chunk = match item {
|
||||
Ok(chunk) => chunk,
|
||||
Err(err) => {
|
||||
warn!("Error receiving chunk: {:?}", err);
|
||||
while let Some(item) = byte_stream.next().await {
|
||||
let chunk = match item {
|
||||
Ok(chunk) => chunk,
|
||||
Err(err) => {
|
||||
warn!(error = ?err, "error receiving chunk");
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if tx.send(chunk).await.is_err() {
|
||||
warn!("receiver dropped");
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if tx.send(chunk).await.is_err() {
|
||||
warn!("Receiver dropped");
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
.instrument(agent_span)
|
||||
.instrument(orchestrator_span),
|
||||
);
|
||||
|
||||
let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
|
||||
let stream_body = BoxBody::new(StreamBody::new(stream));
|
||||
|
|
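Editor's note: the hunk above keeps the copy loop inside both spans by instrumenting the spawned task, and bridges it to the client through a channel-backed body. A condensed sketch of the same shape, assuming hyper 1.x with http-body-util and tokio-stream:

use bytes::Bytes;
use http_body_util::{combinators::BoxBody, StreamBody};
use hyper::body::Frame;
use tokio::sync::mpsc;
use tokio_stream::{wrappers::ReceiverStream, StreamExt};
use tracing::Instrument;

// Sketch: forward chunks from a producer task (kept inside the request span)
// into a streaming hyper response body.
fn streaming_body(span: tracing::Span) -> BoxBody<Bytes, hyper::Error> {
    let (tx, rx) = mpsc::channel::<Bytes>(16);
    tokio::spawn(
        async move {
            for chunk in ["hello ", "world"] {
                if tx.send(Bytes::from(chunk)).await.is_err() {
                    tracing::warn!("receiver dropped");
                    break;
                }
            }
        }
        .instrument(span),
    );
    let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
    BoxBody::new(StreamBody::new(stream))
}
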
@ -164,11 +173,11 @@ impl ResponseHandler {
|
|||
if let Some(content) = provider_response.content_delta() {
|
||||
accumulated_text.push_str(content);
|
||||
} else {
|
||||
info!("No content delta in provider response");
|
||||
info!("no content delta in provider response");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to parse provider response: {:?}", e);
|
||||
warn!(error = ?e, "failed to parse provider response");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
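Editor's note: the hunks above switch log calls from format strings to structured fields. For readers unfamiliar with the tracing sigils: `%` records a field with its Display impl, `?` with its Debug impl, and a bare value uses its native Value impl. For example:

let err = std::io::Error::new(std::io::ErrorKind::Other, "boom");
// `%err` records the Display form, `?err` the Debug form.
tracing::warn!(error = %err, detail = ?err, "failed to parse provider response");
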
@ -248,7 +257,13 @@ mod tests {
|
|||
let llm_response = client.get(&(server.url() + "/test")).send().await.unwrap();
|
||||
|
||||
let handler = ResponseHandler::new();
|
||||
let result = handler.create_streaming_response(llm_response).await;
|
||||
let result = handler
|
||||
.create_streaming_response(
|
||||
llm_response,
|
||||
tracing::Span::current(),
|
||||
tracing::Span::current(),
|
||||
)
|
||||
.await;
|
||||
|
||||
mock.assert_async().await;
|
||||
assert!(result.is_ok());
|
||||
|
|
|
|||
|
|
@ -1,14 +1,12 @@
|
|||
use common::configuration::ModelUsagePreference;
|
||||
use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector};
|
||||
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
|
||||
use hermesllm::{ProviderRequest, ProviderRequestType};
|
||||
use hyper::StatusCode;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::router::llm_router::RouterService;
|
||||
use crate::tracing::{http, operation_component, routing, OperationNameBuilder};
|
||||
use crate::tracing::routing;
|
||||
|
||||
pub struct RoutingResult {
|
||||
pub model_name: String,
|
||||
|
|
@ -36,7 +34,6 @@ impl RoutingError {
|
|||
pub async fn router_chat_get_upstream_model(
|
||||
router_service: Arc<RouterService>,
|
||||
client_request: ProviderRequestType,
|
||||
trace_collector: Arc<TraceCollector>,
|
||||
traceparent: &str,
|
||||
request_path: &str,
|
||||
request_id: &str,
|
||||
|
|
@ -56,14 +53,14 @@ pub async fn router_chat_get_upstream_model(
|
|||
| ProviderRequestType::BedrockConverseStream(_)
|
||||
| ProviderRequestType::ResponsesAPIRequest(_),
|
||||
) => {
|
||||
warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
|
||||
warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
|
||||
return Err(RoutingError::internal_error(
|
||||
"Request conversion failed".to_string(),
|
||||
));
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to convert request to ChatCompletionsRequest: {}",
|
||||
"failed to convert request to ChatCompletionsRequest: {}",
|
||||
err
|
||||
);
|
||||
return Err(RoutingError::internal_error(format!(
|
||||
|
|
@ -74,9 +71,8 @@ pub async fn router_chat_get_upstream_model(
|
|||
};
|
||||
|
||||
debug!(
|
||||
"[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}",
|
||||
request_id,
|
||||
&serde_json::to_string(&chat_request).unwrap()
|
||||
request = %serde_json::to_string(&chat_request).unwrap(),
|
||||
"router request"
|
||||
);
|
||||
|
||||
// Extract usage preferences from metadata
|
||||
|
|
@ -112,16 +108,14 @@ pub async fn router_chat_get_upstream_model(
|
|||
};
|
||||
|
||||
info!(
|
||||
"[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
|
||||
request_id,
|
||||
usage_preferences.is_some(),
|
||||
request_path,
|
||||
latest_message_for_log
|
||||
has_usage_preferences = usage_preferences.is_some(),
|
||||
path = %request_path,
|
||||
latest_message = %latest_message_for_log,
|
||||
"processing router request"
|
||||
);
|
||||
|
||||
// Capture start time for routing span
|
||||
let routing_start_time = std::time::Instant::now();
|
||||
let routing_start_system_time = std::time::SystemTime::now();
|
||||
|
||||
// Attempt to determine route using the router service
|
||||
let routing_result = router_service
|
||||
|
|
@ -133,41 +127,21 @@ pub async fn router_chat_get_upstream_model(
|
|||
)
|
||||
.await;
|
||||
|
||||
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
||||
let current_span = tracing::Span::current();
|
||||
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
||||
|
||||
match routing_result {
|
||||
Ok(route) => match route {
|
||||
Some((_, model_name)) => {
|
||||
// Record successful routing span
|
||||
let mut attrs: HashMap<String, String> = HashMap::new();
|
||||
attrs.insert("route.selected_model".to_string(), model_name.clone());
|
||||
record_routing_span(
|
||||
trace_collector,
|
||||
traceparent,
|
||||
routing_start_time,
|
||||
routing_start_system_time,
|
||||
attrs,
|
||||
)
|
||||
.await;
|
||||
|
||||
current_span.record("route.selected_model", model_name.as_str());
|
||||
Ok(RoutingResult { model_name })
|
||||
}
|
||||
None => {
|
||||
// No route determined, return sentinel value "none"
|
||||
// This signals to llm.rs to use the original validated request model
|
||||
info!(
|
||||
"[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'",
|
||||
request_id
|
||||
);
|
||||
|
||||
let mut attrs = HashMap::new();
|
||||
attrs.insert("route.selected_model".to_string(), "none".to_string());
|
||||
record_routing_span(
|
||||
trace_collector,
|
||||
traceparent,
|
||||
routing_start_time,
|
||||
routing_start_system_time,
|
||||
attrs,
|
||||
)
|
||||
.await;
|
||||
current_span.record("route.selected_model", "none");
|
||||
info!("no route determined, using default model");
|
||||
|
||||
Ok(RoutingResult {
|
||||
model_name: "none".to_string(),
|
||||
|
|
@ -175,19 +149,7 @@ pub async fn router_chat_get_upstream_model(
|
|||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// Record failed routing span
|
||||
let mut attrs = HashMap::new();
|
||||
attrs.insert("route.selected_model".to_string(), "unknown".to_string());
|
||||
attrs.insert("error.message".to_string(), err.to_string());
|
||||
record_routing_span(
|
||||
trace_collector,
|
||||
traceparent,
|
||||
routing_start_time,
|
||||
routing_start_system_time,
|
||||
attrs,
|
||||
)
|
||||
.await;
|
||||
|
||||
current_span.record("route.selected_model", "unknown");
|
||||
Err(RoutingError::internal_error(format!(
|
||||
"Failed to determine route: {}",
|
||||
err
|
||||
|
|
@ -195,53 +157,3 @@ pub async fn router_chat_get_upstream_model(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to record a routing span with the given attributes.
|
||||
/// Reduces code duplication across different routing outcomes.
|
||||
async fn record_routing_span(
|
||||
trace_collector: Arc<TraceCollector>,
|
||||
traceparent: &str,
|
||||
start_time: std::time::Instant,
|
||||
start_system_time: std::time::SystemTime,
|
||||
attrs: HashMap<String, String>,
|
||||
) {
|
||||
// The routing always uses OpenAI Chat Completions format internally,
|
||||
// so we log that as the actual API being used for routing
|
||||
let routing_api_path = "/v1/chat/completions";
|
||||
|
||||
let routing_operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path(routing_api_path)
|
||||
.with_target("Arch-Router-1.5B")
|
||||
.build();
|
||||
|
||||
let (trace_id, parent_span_id) = parse_traceparent(traceparent);
|
||||
|
||||
// Build the routing span directly using constants
|
||||
let mut span_builder = SpanBuilder::new(&routing_operation_name)
|
||||
.with_trace_id(&trace_id)
|
||||
.with_kind(SpanKind::Client)
|
||||
.with_start_time(start_system_time)
|
||||
.with_end_time(std::time::SystemTime::now())
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, routing_api_path.to_string())
|
||||
.with_attribute(
|
||||
routing::ROUTE_DETERMINATION_MS,
|
||||
start_time.elapsed().as_millis().to_string(),
|
||||
);
|
||||
|
||||
// Only set parent span ID if it exists (not a root span)
|
||||
if let Some(parent) = parent_span_id {
|
||||
span_builder = span_builder.with_parent_span_id(&parent);
|
||||
}
|
||||
|
||||
// Add all custom attributes
|
||||
for (key, value) in attrs {
|
||||
span_builder = span_builder.with_attribute(key, value);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
|
||||
// Record the span directly to the collector
|
||||
trace_collector.record_span(operation_component::ROUTING, span);
|
||||
}
|
||||
|
|
|
|||
|
|
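Editor's note: the routing handler above now attaches its results with Span::record on the current span instead of building a routing span by hand. One caveat worth keeping in mind: record only writes fields that were declared when the span was created, so fields filled in later are declared up front as Empty. A minimal sketch of that pattern (field names are illustrative):

use tracing::field::Empty;

// Sketch: declare span fields up front, fill them in once routing completes.
fn routed(model_name: &str) {
    let span = tracing::info_span!(
        "route_request",
        route.determination_ms = Empty,
        route.selected_model = Empty,
    );
    let _guard = span.enter();
    // ... determine the route ...
    span.record("route.determination_ms", 42_i64);
    span.record("route.selected_model", model_name);
}
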
@ -1,18 +1,18 @@
|
|||
use bytes::Bytes;
|
||||
use common::traces::{Attribute, AttributeValue, Event, Span, TraceCollector};
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use http_body_util::StreamBody;
|
||||
use hyper::body::Frame;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Instant, SystemTime};
|
||||
use opentelemetry::trace::TraceContextExt;
|
||||
use opentelemetry::KeyValue;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::warn;
|
||||
use tracing::{info, warn, Instrument};
|
||||
use tracing_opentelemetry::OpenTelemetrySpanExt;
|
||||
|
||||
// Import tracing constants and signals
|
||||
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
|
||||
use crate::tracing::{error, llm, signals as signal_constants};
|
||||
use crate::tracing::{llm, set_service_name, signals as signal_constants};
|
||||
use hermesllm::apis::openai::Message;
|
||||
|
||||
/// Trait for processing streaming chunks
|
||||
|
|
@ -31,11 +31,10 @@ pub trait StreamProcessor: Send + 'static {
|
|||
fn on_error(&mut self, _error: &str) {}
|
||||
}
|
||||
|
||||
/// A processor that tracks streaming metrics and finalizes the span
|
||||
/// A processor that tracks streaming metrics
|
||||
pub struct ObservableStreamProcessor {
|
||||
collector: Arc<TraceCollector>,
|
||||
service_name: String,
|
||||
span: Span,
|
||||
operation_name: String,
|
||||
total_bytes: usize,
|
||||
chunk_count: usize,
|
||||
start_time: Instant,
|
||||
|
|
@ -47,22 +46,28 @@ impl ObservableStreamProcessor {
|
|||
/// Create a new passthrough processor
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `collector` - The trace collector to record the span to
|
||||
/// * `service_name` - The service name for this span (e.g., "archgw(llm)")
|
||||
/// * `span` - The span to finalize after streaming completes
|
||||
/// * `service_name` - The service name for this span (e.g., "plano(llm)")
|
||||
/// This will be set as the `service.name.override` attribute on the current span,
|
||||
/// allowing the ServiceNameOverrideExporter to route spans to different services.
|
||||
/// * `operation_name` - The current span operation name (e.g., "POST /v1/chat/completions gpt-4")
|
||||
/// Used to append the flag marker when concerning signals are detected.
|
||||
/// * `start_time` - When the request started (for duration calculation)
|
||||
/// * `messages` - Optional conversation messages for signal analysis
|
||||
pub fn new(
|
||||
collector: Arc<TraceCollector>,
|
||||
service_name: impl Into<String>,
|
||||
span: Span,
|
||||
operation_name: impl Into<String>,
|
||||
start_time: Instant,
|
||||
messages: Option<Vec<Message>>,
|
||||
) -> Self {
|
||||
let service_name = service_name.into();
|
||||
|
||||
// Set the service name override on the current span for OpenTelemetry export
|
||||
// This allows the ServiceNameOverrideExporter to route this span to the correct service
|
||||
set_service_name(&service_name);
|
||||
|
||||
Self {
|
||||
collector,
|
||||
service_name: service_name.into(),
|
||||
span,
|
||||
service_name,
|
||||
operation_name: operation_name.into(),
|
||||
total_bytes: 0,
|
||||
chunk_count: 0,
|
||||
start_time,
|
||||
|
|
@ -87,89 +92,81 @@ impl StreamProcessor for ObservableStreamProcessor {
|
|||
}
|
||||
|
||||
fn on_complete(&mut self) {
|
||||
// Update span with streaming metrics and end time
|
||||
let end_time_nanos = SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_nanos();
|
||||
|
||||
self.span.end_time_unix_nano = format!("{}", end_time_nanos);
|
||||
|
||||
// Add streaming metrics as attributes using constants
|
||||
self.span.attributes.push(Attribute {
|
||||
key: llm::RESPONSE_BYTES.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(self.total_bytes.to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
self.span.attributes.push(Attribute {
|
||||
key: llm::DURATION_MS.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(self.start_time.elapsed().as_millis().to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
// Add time to first token if available (streaming only)
|
||||
// Record time-to-first-token as an OTel span attribute + event (streaming only)
|
||||
if let Some(ttft) = self.time_to_first_token {
|
||||
self.span.attributes.push(Attribute {
|
||||
key: llm::TIME_TO_FIRST_TOKEN_MS.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(ttft.to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
// Add time to first token as a span event
|
||||
// Calculate the timestamp by adding ttft duration to span start time
|
||||
if let Ok(start_time_nanos) = self.span.start_time_unix_nano.parse::<u128>() {
|
||||
// Convert ttft from milliseconds to nanoseconds and add to start time
|
||||
let event_timestamp = start_time_nanos + (ttft * 1_000_000);
|
||||
let mut event =
|
||||
Event::new(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), event_timestamp);
|
||||
event.add_attribute(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), ttft.to_string());
|
||||
|
||||
// Initialize events vector if needed
|
||||
if self.span.events.is_none() {
|
||||
self.span.events = Some(Vec::new());
|
||||
}
|
||||
|
||||
if let Some(ref mut events) = self.span.events {
|
||||
events.push(event);
|
||||
}
|
||||
}
|
||||
let span = tracing::Span::current();
|
||||
let otel_context = span.context();
|
||||
let otel_span = otel_context.span();
|
||||
otel_span.set_attribute(KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64));
|
||||
otel_span.add_event(
|
||||
llm::TIME_TO_FIRST_TOKEN_MS,
|
||||
vec![KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64)],
|
||||
);
|
||||
}
|
||||
|
||||
// Analyze signals if messages are available and add to span attributes
|
||||
// Analyze signals if messages are available and record as span attributes
|
||||
if let Some(ref messages) = self.messages {
|
||||
let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
|
||||
let report = analyzer.analyze(messages);
|
||||
|
||||
// Get the current OTel span to set signal attributes
|
||||
let span = tracing::Span::current();
|
||||
let otel_context = span.context();
|
||||
let otel_span = otel_context.span();
|
||||
|
||||
// Add overall quality
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::QUALITY.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(format!("{:?}", report.overall_quality)),
|
||||
},
|
||||
});
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::QUALITY,
|
||||
format!("{:?}", report.overall_quality),
|
||||
));
|
||||
|
||||
// Add repair/follow-up metrics if concerning
|
||||
if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::REPAIR_COUNT.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(report.follow_up.repair_count.to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::REPAIR_RATIO.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(format!("{:.3}", report.follow_up.repair_ratio)),
|
||||
},
|
||||
});
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPAIR_COUNT,
|
||||
report.follow_up.repair_count as i64,
|
||||
));
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPAIR_RATIO,
|
||||
format!("{:.3}", report.follow_up.repair_ratio),
|
||||
));
|
||||
}
|
||||
|
||||
// Add flag marker to operation name if any concerning signal is detected
|
||||
// Add frustration metrics
|
||||
if report.frustration.has_frustration {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::FRUSTRATION_COUNT,
|
||||
report.frustration.frustration_count as i64,
|
||||
));
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::FRUSTRATION_SEVERITY,
|
||||
report.frustration.severity as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Add repetition metrics
|
||||
if report.repetition.has_looping {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPETITION_COUNT,
|
||||
report.repetition.repetition_count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Add escalation metrics
|
||||
if report.escalation.escalation_requested {
|
||||
otel_span
|
||||
.set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
|
||||
}
|
||||
|
||||
// Add positive feedback metrics
|
||||
if report.positive_feedback.has_positive_feedback {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::POSITIVE_FEEDBACK_COUNT,
|
||||
report.positive_feedback.positive_count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Flag the span name if any concerning signal is detected
|
||||
let should_flag = report.frustration.has_frustration
|
||||
|| report.repetition.has_looping
|
||||
|| report.escalation.escalation_requested
|
||||
|
|
@ -179,94 +176,27 @@ impl StreamProcessor for ObservableStreamProcessor {
|
|||
);
|
||||
|
||||
if should_flag {
|
||||
// Prepend flag marker to the operation name
|
||||
self.span.name = format!("{} {}", self.span.name, FLAG_MARKER);
|
||||
}
|
||||
|
||||
// Add key signal metrics
|
||||
if report.frustration.has_frustration {
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::FRUSTRATION_COUNT.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(report.frustration.frustration_count.to_string()),
|
||||
},
|
||||
});
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::FRUSTRATION_SEVERITY.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(report.frustration.severity.to_string()),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if report.repetition.has_looping {
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::REPETITION_COUNT.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(report.repetition.repetition_count.to_string()),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if report.escalation.escalation_requested {
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::ESCALATION_REQUESTED.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some("true".to_string()),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if report.positive_feedback.has_positive_feedback {
|
||||
self.span.attributes.push(Attribute {
|
||||
key: signal_constants::POSITIVE_FEEDBACK_COUNT.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(report.positive_feedback.positive_count.to_string()),
|
||||
},
|
||||
});
|
||||
otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
|
||||
}
|
||||
}
|
||||
|
||||
// Record the finalized span
|
||||
self.collector
|
||||
.record_span(&self.service_name, self.span.clone());
|
||||
info!(
|
||||
service = %self.service_name,
|
||||
total_bytes = self.total_bytes,
|
||||
chunk_count = self.chunk_count,
|
||||
duration_ms = self.start_time.elapsed().as_millis(),
|
||||
time_to_first_token_ms = ?self.time_to_first_token,
|
||||
"streaming completed"
|
||||
);
|
||||
}
|
||||
|
||||
fn on_error(&mut self, error_msg: &str) {
|
||||
warn!("Stream error in PassthroughProcessor: {}", error_msg);
|
||||
|
||||
// Update span with error info and end time
|
||||
let end_time_nanos = SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_nanos();
|
||||
|
||||
self.span.end_time_unix_nano = format!("{}", end_time_nanos);
|
||||
|
||||
self.span.attributes.push(Attribute {
|
||||
key: error::ERROR.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some("true".to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
self.span.attributes.push(Attribute {
|
||||
key: error::MESSAGE.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(error_msg.to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
self.span.attributes.push(Attribute {
|
||||
key: llm::DURATION_MS.to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some(self.start_time.elapsed().as_millis().to_string()),
|
||||
},
|
||||
});
|
||||
|
||||
// Record the error span
|
||||
self.collector
|
||||
.record_span(&self.service_name, self.span.clone());
|
||||
warn!(
|
||||
service = %self.service_name,
|
||||
error = error_msg,
|
||||
duration_ms = self.start_time.elapsed().as_millis(),
|
||||
"stream error"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
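Editor's note: the on_complete/on_error rewrites above stop hand-assembling a Span struct and instead write attributes and events onto the OpenTelemetry span behind the current tracing span. A minimal sketch of that bridge, assuming a tracing-opentelemetry layer is installed (the attribute key is illustrative):

use opentelemetry::trace::TraceContextExt;
use opentelemetry::KeyValue;
use tracing_opentelemetry::OpenTelemetrySpanExt;

// Sketch: attach an attribute and an event to the OTel span backing the
// current tracing span.
fn record_ttft(ttft_ms: i64) {
    let cx = tracing::Span::current().context();
    let span = cx.span();
    span.set_attribute(KeyValue::new("llm.time_to_first_token_ms", ttft_ms));
    span.add_event(
        "llm.time_to_first_token_ms",
        vec![KeyValue::new("llm.time_to_first_token_ms", ttft_ms)],
    );
}
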
@ -287,49 +217,55 @@ where
|
|||
{
|
||||
let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);
|
||||
|
||||
// Capture the current span so the spawned task inherits the request context
|
||||
let current_span = tracing::Span::current();
|
||||
|
||||
// Spawn a task to process and forward chunks
|
||||
let processor_handle = tokio::spawn(async move {
|
||||
let mut is_first_chunk = true;
|
||||
let processor_handle = tokio::spawn(
|
||||
async move {
|
||||
let mut is_first_chunk = true;
|
||||
|
||||
while let Some(item) = byte_stream.next().await {
|
||||
let chunk = match item {
|
||||
Ok(chunk) => chunk,
|
||||
Err(err) => {
|
||||
let err_msg = format!("Error receiving chunk: {:?}", err);
|
||||
warn!("{}", err_msg);
|
||||
processor.on_error(&err_msg);
|
||||
break;
|
||||
while let Some(item) = byte_stream.next().await {
|
||||
let chunk = match item {
|
||||
Ok(chunk) => chunk,
|
||||
Err(err) => {
|
||||
let err_msg = format!("Error receiving chunk: {:?}", err);
|
||||
warn!(error = %err_msg, "stream error");
|
||||
processor.on_error(&err_msg);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// Call on_first_bytes for the first chunk
|
||||
if is_first_chunk {
|
||||
processor.on_first_bytes();
|
||||
is_first_chunk = false;
|
||||
}
|
||||
};
|
||||
|
||||
// Call on_first_bytes for the first chunk
|
||||
if is_first_chunk {
|
||||
processor.on_first_bytes();
|
||||
is_first_chunk = false;
|
||||
}
|
||||
|
||||
// Process the chunk
|
||||
match processor.process_chunk(chunk) {
|
||||
Ok(Some(processed_chunk)) => {
|
||||
if tx.send(processed_chunk).await.is_err() {
|
||||
warn!("Receiver dropped");
|
||||
// Process the chunk
|
||||
match processor.process_chunk(chunk) {
|
||||
Ok(Some(processed_chunk)) => {
|
||||
if tx.send(processed_chunk).await.is_err() {
|
||||
warn!("receiver dropped");
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
// Skip this chunk
|
||||
continue;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("processor error: {}", err);
|
||||
processor.on_error(&err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
// Skip this chunk
|
||||
continue;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Processor error: {}", err);
|
||||
processor.on_error(&err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
processor.on_complete();
|
||||
});
|
||||
processor.on_complete();
|
||||
}
|
||||
.instrument(current_span),
|
||||
);
|
||||
|
||||
// Convert channel receiver to HTTP stream
|
||||
let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ use common::consts::{
|
|||
CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
|
||||
};
|
||||
use common::llm_providers::LlmProviders;
|
||||
use common::traces::TraceCollector;
|
||||
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
|
||||
use hyper::body::Incoming;
|
||||
use hyper::server::conn::http1;
|
||||
|
|
@ -51,13 +50,12 @@ fn empty() -> BoxBody<Bytes, hyper::Error> {
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let _tracer_provider = init_tracer();
|
||||
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
||||
|
||||
// loading arch_config.yaml file
|
||||
// loading arch_config.yaml file (before tracing init so we can read tracing config)
|
||||
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
|
||||
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
|
||||
info!("Loading arch_config.yaml from {}", arch_config_path);
|
||||
eprintln!("loading arch_config.yaml from {}", arch_config_path);
|
||||
|
||||
let config_contents =
|
||||
fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml");
|
||||
|
|
@ -65,6 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let config: Configuration =
|
||||
serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml");
|
||||
|
||||
// Initialize tracing using config.yaml tracing section
|
||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||
info!(path = %arch_config_path, "loaded arch_config.yaml");
|
||||
|
||||
let arch_config = Arc::new(config);
|
||||
|
||||
// combine agents and filters into a single list of agents
|
||||
|
|
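Editor's note: the change above reorders startup so the YAML config is parsed before tracing is initialized, which is why only eprintln! is available for the very first message. A rough sketch of the resulting ordering (Configuration and init_tracer are the project's own items and may differ in detail):

// Sketch: read and parse the config first, then hand its tracing section to
// the tracer initializer; only then are tracing macros meaningful.
let contents = std::fs::read_to_string(&arch_config_path)?;
eprintln!("loading arch_config.yaml from {}", arch_config_path);
let config: Configuration = serde_yaml::from_str(&contents)?;
let _tracer_provider = init_tracer(config.tracing.as_ref());
tracing::info!(path = %arch_config_path, "loaded arch_config.yaml");
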
@ -116,17 +118,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
// Initialize trace collector and start background flusher
|
||||
// Tracing is enabled if the tracing config is present in arch_config.yaml
|
||||
// Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED
|
||||
let tracing_enabled = if arch_config.tracing.is_some() {
|
||||
info!("Tracing configuration found in arch_config.yaml");
|
||||
Some(true)
|
||||
} else {
|
||||
info!(
|
||||
"No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var"
|
||||
);
|
||||
None
|
||||
};
|
||||
let trace_collector = Arc::new(TraceCollector::new(tracing_enabled));
|
||||
let _flusher_handle = trace_collector.clone().start_background_flusher();
|
||||
// OpenTelemetry automatic instrumentation is configured in utils/tracing.rs
|
||||
|
||||
// Initialize conversation state storage for v1/responses
|
||||
// Configurable via arch_config.yaml state_storage section
|
||||
|
|
@ -136,7 +128,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
if let Some(storage_config) = &arch_config.state_storage {
|
||||
let storage: Arc<dyn StateStorage> = match storage_config.storage_type {
|
||||
common::configuration::StateStorageType::Memory => {
|
||||
info!("Initialized conversation state storage: Memory");
|
||||
info!(
|
||||
storage_type = "memory",
|
||||
"initialized conversation state storage"
|
||||
);
|
||||
Arc::new(MemoryConversationalStorage::new())
|
||||
}
|
||||
common::configuration::StateStorageType::Postgres => {
|
||||
|
|
@ -145,8 +140,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
.as_ref()
|
||||
.expect("connection_string is required for postgres state_storage");
|
||||
|
||||
debug!("Postgres connection string (full): {}", connection_string);
|
||||
info!("Initializing conversation state storage: Postgres");
|
||||
debug!(connection_string = %connection_string, "postgres connection");
|
||||
info!(
|
||||
storage_type = "postgres",
|
||||
"initializing conversation state storage"
|
||||
);
|
||||
Arc::new(
|
||||
PostgreSQLConversationStorage::new(connection_string.clone())
|
||||
.await
|
||||
|
|
@ -156,7 +154,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
};
|
||||
Some(storage)
|
||||
} else {
|
||||
info!("No state_storage configured - conversation state management disabled");
|
||||
info!("no state_storage configured, conversation state management disabled");
|
||||
None
|
||||
};
|
||||
|
||||
|
|
@ -175,7 +173,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let llm_providers = llm_providers.clone();
|
||||
let agents_list = combined_agents_filters_list.clone();
|
||||
let listeners = listeners.clone();
|
||||
let trace_collector = trace_collector.clone();
|
||||
let state_storage = state_storage.clone();
|
||||
let service = service_fn(move |req| {
|
||||
let router_service = Arc::clone(&router_service);
|
||||
|
|
@ -186,7 +183,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let model_aliases = Arc::clone(&model_aliases);
|
||||
let agents_list = agents_list.clone();
|
||||
let listeners = listeners.clone();
|
||||
let trace_collector = trace_collector.clone();
|
||||
let state_storage = state_storage.clone();
|
||||
|
||||
async move {
|
||||
|
|
@ -206,7 +202,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
fully_qualified_url,
|
||||
agents_list,
|
||||
listeners,
|
||||
trace_collector,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
.await;
|
||||
|
|
@ -224,7 +219,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
fully_qualified_url,
|
||||
model_aliases,
|
||||
llm_providers,
|
||||
trace_collector,
|
||||
state_storage,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
|
|
@ -265,7 +259,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
Ok(response)
|
||||
}
|
||||
_ => {
|
||||
debug!("No route for {} {}", req.method(), req.uri().path());
|
||||
debug!(method = %req.method(), path = %req.uri().path(), "no route found");
|
||||
let mut not_found = Response::new(empty());
|
||||
*not_found.status_mut() = StatusCode::NOT_FOUND;
|
||||
Ok(not_found)
|
||||
|
|
@ -275,13 +269,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
});
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
debug!("Accepted connection from {:?}", peer_addr);
|
||||
debug!(peer = ?peer_addr, "accepted connection");
|
||||
if let Err(err) = http1::Builder::new()
|
||||
// .serve_connection(io, service_fn(chat_completion))
|
||||
.serve_connection(io, service)
|
||||
.await
|
||||
{
|
||||
warn!("Error serving connection: {:?}", err);
|
||||
warn!(error = ?err, "error serving connection");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,14 +96,14 @@ impl RouterService {
|
|||
.generate_request(messages, &usage_preferences);
|
||||
|
||||
debug!(
|
||||
"sending request to arch-router model: {}, endpoint: {}",
|
||||
self.router_model.get_model_name(),
|
||||
self.router_url
|
||||
model = %self.router_model.get_model_name(),
|
||||
endpoint = %self.router_url,
|
||||
"sending request to arch-router"
|
||||
);
|
||||
|
||||
debug!(
|
||||
"arch request body: {}",
|
||||
&serde_json::to_string(&router_request).unwrap(),
|
||||
body = %serde_json::to_string(&router_request).unwrap(),
|
||||
"arch router request"
|
||||
);
|
||||
|
||||
let mut llm_route_request_headers = header::HeaderMap::new();
|
||||
|
|
@ -148,9 +148,9 @@ impl RouterService {
|
|||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to parse JSON: {}. Body: {}",
|
||||
err,
|
||||
&serde_json::to_string(&body).unwrap()
|
||||
error = %err,
|
||||
body = %serde_json::to_string(&body).unwrap(),
|
||||
"failed to parse json response"
|
||||
);
|
||||
return Err(RoutingError::JsonError(
|
||||
err,
|
||||
|
|
@ -160,7 +160,7 @@ impl RouterService {
|
|||
};
|
||||
|
||||
if chat_completion_response.choices.is_empty() {
|
||||
warn!("No choices in router response: {}", body);
|
||||
warn!(body = %body, "no choices in router response");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
|
@ -169,10 +169,10 @@ impl RouterService {
|
|||
.router_model
|
||||
.parse_response(content, &usage_preferences)?;
|
||||
info!(
|
||||
"arch-router determined route: {}, selected_model: {:?}, response time: {}ms",
|
||||
content.replace("\n", "\\n"),
|
||||
parsed_response,
|
||||
router_response_time.as_millis()
|
||||
content = %content.replace("\n", "\\n"),
|
||||
selected_model = ?parsed_response,
|
||||
response_time_ms = router_response_time.as_millis(),
|
||||
"arch-router determined route"
|
||||
);
|
||||
|
||||
if let Some(ref parsed_response) = parsed_response {
|
||||
|
|
|
|||
|
|
@ -197,12 +197,12 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
token_count += message_token_count;
|
||||
if token_count > self.max_token_length {
|
||||
debug!(
|
||||
"OrchestratorModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
|
||||
token_count,
|
||||
self.max_token_length
|
||||
, selected_messsage_count,
|
||||
messages_vec.len()
|
||||
);
|
||||
token_count = token_count,
|
||||
max_tokens = self.max_token_length,
|
||||
selected = selected_messsage_count,
|
||||
total = messages_vec.len(),
|
||||
"token count exceeds max, truncating conversation"
|
||||
);
|
||||
if message.role == Role::User {
|
||||
// If message that exceeds max token length is from user, we need to keep it
|
||||
selected_messages_list_reversed.push(message);
|
||||
|
|
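Editor's note: the hunk above reworks the truncation log into structured fields; the underlying selection walks the conversation newest-to-oldest until the token budget is exhausted and then restores original order. A simplified sketch of that selection (the token estimator is a stand-in, and the real code additionally keeps an over-budget message when it comes from the user):

// Sketch: keep the most recent messages that fit within `max_tokens`,
// preserving original order.
fn truncate_to_budget<M>(
    messages: &[M],
    max_tokens: usize,
    estimate_tokens: impl Fn(&M) -> usize,
) -> Vec<&M> {
    let mut selected = Vec::new();
    let mut used = 0usize;
    for msg in messages.iter().rev() {
        used += estimate_tokens(msg);
        if used > max_tokens && !selected.is_empty() {
            break;
        }
        selected.push(msg);
    }
    selected.reverse();
    selected
}
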
@ -214,9 +214,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
}
|
||||
|
||||
if selected_messages_list_reversed.is_empty() {
|
||||
debug!(
|
||||
"OrchestratorModelV1: no messages selected, using the last message in the conversation"
|
||||
);
|
||||
debug!("no messages selected, using last message");
|
||||
if let Some(last_message) = messages_vec.last() {
|
||||
selected_messages_list_reversed.push(last_message);
|
||||
}
|
||||
|
|
@ -228,12 +226,12 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
// - last() is the first message in the original conversation
|
||||
if let Some(first_message) = selected_messages_list_reversed.first() {
|
||||
if first_message.role != Role::User {
|
||||
warn!("OrchestratorModelV1: last message in the conversation is not from user, this may lead to incorrect orchestration");
|
||||
warn!("last message is not from user, may lead to incorrect orchestration");
|
||||
}
|
||||
}
|
||||
if let Some(last_message) = selected_messages_list_reversed.last() {
|
||||
if last_message.role != Role::User {
|
||||
warn!("OrchestratorModelV1: first message in the selected conversation is not from user, this may lead to incorrect orchestration");
|
||||
warn!("first message is not from user, may lead to incorrect orchestration");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -323,8 +321,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
result.push((selected_route, model_name));
|
||||
} else {
|
||||
warn!(
|
||||
"No matching model found for route: {}, usage preferences: {:?}",
|
||||
selected_route, usage_preferences
|
||||
route = %selected_route,
|
||||
preferences = ?usage_preferences,
|
||||
"no matching model found for route"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -339,8 +338,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
result.push((selected_route, model));
|
||||
} else {
|
||||
warn!(
|
||||
"No model found for route: {}, orchestrator model preferences: {:?}",
|
||||
selected_route, self.agent_orchestration_to_model_map
|
||||
route = %selected_route,
|
||||
preferences = ?self.agent_orchestration_to_model_map,
|
||||
"no model found for route"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,13 +2,12 @@ use std::{collections::HashMap, sync::Arc};
|
|||
|
||||
use common::{
|
||||
configuration::{AgentUsagePreference, OrchestrationPreference},
|
||||
consts::{
|
||||
ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER,
|
||||
TRACE_PARENT_HEADER,
|
||||
},
|
||||
consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER},
|
||||
};
|
||||
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
|
||||
use hyper::header;
|
||||
use opentelemetry::global;
|
||||
use opentelemetry_http::HeaderInjector;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
|
|
@ -57,7 +56,6 @@ impl OrchestratorService {
|
|||
pub async fn determine_orchestration(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
trace_parent: Option<String>,
|
||||
usage_preferences: Option<Vec<AgentUsagePreference>>,
|
||||
request_id: Option<String>,
|
||||
) -> Result<Option<Vec<(String, String)>>> {
|
||||
|
|
@ -75,14 +73,14 @@ impl OrchestratorService {
|
|||
.generate_request(messages, &usage_preferences);
|
||||
|
||||
debug!(
|
||||
"sending request to arch-orchestrator model: {}, endpoint: {}",
|
||||
self.orchestrator_model.get_model_name(),
|
||||
self.orchestrator_url
|
||||
model = %self.orchestrator_model.get_model_name(),
|
||||
endpoint = %self.orchestrator_url,
|
||||
"sending request to arch-orchestrator"
|
||||
);
|
||||
|
||||
debug!(
|
||||
"arch orchestrator request body: {}",
|
||||
&serde_json::to_string(&orchestrator_request).unwrap(),
|
||||
body = %serde_json::to_string(&orchestrator_request).unwrap(),
|
||||
"arch orchestrator request"
|
||||
);
|
||||
|
||||
let mut orchestration_request_headers = header::HeaderMap::new();
|
||||
|
|
@@ -96,12 +94,12 @@ impl OrchestratorService {
header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
);

if let Some(trace_parent) = trace_parent {
orchestration_request_headers.insert(
header::HeaderName::from_static(TRACE_PARENT_HEADER),
header::HeaderValue::from_str(&trace_parent).unwrap(),
);
}
// Inject OpenTelemetry trace context from current span
global::get_text_map_propagator(|propagator| {
let cx =
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut orchestration_request_headers));
});

if let Some(request_id) = request_id {
orchestration_request_headers.insert(
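For context, the counterpart on the receiving service extracts the propagated context from the incoming headers. A minimal sketch (not part of this commit), assuming the same `opentelemetry` and `opentelemetry-http` crates:

```rust
use opentelemetry::global;
use opentelemetry_http::HeaderExtractor;

// Given the headers of an incoming hyper request, rebuild the remote context
// that was injected above so it can serve as the parent of a new local span.
fn extract_parent_context(headers: &hyper::HeaderMap) -> opentelemetry::Context {
    global::get_text_map_propagator(|propagator| propagator.extract(&HeaderExtractor(headers)))
}
```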
@@ -131,9 +129,9 @@ impl OrchestratorService {
Ok(response) => response,
Err(err) => {
warn!(
"Failed to parse JSON: {}. Body: {}",
err,
&serde_json::to_string(&body).unwrap()
error = %err,
body = %serde_json::to_string(&body).unwrap(),
"failed to parse json response"
);
return Err(OrchestrationError::JsonError(
err,
@@ -143,7 +141,7 @@ impl OrchestratorService {
};

if chat_completion_response.choices.is_empty() {
warn!("No choices in orchestrator response: {}", body);
warn!(body = %body, "no choices in orchestrator response");
return Ok(None);
}
@@ -152,10 +150,10 @@ impl OrchestratorService {
.orchestrator_model
.parse_response(content, &usage_preferences)?;
info!(
"arch-orchestrator determined routes: {}, selected_routes: {:?}, response time: {}ms",
content.replace("\n", "\\n"),
parsed_response,
orchestrator_response_time.as_millis()
content = %content.replace("\n", "\\n"),
selected_routes = ?parsed_response,
response_time_ms = orchestrator_response_time.as_millis(),
"arch-orchestrator determined routes"
);

if let Some(ref parsed_response) = parsed_response {
@@ -94,12 +94,12 @@ impl RouterModel for RouterModelV1 {
token_count += message_token_count;
if token_count > self.max_token_length {
debug!(
"RouterModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
token_count,
self.max_token_length
, selected_messsage_count,
messages_vec.len()
);
token_count = token_count,
max_tokens = self.max_token_length,
selected = selected_messsage_count,
total = messages_vec.len(),
"token count exceeds max, truncating conversation"
);
if message.role == Role::User {
// If message that exceeds max token length is from user, we need to keep it
selected_messages_list_reversed.push(message);
@ -111,9 +111,7 @@ impl RouterModel for RouterModelV1 {
|
|||
}
|
||||
|
||||
if selected_messages_list_reversed.is_empty() {
|
||||
debug!(
|
||||
"RouterModelV1: no messages selected, using the last message in the conversation"
|
||||
);
|
||||
debug!("no messages selected, using last message");
|
||||
if let Some(last_message) = messages_vec.last() {
|
||||
selected_messages_list_reversed.push(last_message);
|
||||
}
|
||||
|
|
@@ -122,12 +120,12 @@ impl RouterModel for RouterModelV1 {
// ensure that first and last selected message is from user
if let Some(first_message) = selected_messages_list_reversed.first() {
if first_message.role != Role::User {
warn!("RouterModelV1: last message in the conversation is not from user, this may lead to incorrect routing");
warn!("last message is not from user, may lead to incorrect routing");
}
}
if let Some(last_message) = selected_messages_list_reversed.last() {
if last_message.role != Role::User {
warn!("RouterModelV1: first message in the conversation is not from user, this may lead to incorrect routing");
warn!("first message is not from user, may lead to incorrect routing");
}
}
@@ -206,8 +204,9 @@ impl RouterModel for RouterModelV1 {
return Ok(Some((selected_route, model_name)));
} else {
warn!(
"No matching model found for route: {}, usage preferences: {:?}",
selected_route, usage_preferences
route = %selected_route,
preferences = ?usage_preferences,
"no matching model found for route"
);
return Ok(None);
}
@@ -219,8 +218,9 @@ impl RouterModel for RouterModelV1 {
}

warn!(
"No model found for route: {}, router model preferences: {:?}",
selected_route, self.llm_route_to_model_map
route = %selected_route,
preferences = ?self.llm_route_to_model_map,
"no model found for route"
);

Ok(None)
@@ -92,18 +92,16 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
match decoder.read_to_end(&mut decompressed) {
Ok(_) => {
debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully decompressed {} bytes to {} bytes",
self.request_id,
self.chunk_buffer.len(),
decompressed.len()
original_bytes = self.chunk_buffer.len(),
decompressed_bytes = decompressed.len(),
"Successfully decompressed response"
);
decompressed
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to decompress gzip buffer: {}",
self.request_id,
e
error = %e,
"Failed to decompress gzip buffer"
);
self.chunk_buffer.clone()
}
@@ -111,9 +109,8 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
}
Some(encoding) => {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Unsupported Content-Encoding: {}. Only gzip is currently supported.",
self.request_id,
encoding
encoding = %encoding,
"Unsupported Content-Encoding, only gzip is supported"
);
self.chunk_buffer.clone()
}
@@ -143,10 +140,9 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
serde_json::from_str::<ResponsesAPIStreamEvent>(data_str)
{
info!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured streaming response.completed: response_id={}, output_items={}",
self.request_id,
response.id,
response.output.len()
response_id = %response.id,
output_items = response.output.len(),
"Captured streaming response"
);
self.response_id = Some(response.id.clone());
self.output_items = Some(response.output.clone());
@@ -175,24 +171,20 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
) {
Ok(response) => {
info!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured non-streaming response: response_id={}, output_items={}",
self.request_id,
response.id,
response.output.len()
response_id = %response.id,
output_items = response.output.len(),
"Captured non-streaming response"
);
self.response_id = Some(response.id.clone());
self.output_items = Some(response.output.clone());
}
Err(e) => {
// Log parse error with chunk preview for debugging
let chunk_preview = String::from_utf8_lossy(&decompressed);
let preview_len = chunk_preview.len().min(200);
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to parse non-streaming ResponsesAPIResponse: {}. Decompressed preview (first {} bytes): {}",
self.request_id,
e,
preview_len,
&chunk_preview[..preview_len]
error = %e,
preview = %&chunk_preview[..preview_len],
"Failed to parse non-streaming ResponsesAPIResponse"
);
}
}
@@ -221,10 +213,7 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {

// Skip storage for OpenAI upstream
if self.is_openai_upstream {
debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Skipping state storage for OpenAI upstream provider",
self.request_id
);
debug!("Skipping state storage for OpenAI upstream");
return;
}
@@ -234,8 +223,9 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
let output_as_inputs = outputs_to_inputs(output_items);

debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Converting outputs to inputs: output_items_count={}, converted_input_items_count={}",
self.request_id, output_items.len(), output_as_inputs.len()
output_items = output_items.len(),
converted_items = output_as_inputs.len(),
"Converting outputs to inputs"
);

// Combine original input + output as new input history
@@ -243,11 +233,9 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
combined_input.extend(output_as_inputs);

debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Storing state: original_input_count={}, combined_input_count={}, combined_json={}",
self.request_id,
self.original_input.len(),
combined_input.len(),
serde_json::to_string(&combined_input).unwrap_or_else(|_| "serialization_error".to_string())
original_input = self.original_input.len(),
combined_input = combined_input.len(),
"Storing conversation state"
);

let state = OpenAIConversationState {
@@ -270,28 +258,27 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
match storage.put(state).await {
Ok(()) => {
info!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully stored conversation state for response_id: {}, items_count={}",
request_id,
response_id_clone,
items_count
request_id = %request_id,
response_id = %response_id_clone,
items = items_count,
"Stored conversation state"
);
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to store conversation state for response_id {}: {}",
request_id,
response_id_clone,
e
request_id = %request_id,
response_id = %response_id_clone,
error = %e,
"Failed to store conversation state"
);
}
}
});
} else {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | No response_id captured from upstream response - cannot store conversation state. response_id present: {}, output present: {}",
self.request_id,
self.response_id.is_some(),
self.output_items.is_some()
has_response_id = self.response_id.is_some(),
has_output = self.output_items.is_some(),
"No response_id captured, cannot store conversation state"
);
}
}
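These state-processor hunks drop the hand-rolled `[PLANO_REQ_ID:...] | STATE_PROCESSOR |` prefixes; with span-scoped fields, the request id comes from an enclosing span rather than being repeated in every message. A minimal sketch of that pattern (the span name and field values here are illustrative, not code from this commit):

```rust
use tracing::{debug, info_span};

fn process_chunk(request_id: &str) {
    // Events recorded while this span is entered inherit request_id from the
    // span context, so each log line only carries its own structured fields.
    let span = info_span!("state_processor", request_id = %request_id);
    let _guard = span.enter();

    debug!(original_bytes = 1024, decompressed_bytes = 4096, "Successfully decompressed response");
}
```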
@@ -1,5 +1,35 @@
mod constants;
mod service_name_exporter;

pub use constants::{
    error, http, llm, operation_component, routing, signals, OperationNameBuilder,
};
pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};

use opentelemetry::trace::get_active_span;
use opentelemetry::KeyValue;

/// Sets the service name override on the current active OpenTelemetry span.
///
/// This function adds the `service.name.override` attribute to the active
/// OpenTelemetry span, which allows observability backends to filter and group
/// spans by their logical service (e.g., `plano(llm)`, `plano(filter)`).
///
/// # Arguments
/// * `service_name` - The service name to use (e.g., `operation_component::LLM`)
///
/// # Example
/// ```rust,ignore
/// use brightstaff::tracing::{set_service_name, operation_component};
///
/// // Inside a traced function:
/// set_service_name(operation_component::LLM);
/// ```
pub fn set_service_name(service_name: &str) {
    get_active_span(|span| {
        span.set_attribute(KeyValue::new(
            SERVICE_NAME_OVERRIDE_KEY,
            service_name.to_string(),
        ));
    });
}
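As a small usage sketch (the span name and component choice are illustrative, not from this commit), the override only takes effect when called inside an active span created through the tracing-opentelemetry layer:

```rust
use brightstaff::tracing::{operation_component, set_service_name};
use tracing::info_span;

fn handle_llm_call() {
    // The span is produced by the tracing layer; once it is active, the override
    // attribute is attached so the exporter can group it under plano(llm).
    let span = info_span!("llm_call");
    let _guard = span.enter();
    set_service_name(operation_component::LLM);
}
```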
crates/brightstaff/src/tracing/service_name_exporter.rs (new file, 187 lines)
@@ -0,0 +1,187 @@
//! Service Name Override Exporter
//!
//! This module provides a custom SpanExporter that allows per-span service.name overrides.
//! In OpenTelemetry, `service.name` is part of the Resource, which is tied to the TracerProvider.
//! However, if you need different service names for different spans (e.g., `plano(orchestrator)`,
//! `plano(filter)`, `plano(llm)`) within the same provider, this exporter handles that by:
//!
//! 1. Looking for a special span attribute `service.name.override`
//! 2. Grouping spans by their effective service name
//! 3. Exporting each group via a dedicated OTLP exporter whose Resource has the correct
//!    `service.name`
//!
//! All per-service exporters are created eagerly at construction time so that no tonic
//! channel creation happens later inside `futures_executor::block_on` (which the
//! `BatchSpanProcessor` uses and which lacks a tokio runtime).
//!
//! # Usage
//!
//! ```rust
//! use brightstaff::tracing::{set_service_name, operation_component};
//!
//! // In your instrumented code, set the service name override:
//! set_service_name(operation_component::LLM);
//! ```

use opentelemetry::Key;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::error::OTelSdkResult;
use opentelemetry_sdk::trace::{SpanData, SpanExporter};
use opentelemetry_sdk::Resource;
use std::collections::HashMap;
use std::time::Duration;
use tokio::sync::Mutex;

use super::operation_component;

/// The attribute key used to override the service name for a specific span.
/// Set this as a span attribute to route the span to a different service.
pub const SERVICE_NAME_OVERRIDE_KEY: &str = "service.name.override";

/// Default service name used when no override is set on a span.
const DEFAULT_SERVICE_NAME: &str = "plano";

/// All known service names that will have dedicated exporters.
const ALL_SERVICE_NAMES: &[&str] = &[
    DEFAULT_SERVICE_NAME,
    operation_component::INBOUND,
    operation_component::ROUTING,
    operation_component::ORCHESTRATOR,
    operation_component::AGENT_FILTER,
    operation_component::AGENT,
    operation_component::LLM,
];

/// Span attribute keys to remove before export.
const FILTERED_ATTR_KEYS: &[&str] = &[
    "busy_ns",
    "idle_ns",
    "thread.id",
    "thread.name",
    "code.file.path",
    "code.line.number",
    "code.module.name",
    "target",
];

/// A SpanExporter that supports per-span `service.name` overrides.
///
/// Internally it holds one OTLP exporter per known service name. Each exporter
/// has its own `Resource` with the correct `service.name`, so backends like
/// Jaeger see the spans under the right service.
pub struct ServiceNameOverrideExporter {
    /// Map from service name → pre-created OTLP exporter (behind tokio Mutex
    /// because `SpanExporter::export` takes `&self` and the future must be Send).
    exporters: HashMap<String, Mutex<opentelemetry_otlp::SpanExporter>>,
}

// Manual Debug because `opentelemetry_otlp::SpanExporter` doesn't implement Debug
impl std::fmt::Debug for ServiceNameOverrideExporter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ServiceNameOverrideExporter")
            .field("services", &self.exporters.keys().collect::<Vec<_>>())
            .finish()
    }
}

impl ServiceNameOverrideExporter {
    /// Create a new `ServiceNameOverrideExporter`.
    ///
    /// This eagerly creates one OTLP gRPC exporter per known service name so
    /// that the tonic channel is established while a tokio runtime is available.
    ///
    /// # Arguments
    /// * `endpoint` – The OTLP collector endpoint URL (e.g. `http://localhost:4317`)
    pub fn new(endpoint: &str) -> Self {
        let mut exporters = HashMap::new();

        for &service_name in ALL_SERVICE_NAMES {
            let resource = Resource::builder_empty()
                .with_service_name(service_name)
                .build();

            let mut exporter = opentelemetry_otlp::SpanExporter::builder()
                .with_tonic()
                .with_endpoint(endpoint)
                .build()
                .expect("Failed to create OTLP span exporter");

            exporter.set_resource(&resource);
            exporters.insert(service_name.to_string(), Mutex::new(exporter));
        }

        Self { exporters }
    }
}

impl SpanExporter for ServiceNameOverrideExporter {
    fn export(
        &self,
        batch: Vec<SpanData>,
    ) -> impl std::future::Future<Output = OTelSdkResult> + Send {
        let override_key = Key::new(SERVICE_NAME_OVERRIDE_KEY);

        // Group spans by their effective service name
        let mut spans_by_service: HashMap<String, Vec<SpanData>> = HashMap::new();

        let should_filter = !tracing::enabled!(tracing::Level::DEBUG);

        for span in batch {
            let mut span = span;

            if should_filter {
                span.attributes
                    .retain(|kv| !FILTERED_ATTR_KEYS.contains(&kv.key.as_str()));
            }

            let service_name = span
                .attributes
                .iter()
                .find(|kv| kv.key == override_key)
                .map(|kv| kv.value.to_string())
                .unwrap_or_else(|| DEFAULT_SERVICE_NAME.to_string());

            spans_by_service.entry(service_name).or_default().push(span);
        }

        // Collect grouped spans into a Vec so the async block owns the data.
        let results: Vec<(String, Vec<SpanData>)> = spans_by_service.into_iter().collect();
        async move {
            for (service_name, spans) in results {
                // Look up the pre-created exporter; fall back to default if
                // the service name isn't one of the known ones.
                let key = if self.exporters.contains_key(&service_name) {
                    service_name.clone()
                } else {
                    DEFAULT_SERVICE_NAME.to_string()
                };

                if let Some(exporter_mutex) = self.exporters.get(&key) {
                    let exporter = exporter_mutex.lock().await;
                    if let Err(e) = exporter.export(spans).await {
                        tracing::warn!(
                            service = %service_name,
                            error = ?e,
                            "Failed to export spans"
                        );
                    }
                }
            }
            Ok(())
        }
    }

    fn shutdown_with_timeout(&mut self, timeout: Duration) -> OTelSdkResult {
        for (_, exporter_mutex) in self.exporters.iter() {
            if let Ok(mut exporter) = exporter_mutex.try_lock() {
                let _ = exporter.shutdown_with_timeout(timeout);
            }
        }
        Ok(())
    }

    fn set_resource(&mut self, _resource: &Resource) {
        // Each inner exporter already has its own resource set at creation time.
        // Nothing to propagate.
    }
}
@@ -3,12 +3,17 @@ use std::sync::OnceLock;

use opentelemetry::global;
use opentelemetry_sdk::{propagation::TraceContextPropagator, trace::SdkTracerProvider};
use opentelemetry_stdout::SpanExporter;
use time::macros::format_description;
use tracing::{Event, Subscriber};
use tracing_subscriber::fmt::{format, time::FormatTime, FmtContext, FormatEvent, FormatFields};
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::EnvFilter;

use crate::tracing::ServiceNameOverrideExporter;
use common::configuration::Tracing;

struct BracketedTime;

impl FormatTime for BracketedTime {
@@ -29,7 +34,7 @@ struct BracketedFormatter;

impl<S, N> FormatEvent<S, N> for BracketedFormatter
where
S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>,
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
@@ -43,36 +48,116 @@ where

write!(
writer,
"[{}] ",
"[{}]",
event.metadata().level().to_string().to_lowercase()
)?;

// Extract request_id from span context if present
if let Some(scope) = ctx.event_scope() {
for span in scope.from_root() {
let extensions = span.extensions();
if let Some(fields) = extensions.get::<FormattedFields<N>>() {
let fields_str = fields.fields.as_str();
// Look for request_id in the formatted fields
if let Some(start) = fields_str.find("request_id=") {
let rest = &fields_str[start + 11..]; // Skip "request_id="
let end = rest.find(|c: char| c.is_whitespace()).unwrap_or(rest.len());
let rid = &rest[..end];
write!(writer, " request_id={}", rid)?;
break;
}
}
}
}

write!(writer, " ")?;
ctx.field_format().format_fields(writer.by_ref(), event)?;

writeln!(writer)
}
}

use tracing_subscriber::fmt::FormattedFields;

static INIT_LOGGER: OnceLock<SdkTracerProvider> = OnceLock::new();

pub fn init_tracer() -> &'static SdkTracerProvider {
pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider {
INIT_LOGGER.get_or_init(|| {
global::set_text_map_propagator(TraceContextPropagator::new());
// Install stdout exporter pipeline to be able to retrieve the collected spans.
// For the demonstration, use `Sampler::AlwaysOn` sampler to sample all traces.
let provider = SdkTracerProvider::builder()
.with_simple_exporter(SpanExporter::default())
.build();

global::set_tracer_provider(provider.clone());
// Get OTEL endpoint and sampling from config.yaml tracing section
let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone());

tracing_subscriber::fmt()
.with_env_filter(
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
)
.event_format(BracketedFormatter)
.init();
let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0);

provider
let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some();
eprintln!(
"initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}",
tracing_enabled, otel_endpoint, random_sampling
);

// Create OTLP exporter to send spans to collector
if tracing_enabled {
// Set service name via environment if not already set
if std::env::var("OTEL_SERVICE_NAME").is_err() {
std::env::set_var("OTEL_SERVICE_NAME", "plano");
}

// Create ServiceNameOverrideExporter to support per-span service names
// This allows spans to have different service names (e.g., plano(orchestrator),
// plano(filter), plano(llm)) by setting the "service.name.override" attribute
let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap());

let provider = SdkTracerProvider::builder()
.with_batch_exporter(exporter)
.build();

global::set_tracer_provider(provider.clone());

// Create OpenTelemetry tracing layer using TracerProvider trait
use opentelemetry::trace::TracerProvider as _;
let telemetry_layer =
tracing_opentelemetry::layer().with_tracer(provider.tracer("brightstaff"));

// Combine the OpenTelemetry layer with fmt layer using the registry
let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));

// Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable)
let fmt_layer = tracing_subscriber::fmt::layer()
.event_format(BracketedFormatter)
.fmt_fields(format::DefaultFields::new())
.with_ansi(false);

let subscriber = tracing_subscriber::registry()
.with(telemetry_layer)
.with(env_filter)
.with(fmt_layer);

tracing::subscriber::set_global_default(subscriber)
.expect("Failed to set tracing subscriber");

provider
} else {
// Tracing disabled - use no-op provider
let provider = SdkTracerProvider::builder().build();
global::set_tracer_provider(provider.clone());

let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));

// Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable)
let fmt_layer = tracing_subscriber::fmt::layer()
.event_format(BracketedFormatter)
.fmt_fields(format::DefaultFields::new())
.with_ansi(false);

tracing_subscriber::registry()
.with(env_filter)
.with(fmt_layer)
.init();

provider
}
})
}
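With this change, `init_tracer` takes the optional tracing section of the configuration instead of always installing a stdout exporter. A minimal startup-wiring sketch under stated assumptions (the calling function and its parameter are hypothetical; only the `init_tracer(Option<&Tracing>)` signature comes from the diff above):

```rust
use common::configuration::Tracing;

// Hypothetical caller: spans are exported only when the config provides an OTLP
// endpoint and a random_sampling value greater than zero; otherwise init_tracer
// falls back to the no-op provider branch and installs only the fmt layer.
fn setup_tracing(tracing_config: Option<&Tracing>) {
    let provider = init_tracer(tracing_config);
    // Keeping a handle to the provider allows flushing batched spans at shutdown.
    let _ = provider;
}
```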