From d53a1ab3d11a1a548ee961a972ed2465276c8d81 Mon Sep 17 00:00:00 2001 From: Musa Date: Tue, 17 Feb 2026 13:13:12 -0800 Subject: [PATCH] refactor: remove TraceCollector usage and enhance logging with structured attributes --- .../src/handlers/agent_chat_completions.rs | 438 ++++++++---------- crates/brightstaff/src/handlers/llm.rs | 341 +++++++------- .../brightstaff/src/handlers/router_chat.rs | 132 +----- crates/brightstaff/src/main.rs | 37 +- crates/brightstaff/src/tracing/mod.rs | 30 ++ crates/common/src/configuration.rs | 2 + .../travel_agents/config.yaml | 2 +- .../travel_agents/test.rest | 13 +- 8 files changed, 440 insertions(+), 555 deletions(-) diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs index 09dc72a7..73f5cb2a 100644 --- a/crates/brightstaff/src/handlers/agent_chat_completions.rs +++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs @@ -1,10 +1,8 @@ use std::sync::Arc; -use std::time::{Instant, SystemTime}; +use std::time::Instant; use bytes::Bytes; use common::configuration::SpanAttributes; -use common::consts::TRACE_PARENT_HEADER; -use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind}; use hermesllm::apis::OpenAIMessage; use hermesllm::clients::SupportedAPIsFromClient; use hermesllm::providers::request::ProviderRequest; @@ -12,17 +10,15 @@ use hermesllm::ProviderRequestType; use http_body_util::combinators::BoxBody; use http_body_util::BodyExt; use hyper::{Request, Response}; +use opentelemetry::trace::get_active_span; use serde::ser::Error as SerError; -use tracing::{debug, info, warn}; +use tracing::{debug, info, info_span, warn, Instrument}; use super::agent_selector::{AgentSelectionError, AgentSelector}; use super::pipeline_processor::{PipelineError, PipelineProcessor}; use super::response_handler::ResponseHandler; use crate::router::plano_orchestrator::OrchestratorService; -use crate::tracing::{ - append_span_attributes, collect_custom_trace_attributes, http, operation_component, - OperationNameBuilder, -}; +use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name}; /// Main errors for agent chat completions #[derive(Debug, thiserror::Error)] @@ -45,95 +41,127 @@ pub async fn agent_chat( _: String, agents_list: Arc>>>, listeners: Arc>>, - trace_collector: Arc, span_attributes: Arc>, ) -> Result>, hyper::Error> { - match handle_agent_chat( - request, - orchestrator_service, - agents_list, - listeners, - trace_collector, - span_attributes, - ) - .await + let custom_attrs = + collect_custom_trace_attributes(request.headers(), span_attributes.as_ref().as_ref()); + // Extract request_id from headers or generate a new one + let request_id: String = match request + .headers() + .get(common::consts::REQUEST_ID_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|s| s.to_string()) { - Ok(response) => Ok(response), - Err(err) => { - // Check if this is a client error from the pipeline that should be cascaded - if let AgentFilterChainError::Pipeline(PipelineError::ClientError { - agent, - status, - body, - }) = &err - { - warn!( - "Client error from agent '{}' (HTTP {}): {}", - agent, status, body - ); + Some(id) => id, + None => uuid::Uuid::new_v4().to_string(), + }; - // Create error response with the original status code and body + // Create a span with request_id that will be included in all log lines + let request_span = info_span!( + "(orchestrator)", + component = "orchestrator", + request_id = %request_id, + http.method = %request.method(), + http.path = %request.uri().path() + ); + + // Execute the handler inside the span + async { + // Set service name for orchestrator operations + set_service_name(operation_component::ORCHESTRATOR); + + match handle_agent_chat_inner( + request, + orchestrator_service, + agents_list, + listeners, + request_id, + custom_attrs, + ) + .await + { + Ok(response) => Ok(response), + Err(err) => { + // Check if this is a client error from the pipeline that should be cascaded + if let AgentFilterChainError::Pipeline(PipelineError::ClientError { + agent, + status, + body, + }) = &err + { + warn!( + agent = %agent, + status = %status, + body = %body, + "client error from agent" + ); + + // Create error response with the original status code and body + let error_json = serde_json::json!({ + "error": "ClientError", + "agent": agent, + "status": status, + "agent_response": body + }); + + let json_string = error_json.to_string(); + let mut response = + Response::new(ResponseHandler::create_full_body(json_string)); + *response.status_mut() = hyper::StatusCode::from_u16(*status) + .unwrap_or(hyper::StatusCode::BAD_REQUEST); + response.headers_mut().insert( + hyper::header::CONTENT_TYPE, + "application/json".parse().unwrap(), + ); + return Ok(response); + } + + // Print detailed error information with full error chain for other errors + let mut error_chain = Vec::new(); + let mut current_error: &dyn std::error::Error = &err; + + // Collect the full error chain + loop { + error_chain.push(current_error.to_string()); + match current_error.source() { + Some(source) => current_error = source, + None => break, + } + } + + // Log the complete error chain + warn!(error_chain = ?error_chain, "agent chat error chain"); + warn!(root_error = ?err, "root error"); + + // Create structured error response as JSON let error_json = serde_json::json!({ - "error": "ClientError", - "agent": agent, - "status": status, - "agent_response": body + "error": { + "type": "AgentFilterChainError", + "message": err.to_string(), + "error_chain": error_chain, + "debug_info": format!("{:?}", err) + } }); - let json_string = error_json.to_string(); - let mut response = Response::new(ResponseHandler::create_full_body(json_string)); - *response.status_mut() = - hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST); - response.headers_mut().insert( - hyper::header::CONTENT_TYPE, - "application/json".parse().unwrap(), - ); - return Ok(response); + // Log the error for debugging + info!(error = %error_json, "structured error info"); + + // Return JSON error response + Ok(ResponseHandler::create_json_error_response(&error_json)) } - - // Print detailed error information with full error chain for other errors - let mut error_chain = Vec::new(); - let mut current_error: &dyn std::error::Error = &err; - - // Collect the full error chain - loop { - error_chain.push(current_error.to_string()); - match current_error.source() { - Some(source) => current_error = source, - None => break, - } - } - - // Log the complete error chain - warn!("Agent chat error chain: {:#?}", error_chain); - warn!("Root error: {:?}", err); - - // Create structured error response as JSON - let error_json = serde_json::json!({ - "error": { - "type": "AgentFilterChainError", - "message": err.to_string(), - "error_chain": error_chain, - "debug_info": format!("{:?}", err) - } - }); - - // Log the error for debugging - info!("Structured error info: {}", error_json); - - // Return JSON error response - Ok(ResponseHandler::create_json_error_response(&error_json)) } } + .instrument(request_span) + .await } -async fn handle_agent_chat( +async fn handle_agent_chat_inner( request: Request, orchestrator_service: Arc, agents_list: Arc>>>, listeners: Arc>>, - trace_collector: Arc, - span_attributes: Arc>, + request_id: String, + custom_attrs: std::collections::HashMap, ) -> Result>, AgentFilterChainError> { // Initialize services let agent_selector = AgentSelector::new(orchestrator_service); @@ -147,14 +175,21 @@ async fn handle_agent_chat( .and_then(|name| name.to_str().ok()); // Find the appropriate listener - let listener = { + let listener: common::configuration::Listener = { let listeners = listeners.read().await; agent_selector .find_listener(listener_name, &listeners) .await? }; - info!("Handling request for listener: {}", listener.name); + get_active_span(|span| { + span.update_name(listener.name.to_string()); + for (key, value) in &custom_attrs { + span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone())); + } + }); + + info!(listener = %listener.name, "handling request"); // Parse request body let request_path = request @@ -169,12 +204,8 @@ async fn handle_agent_chat( let mut headers = request.headers().clone(); headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER); + // Set the request_id in headers if not already present if !headers.contains_key(common::consts::REQUEST_ID_HEADER) { - let request_id = uuid::Uuid::new_v4().to_string(); - info!( - "Request id not found in headers, generated new request id: {}", - request_id - ); headers.insert( common::consts::REQUEST_ID_HEADER, hyper::header::HeaderValue::from_str(&request_id).unwrap(), @@ -183,16 +214,12 @@ async fn handle_agent_chat( headers }; - let custom_attrs = collect_custom_trace_attributes( - &request_headers, - span_attributes.as_ref().as_ref(), - ); let chat_request_bytes = request.collect().await?.to_bytes(); debug!( - "Received request body (raw utf8): {}", - String::from_utf8_lossy(&chat_request_bytes) + body = %String::from_utf8_lossy(&chat_request_bytes), + "received request body" ); // Determine the API type from the endpoint @@ -206,7 +233,7 @@ async fn handle_agent_chat( let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) { Ok(request) => request, Err(err) => { - warn!("Failed to parse request as ProviderRequestType: {}", err); + warn!("failed to parse request as ProviderRequestType: {}", err); let err_msg = format!("Failed to parse request: {}", err); return Err(AgentFilterChainError::RequestParsing( serde_json::Error::custom(err_msg), @@ -216,12 +243,6 @@ async fn handle_agent_chat( let message: Vec = client_request.get_messages(); - // Extract trace parent for routing - let traceparent = request_headers - .iter() - .find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER) - .map(|(_, value)| value.to_str().unwrap_or_default().to_string()); - let request_id = request_headers .get(common::consts::REQUEST_ID_HEADER) .and_then(|val| val.to_str().ok()) @@ -234,90 +255,58 @@ async fn handle_agent_chat( agent_selector.create_agent_map(agents) }; - // Parse trace parent to get trace_id and parent_span_id - let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent { - parse_traceparent(tp) - } else { - (String::new(), None) - }; - // Select appropriate agents using arch orchestrator llm model - let selection_span_id = generate_random_span_id(); - let selection_start_time = SystemTime::now(); - let selection_start_instant = Instant::now(); - + let selection_start = Instant::now(); let selected_agents = agent_selector - .select_agents(&message, &listener, traceparent.clone(), request_id.clone()) + .select_agents(&message, &listener, request_id.clone()) .await?; - // Record agent selection span - let selection_end_time = SystemTime::now(); - let selection_elapsed = selection_start_instant.elapsed(); - let selection_operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path("/agents/select") - .with_target(&listener.name) - .build(); + // Record selection attributes on the current orchestrator span + let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0; + get_active_span(|span| { + span.set_attribute(opentelemetry::KeyValue::new( + "selection.listener", + listener.name.clone(), + )); + span.set_attribute(opentelemetry::KeyValue::new( + "selection.agent_count", + selected_agents.len() as i64, + )); + span.set_attribute(opentelemetry::KeyValue::new( + "selection.agents", + selected_agents + .iter() + .map(|a| a.id.as_str()) + .collect::>() + .join(","), + )); + span.set_attribute(opentelemetry::KeyValue::new( + "selection.determination_ms", + format!("{:.2}", selection_elapsed_ms), + )); + }); - let mut selection_span_builder = append_span_attributes( - SpanBuilder::new(&selection_operation_name) - .with_span_id(selection_span_id) - .with_kind(SpanKind::Internal) - .with_start_time(selection_start_time) - .with_end_time(selection_end_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, "/agents/select") - .with_attribute("selection.listener", listener.name.clone()) - .with_attribute("selection.agent_count", selected_agents.len().to_string()) - .with_attribute( - "selection.agents", - selected_agents - .iter() - .map(|a| a.id.as_str()) - .collect::>() - .join(","), - ) - .with_attribute( - "duration_ms", - format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0), - ), - &custom_attrs, + info!( + count = selected_agents.len(), + "selected agents for execution" ); - if !trace_id.is_empty() { - selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone()); - } - if let Some(parent_id) = parent_span_id.clone() { - selection_span_builder = selection_span_builder.with_parent_span_id(parent_id); - } - - let selection_span = selection_span_builder.build(); - trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span); - - info!("Selected {} agent(s) for execution", selected_agents.len()); - // Execute agents sequentially, passing output from one to the next let mut current_messages = message.clone(); let agent_count = selected_agents.len(); for (agent_index, selected_agent) in selected_agents.iter().enumerate() { + // Get agent name + let agent_name = selected_agent.id.clone(); let is_last_agent = agent_index == agent_count - 1; debug!( - "Processing agent {}/{}: {}", - agent_index + 1, - agent_count, - selected_agent.id + agent_index = agent_index + 1, + total = agent_count, + agent = %agent_name, + "processing agent" ); - // Record the start time for agent span - let agent_start_time = SystemTime::now(); - let agent_start_instant = Instant::now(); - let span_id = generate_random_span_id(); - - // Get agent name - let agent_name = selected_agent.id.clone(); - // Process the filter chain let chat_history = pipeline_processor .process_filter_chain( @@ -325,91 +314,74 @@ async fn handle_agent_chat( selected_agent, &agent_map, &request_headers, - Some(&trace_collector), - trace_id.clone(), - span_id.clone(), ) .await?; // Get agent details and invoke let agent = agent_map.get(&agent_name).unwrap(); - debug!("Invoking agent: {}", agent_name); + debug!(agent = %agent_name, "invoking agent"); - let llm_response = pipeline_processor - .invoke_agent( - &chat_history, - client_request.clone(), - agent, - &request_headers, - trace_id.clone(), - span_id.clone(), - ) - .await?; - - // Record agent span - let agent_end_time = SystemTime::now(); - let agent_elapsed = agent_start_instant.elapsed(); - let full_path = format!("/agents{}", request_path); - let operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path(&full_path) - .with_target(&agent_name) - .build(); - - let mut span_builder = append_span_attributes( - SpanBuilder::new(&operation_name) - .with_span_id(span_id) - .with_kind(SpanKind::Internal) - .with_start_time(agent_start_time) - .with_end_time(agent_end_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, full_path) - .with_attribute("agent.name", agent_name.clone()) - .with_attribute( - "agent.sequence", - format!("{}/{}", agent_index + 1, agent_count), - ) - .with_attribute( - "duration_ms", - format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0), - ), - &custom_attrs, + let agent_span = info_span!( + "agent", + agent_id = %agent_name, + message_count = chat_history.len(), ); - if !trace_id.is_empty() { - span_builder = span_builder.with_trace_id(trace_id.clone()); - } - if let Some(parent_id) = parent_span_id.clone() { - span_builder = span_builder.with_parent_span_id(parent_id); - } + let llm_response = async { + set_service_name(operation_component::AGENT); + get_active_span(|span| { + span.update_name(format!("{} /v1/chat/completions", agent_name)); + for (key, value) in &custom_attrs { + span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone())); + } + }); - let span = span_builder.build(); - trace_collector.record_span(operation_component::AGENT, span); + pipeline_processor + .invoke_agent( + &chat_history, + client_request.clone(), + agent, + &request_headers, + ) + .await + } + .instrument(agent_span.clone()) + .await?; // If this is the last agent, return the streaming response if is_last_agent { info!( - "Completed agent chain, returning response from last agent: {}", - agent_name + agent = %agent_name, + "completed agent chain, returning response" ); - return response_handler - .create_streaming_response(llm_response) - .await - .map_err(AgentFilterChainError::from); + // Capture the orchestrator span (parent of the agent span) so it + // stays open for the full streaming duration alongside the agent span. + let orchestrator_span = tracing::Span::current(); + return async { + response_handler + .create_streaming_response( + llm_response, + tracing::Span::current(), // agent span (inner) + orchestrator_span, // orchestrator span (outer) + ) + .await + .map_err(AgentFilterChainError::from) + } + .instrument(agent_span) + .await; } // For intermediate agents, collect the full response and pass to next agent - debug!( - "Collecting response from intermediate agent: {}", - agent_name - ); - let response_text = response_handler.collect_full_response(llm_response).await?; + debug!(agent = %agent_name, "collecting response from intermediate agent"); + let response_text = async { response_handler.collect_full_response(llm_response).await } + .instrument(agent_span) + .await?; info!( - "Agent {} completed, passing {} character response to next agent", - agent_name, - response_text.len() + agent = %agent_name, + response_len = response_text.len(), + "agent completed, passing response to next agent" ); // remove last message and add new one at the end diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs index 6d5b236a..6e7ac226 100644 --- a/crates/brightstaff/src/handlers/llm.rs +++ b/crates/brightstaff/src/handlers/llm.rs @@ -4,7 +4,6 @@ use common::consts::{ ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER, }; use common::llm_providers::LlmProviders; -use common::traces::TraceCollector; use hermesllm::apis::openai_responses::InputParam; use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs}; use hermesllm::{ProviderRequest, ProviderRequestType}; @@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full}; use hyper::header::{self}; use hyper::{Request, Response, StatusCode}; +use opentelemetry::global; +use opentelemetry::trace::get_active_span; +use opentelemetry_http::HeaderInjector; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; -use tracing::{debug, info, warn}; +use tracing::{debug, info, info_span, warn, Instrument}; use crate::handlers::router_chat::router_chat_get_upstream_model; use crate::handlers::utils::{ @@ -26,7 +28,9 @@ use crate::state::response_state_processor::ResponsesStateProcessor; use crate::state::{ extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError, }; -use crate::tracing::{collect_custom_trace_attributes, operation_component}; +use crate::tracing::{ + collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name, +}; fn full>(chunk: T) -> BoxBody { Full::new(chunk.into()) @@ -34,38 +38,78 @@ fn full>(chunk: T) -> BoxBody { .boxed() } -#[allow(clippy::too_many_arguments)] pub async fn llm_chat( request: Request, router_service: Arc, full_qualified_llm_provider_url: String, model_aliases: Arc>>, llm_providers: Arc>, - trace_collector: Arc, span_attributes: Arc>, state_storage: Option>, ) -> Result>, hyper::Error> { let request_path = request.uri().path().to_string(); let request_headers = request.headers().clone(); - let custom_attrs = collect_custom_trace_attributes( - &request_headers, - span_attributes.as_ref().as_ref(), - ); let request_id: String = match request_headers .get(REQUEST_ID_HEADER) .and_then(|h| h.to_str().ok()) .map(|s| s.to_string()) { Some(id) => id, - None => { - let generated_id = uuid::Uuid::new_v4().to_string(); - warn!( - "[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID", - generated_id - ); - generated_id - } + None => uuid::Uuid::new_v4().to_string(), }; + let custom_attrs = + collect_custom_trace_attributes(&request_headers, span_attributes.as_ref().as_ref()); + + // Create a span with request_id that will be included in all log lines + let request_span = info_span!( + "llm", + component = "llm", + request_id = %request_id, + http.method = %request.method(), + http.path = %request_path, + llm.model = tracing::field::Empty, + llm.tools = tracing::field::Empty, + llm.user_message_preview = tracing::field::Empty, + llm.temperature = tracing::field::Empty, + ); + + // Execute the rest of the handler inside the span + llm_chat_inner( + request, + router_service, + full_qualified_llm_provider_url, + model_aliases, + llm_providers, + custom_attrs, + state_storage, + request_id, + request_path, + request_headers, + ) + .instrument(request_span) + .await +} + +#[allow(clippy::too_many_arguments)] +async fn llm_chat_inner( + request: Request, + router_service: Arc, + full_qualified_llm_provider_url: String, + model_aliases: Arc>>, + llm_providers: Arc>, + custom_attrs: HashMap, + state_storage: Option>, + request_id: String, + request_path: String, + mut request_headers: hyper::HeaderMap, +) -> Result>, hyper::Error> { + // Set service name for LLM operations + set_service_name(operation_component::LLM); + get_active_span(|span| { + for (key, value) in &custom_attrs { + span.set_attribute(opentelemetry::KeyValue::new(key.clone(), value.clone())); + } + }); // Extract or generate traceparent - this establishes the trace context for all spans let traceparent: String = match request_headers @@ -79,20 +123,18 @@ pub async fn llm_chat( let trace_id = Uuid::new_v4().to_string().replace("-", ""); let generated_tp = format!("00-{}-0000000000000000-01", trace_id); warn!( - "[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}", - request_id, generated_tp + generated_traceparent = %generated_tp, + "TRACE_PARENT header missing, generated new traceparent" ); generated_tp } }; - let mut request_headers = request_headers; let chat_request_bytes = request.collect().await?.to_bytes(); debug!( - "[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}", - request_id, - String::from_utf8_lossy(&chat_request_bytes) + body = %String::from_utf8_lossy(&chat_request_bytes), + "request body received" ); let mut client_request = match ProviderRequestType::try_from(( @@ -102,13 +144,10 @@ pub async fn llm_chat( Ok(request) => request, Err(err) => { warn!( - "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}", - request_id, err - ); - let err_msg = format!( - "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}", - request_id, err + error = %err, + "failed to parse request as ProviderRequestType" ); + let err_msg = format!("Failed to parse request: {}", err); let mut bad_request = Response::new(full(err_msg)); *bad_request.status_mut() = StatusCode::BAD_REQUEST; return Ok(bad_request); @@ -128,16 +167,21 @@ pub async fn llm_chat( let model_from_request = client_request.model().to_string(); let temperature = client_request.get_temperature(); let is_streaming_request = client_request.is_streaming(); - let resolved_model = resolve_model_alias(&model_from_request, &model_aliases); + let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases); // Validate that the requested model exists in configuration // This matches the validation in llm_gateway routing.rs - if llm_providers.read().await.get(&resolved_model).is_none() { + if llm_providers + .read() + .await + .get(&alias_resolved_model) + .is_none() + { let err_msg = format!( "Model '{}' not found in configured providers", - resolved_model + alias_resolved_model ); - warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg); + warn!(model = %alias_resolved_model, "model not found in configured providers"); let mut bad_request = Response::new(full(err_msg)); *bad_request.status_mut() = StatusCode::BAD_REQUEST; return Ok(bad_request); @@ -145,10 +189,10 @@ pub async fn llm_chat( // Handle provider/model slug format (e.g., "openai/gpt-4") // Extract just the model name for upstream (providers don't understand the slug) - let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') { + let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') { model.to_string() } else { - resolved_model.clone() + alias_resolved_model.clone() }; // Extract tool names and user message preview for span attributes @@ -156,18 +200,30 @@ pub async fn llm_chat( let user_message_preview = client_request .get_recent_user_message() .map(|msg| truncate_message(&msg, 50)); + let span = tracing::Span::current(); + if let Some(temp) = temperature { + span.record(tracing_llm::TEMPERATURE, tracing::field::display(temp)); + } + if let Some(tools) = &tool_names { + let formatted_tools = tools + .iter() + .map(|name| format!("{}(...)", name)) + .collect::>() + .join("\n"); + span.record(tracing_llm::TOOLS, formatted_tools.as_str()); + } + if let Some(preview) = &user_message_preview { + span.record(tracing_llm::USER_MESSAGE_PREVIEW, preview.as_str()); + } // Extract messages for signal analysis (clone before moving client_request) - let messages_for_signals = client_request.get_messages(); + let messages_for_signals = Some(client_request.get_messages()); // Set the model to just the model name (without provider prefix) // This ensures upstream receives "gpt-4" not "openai/gpt-4" client_request.set_model(model_name_only.clone()); - if client_request.remove_metadata_key("archgw_preference_config") { - debug!( - "[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata", - request_id - ); + if client_request.remove_metadata_key("plano_preference_config") { + debug!("removed plano_preference_config from metadata"); } // === v1/responses state management: Determine upstream API and combine input if needed === @@ -186,9 +242,9 @@ pub async fn llm_chat( // Get the upstream path and check if it's ResponsesAPI let upstream_path = get_upstream_path( &llm_providers, - &resolved_model, + &alias_resolved_model, &request_path, - &resolved_model, + &alias_resolved_model, is_streaming_request, ) .await; @@ -215,14 +271,17 @@ pub async fn llm_chat( // Update both the request and original_input_items responses_req.input = InputParam::Items(combined_input.clone()); original_input_items = combined_input; - info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len()); + info!( + items = original_input_items.len(), + "updated request with conversation history" + ); } Err(StateStorageError::NotFound(_)) => { // Return 409 Conflict when previous_response_id not found - warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id); + warn!(previous_response_id = %prev_resp_id, "previous response_id not found"); let err_msg = format!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}", - request_id, prev_resp_id + "Conversation state not found for previous_response_id: {}", + prev_resp_id ); let mut conflict_response = Response::new(full(err_msg)); *conflict_response.status_mut() = StatusCode::CONFLICT; @@ -231,8 +290,9 @@ pub async fn llm_chat( Err(e) => { // Log warning but continue on other storage errors warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}", - request_id, prev_resp_id, e + previous_response_id = %prev_resp_id, + error = %e, + "failed to retrieve conversation state" ); // Restore original_input_items since we passed ownership original_input_items = extract_input_items(&responses_req.input); @@ -240,10 +300,7 @@ pub async fn llm_chat( } } } else { - debug!( - "[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.", - request_id - ); + debug!("upstream supports ResponsesAPI natively"); } } } @@ -252,15 +309,29 @@ pub async fn llm_chat( let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap(); // Determine routing using the dedicated router_chat module - let routing_result = match router_chat_get_upstream_model( - router_service, - client_request, // Pass the original request - router_chat will convert it - trace_collector.clone(), - &traceparent, - &request_path, - &request_id, - &custom_attrs, - ) + // This gets its own span for latency and error tracking + let routing_span = info_span!( + "routing", + component = "routing", + http.method = "POST", + http.target = %request_path, + model.requested = %model_from_request, + model.alias_resolved = %alias_resolved_model, + route.selected_model = tracing::field::Empty, + routing.determination_ms = tracing::field::Empty, + ); + let routing_result = match async { + set_service_name(operation_component::ROUTING); + router_chat_get_upstream_model( + router_service, + client_request, // Pass the original request - router_chat will convert it + &traceparent, + &request_path, + &request_id, + ) + .await + } + .instrument(routing_span) .await { Ok(result) => result, @@ -274,22 +345,37 @@ pub async fn llm_chat( // Determine final model to use // Router returns "none" as a sentinel value when it doesn't select a specific model let router_selected_model = routing_result.model_name; - let model_name = if router_selected_model != "none" { + let resolved_model = if router_selected_model != "none" { // Router selected a specific model via routing preferences router_selected_model } else { // Router returned "none" sentinel, use validated resolved_model from request - resolved_model.clone() + alias_resolved_model.clone() }; + tracing::Span::current().record(tracing_llm::MODEL_NAME, resolved_model.as_str()); + + let span_name = if model_from_request == resolved_model { + format!("POST {} {}", request_path, resolved_model) + } else { + format!( + "POST {} {} -> {}", + request_path, model_from_request, resolved_model + ) + }; + get_active_span(|span| { + span.update_name(span_name.clone()); + }); debug!( - "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}", - request_id, full_qualified_llm_provider_url, model_name, model_name_only + url = %full_qualified_llm_provider_url, + provider_hint = %resolved_model, + upstream_model = %model_name_only, + "Routing to upstream" ); request_headers.insert( ARCH_PROVIDER_HINT_HEADER, - header::HeaderValue::from_str(&model_name).unwrap(), + header::HeaderValue::from_str(&resolved_model).unwrap(), ); request_headers.insert( @@ -299,12 +385,18 @@ pub async fn llm_chat( // remove content-length header if it exists request_headers.remove(header::CONTENT_LENGTH); + // Inject current LLM span's trace context so upstream spans are children of plano(llm) + global::get_text_map_propagator(|propagator| { + let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current()); + propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers)); + }); + // Capture start time right before sending request to upstream let request_start_time = std::time::Instant::now(); - let request_start_system_time = std::time::SystemTime::now(); + let _request_start_system_time = std::time::SystemTime::now(); let llm_response = match reqwest::Client::new() - .post(full_qualified_llm_provider_url) + .post(&full_qualified_llm_provider_url) .headers(request_headers) .body(client_request_bytes_for_upstream) .send() @@ -331,30 +423,12 @@ pub async fn llm_chat( // Build LLM span with actual status code using constants let byte_stream = llm_response.bytes_stream(); - // Build the LLM span (will be finalized after streaming completes) - let llm_span = build_llm_span( - &traceparent, - &request_path, - &resolved_model, - &model_name, - upstream_status.as_u16(), - is_streaming_request, - request_start_system_time, - tool_names, - user_message_preview, - temperature, - &llm_providers, - &custom_attrs, - ) - .await; - // Create base processor for metrics and tracing let base_processor = ObservableStreamProcessor::new( - trace_collector, operation_component::LLM, - llm_span, + span_name, request_start_time, - Some(messages_for_signals), + messages_for_signals, ); // === v1/responses state management: Wrap with ResponsesStateProcessor === @@ -375,8 +449,8 @@ pub async fn llm_chat( base_processor, state_store, original_input_items, + alias_resolved_model.clone(), resolved_model.clone(), - model_name.clone(), is_streaming_request, false, // Not OpenAI upstream since should_manage_state is true content_encoding, @@ -417,93 +491,6 @@ fn resolve_model_alias( model_from_request.to_string() } -/// Builds the LLM span with all required and optional attributes. -#[allow(clippy::too_many_arguments)] -async fn build_llm_span( - traceparent: &str, - request_path: &str, - resolved_model: &str, - model_name: &str, - status_code: u16, - is_streaming: bool, - start_time: std::time::SystemTime, - tool_names: Option>, - user_message_preview: Option, - temperature: Option, - llm_providers: &Arc>, - custom_attrs: &HashMap, -) -> common::traces::Span { - use crate::tracing::{http, llm, OperationNameBuilder}; - use common::traces::{parse_traceparent, SpanBuilder, SpanKind}; - - // Calculate the upstream path based on provider configuration - let upstream_path = get_upstream_path( - llm_providers, - model_name, - request_path, - resolved_model, - is_streaming, - ) - .await; - - // Build operation name showing path transformation if different - let operation_name = if request_path != upstream_path { - OperationNameBuilder::new() - .with_method("POST") - .with_path(format!("{} >> {}", request_path, upstream_path)) - .with_target(resolved_model) - .build() - } else { - OperationNameBuilder::new() - .with_method("POST") - .with_path(request_path) - .with_target(resolved_model) - .build() - }; - - let (trace_id, parent_span_id) = parse_traceparent(traceparent); - - let mut span_builder = SpanBuilder::new(&operation_name) - .with_trace_id(&trace_id) - .with_kind(SpanKind::Client) - .with_start_time(start_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::STATUS_CODE, status_code.to_string()) - .with_attribute(http::TARGET, request_path.to_string()) - .with_attribute(http::UPSTREAM_TARGET, upstream_path) - .with_attribute(llm::MODEL_NAME, resolved_model.to_string()) - .with_attribute(llm::IS_STREAMING, is_streaming.to_string()); - - // Only set parent span ID if it exists (not a root span) - if let Some(parent) = parent_span_id { - span_builder = span_builder.with_parent_span_id(&parent); - } - - // Add optional attributes - if let Some(temp) = temperature { - span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string()); - } - - if let Some(tools) = tool_names { - let formatted_tools = tools - .iter() - .map(|name| format!("{}(...)", name)) - .collect::>() - .join("\n"); - span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools); - } - - if let Some(preview) = user_message_preview { - span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview); - } - - for (key, value) in custom_attrs { - span_builder = span_builder.with_attribute(key, value); - } - - span_builder.build() -} - /// Calculates the upstream path for the provider based on the model name. /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix, /// then uses target_endpoint_for_provider to calculate the correct upstream path. diff --git a/crates/brightstaff/src/handlers/router_chat.rs b/crates/brightstaff/src/handlers/router_chat.rs index 210e6bf6..d71734fa 100644 --- a/crates/brightstaff/src/handlers/router_chat.rs +++ b/crates/brightstaff/src/handlers/router_chat.rs @@ -1,14 +1,12 @@ use common::configuration::ModelUsagePreference; -use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector}; use hermesllm::clients::endpoints::SupportedUpstreamAPIs; use hermesllm::{ProviderRequest, ProviderRequestType}; use hyper::StatusCode; -use std::collections::HashMap; use std::sync::Arc; use tracing::{debug, info, warn}; use crate::router::llm_router::RouterService; -use crate::tracing::{http, operation_component, routing, OperationNameBuilder}; +use crate::tracing::routing; pub struct RoutingResult { pub model_name: String, @@ -36,11 +34,9 @@ impl RoutingError { pub async fn router_chat_get_upstream_model( router_service: Arc, client_request: ProviderRequestType, - trace_collector: Arc, traceparent: &str, request_path: &str, request_id: &str, - custom_attrs: &HashMap, ) -> Result { // Clone metadata for routing before converting (which consumes client_request) let routing_metadata = client_request.metadata().clone(); @@ -57,14 +53,14 @@ pub async fn router_chat_get_upstream_model( | ProviderRequestType::BedrockConverseStream(_) | ProviderRequestType::ResponsesAPIRequest(_), ) => { - warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format"); + warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format"); return Err(RoutingError::internal_error( "Request conversion failed".to_string(), )); } Err(err) => { warn!( - "Failed to convert request to ChatCompletionsRequest: {}", + "failed to convert request to ChatCompletionsRequest: {}", err ); return Err(RoutingError::internal_error(format!( @@ -75,9 +71,8 @@ pub async fn router_chat_get_upstream_model( }; debug!( - "[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}", - request_id, - &serde_json::to_string(&chat_request).unwrap() + request = %serde_json::to_string(&chat_request).unwrap(), + "router request" ); // Extract usage preferences from metadata @@ -113,16 +108,14 @@ pub async fn router_chat_get_upstream_model( }; info!( - "[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}", - request_id, - usage_preferences.is_some(), - request_path, - latest_message_for_log + has_usage_preferences = usage_preferences.is_some(), + path = %request_path, + latest_message = %latest_message_for_log, + "processing router request" ); // Capture start time for routing span let routing_start_time = std::time::Instant::now(); - let routing_start_system_time = std::time::SystemTime::now(); // Attempt to determine route using the router service let routing_result = router_service @@ -134,47 +127,21 @@ pub async fn router_chat_get_upstream_model( ) .await; + let determination_ms = routing_start_time.elapsed().as_millis() as i64; + let current_span = tracing::Span::current(); + current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms); + match routing_result { Ok(route) => match route { Some((_, model_name)) => { - // Record successful routing span - let mut attrs: HashMap = HashMap::new(); - attrs.insert("route.selected_model".to_string(), model_name.clone()); - for (key, value) in custom_attrs { - attrs.entry(key.clone()).or_insert_with(|| value.clone()); - } - record_routing_span( - trace_collector, - traceparent, - routing_start_time, - routing_start_system_time, - attrs, - ) - .await; - + current_span.record("route.selected_model", model_name.as_str()); Ok(RoutingResult { model_name }) } None => { // No route determined, return sentinel value "none" // This signals to llm.rs to use the original validated request model - info!( - "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'", - request_id - ); - - let mut attrs = HashMap::new(); - attrs.insert("route.selected_model".to_string(), "none".to_string()); - for (key, value) in custom_attrs { - attrs.entry(key.clone()).or_insert_with(|| value.clone()); - } - record_routing_span( - trace_collector, - traceparent, - routing_start_time, - routing_start_system_time, - attrs, - ) - .await; + current_span.record("route.selected_model", "none"); + info!("no route determined, using default model"); Ok(RoutingResult { model_name: "none".to_string(), @@ -182,22 +149,7 @@ pub async fn router_chat_get_upstream_model( } }, Err(err) => { - // Record failed routing span - let mut attrs = HashMap::new(); - attrs.insert("route.selected_model".to_string(), "unknown".to_string()); - attrs.insert("error.message".to_string(), err.to_string()); - for (key, value) in custom_attrs { - attrs.entry(key.clone()).or_insert_with(|| value.clone()); - } - record_routing_span( - trace_collector, - traceparent, - routing_start_time, - routing_start_system_time, - attrs, - ) - .await; - + current_span.record("route.selected_model", "unknown"); Err(RoutingError::internal_error(format!( "Failed to determine route: {}", err @@ -205,53 +157,3 @@ pub async fn router_chat_get_upstream_model( } } } - -/// Helper function to record a routing span with the given attributes. -/// Reduces code duplication across different routing outcomes. -async fn record_routing_span( - trace_collector: Arc, - traceparent: &str, - start_time: std::time::Instant, - start_system_time: std::time::SystemTime, - attrs: HashMap, -) { - // The routing always uses OpenAI Chat Completions format internally, - // so we log that as the actual API being used for routing - let routing_api_path = "/v1/chat/completions"; - - let routing_operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path(routing_api_path) - .with_target("Arch-Router-1.5B") - .build(); - - let (trace_id, parent_span_id) = parse_traceparent(traceparent); - - // Build the routing span directly using constants - let mut span_builder = SpanBuilder::new(&routing_operation_name) - .with_trace_id(&trace_id) - .with_kind(SpanKind::Client) - .with_start_time(start_system_time) - .with_end_time(std::time::SystemTime::now()) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, routing_api_path.to_string()) - .with_attribute( - routing::ROUTE_DETERMINATION_MS, - start_time.elapsed().as_millis().to_string(), - ); - - // Only set parent span ID if it exists (not a root span) - if let Some(parent) = parent_span_id { - span_builder = span_builder.with_parent_span_id(&parent); - } - - // Add all custom attributes - for (key, value) in attrs { - span_builder = span_builder.with_attribute(key, value); - } - - let span = span_builder.build(); - - // Record the span directly to the collector - trace_collector.record_span(operation_component::ROUTING, span); -} diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 9d577c7f..bfcadb63 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -14,7 +14,6 @@ use common::consts::{ CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME, }; use common::llm_providers::LlmProviders; -use common::traces::TraceCollector; use http_body_util::{combinators::BoxBody, BodyExt, Empty}; use hyper::body::Incoming; use hyper::server::conn::http1; @@ -125,17 +124,7 @@ async fn main() -> Result<(), Box> { // Initialize trace collector and start background flusher // Tracing is enabled if the tracing config is present in plano_config.yaml // Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED - let tracing_enabled = if plano_config.tracing.is_some() { - info!("Tracing configuration found in plano_config.yaml"); - Some(true) - } else { - info!( - "No tracing configuration in plano_config.yaml, will check OTEL_TRACING_ENABLED env var" - ); - None - }; - let trace_collector = Arc::new(TraceCollector::new(tracing_enabled)); - let _flusher_handle = trace_collector.clone().start_background_flusher(); + // OpenTelemetry automatic instrumentation is configured in utils/tracing.rs // Initialize conversation state storage for v1/responses // Configurable via plano_config.yaml state_storage section @@ -145,7 +134,10 @@ async fn main() -> Result<(), Box> { if let Some(storage_config) = &plano_config.state_storage { let storage: Arc = match storage_config.storage_type { common::configuration::StateStorageType::Memory => { - info!("Initialized conversation state storage: Memory"); + info!( + storage_type = "memory", + "initialized conversation state storage" + ); Arc::new(MemoryConversationalStorage::new()) } common::configuration::StateStorageType::Postgres => { @@ -154,8 +146,11 @@ async fn main() -> Result<(), Box> { .as_ref() .expect("connection_string is required for postgres state_storage"); - debug!("Postgres connection string (full): {}", connection_string); - info!("Initializing conversation state storage: Postgres"); + debug!(connection_string = %connection_string, "postgres connection"); + info!( + storage_type = "postgres", + "initializing conversation state storage" + ); Arc::new( PostgreSQLConversationStorage::new(connection_string.clone()) .await @@ -165,7 +160,7 @@ async fn main() -> Result<(), Box> { }; Some(storage) } else { - info!("No state_storage configured - conversation state management disabled"); + info!("no state_storage configured, conversation state management disabled"); None }; @@ -184,7 +179,6 @@ async fn main() -> Result<(), Box> { let llm_providers = llm_providers.clone(); let agents_list = combined_agents_filters_list.clone(); let listeners = listeners.clone(); - let trace_collector = trace_collector.clone(); let span_attributes = span_attributes.clone(); let state_storage = state_storage.clone(); let service = service_fn(move |req| { @@ -196,7 +190,6 @@ async fn main() -> Result<(), Box> { let model_aliases = Arc::clone(&model_aliases); let agents_list = agents_list.clone(); let listeners = listeners.clone(); - let trace_collector = trace_collector.clone(); let span_attributes = span_attributes.clone(); let state_storage = state_storage.clone(); @@ -217,7 +210,6 @@ async fn main() -> Result<(), Box> { fully_qualified_url, agents_list, listeners, - trace_collector, span_attributes, ) .with_context(parent_cx) @@ -236,7 +228,6 @@ async fn main() -> Result<(), Box> { fully_qualified_url, model_aliases, llm_providers, - trace_collector, span_attributes, state_storage, ) @@ -278,7 +269,7 @@ async fn main() -> Result<(), Box> { Ok(response) } _ => { - debug!("No route for {} {}", req.method(), req.uri().path()); + debug!(method = %req.method(), path = %req.uri().path(), "no route found"); let mut not_found = Response::new(empty()); *not_found.status_mut() = StatusCode::NOT_FOUND; Ok(not_found) @@ -288,13 +279,13 @@ async fn main() -> Result<(), Box> { }); tokio::task::spawn(async move { - debug!("Accepted connection from {:?}", peer_addr); + debug!(peer = ?peer_addr, "accepted connection"); if let Err(err) = http1::Builder::new() // .serve_connection(io, service_fn(chat_completion)) .serve_connection(io, service) .await { - warn!("Error serving connection: {:?}", err); + warn!(error = ?err, "error serving connection"); } }); } diff --git a/crates/brightstaff/src/tracing/mod.rs b/crates/brightstaff/src/tracing/mod.rs index f0d18ea4..1db01064 100644 --- a/crates/brightstaff/src/tracing/mod.rs +++ b/crates/brightstaff/src/tracing/mod.rs @@ -1,5 +1,6 @@ mod constants; mod custom_attributes; +mod service_name_exporter; pub use constants::{ error, http, llm, operation_component, routing, signals, OperationNameBuilder, @@ -7,3 +8,32 @@ pub use constants::{ pub use custom_attributes::{ append_span_attributes, collect_custom_trace_attributes, extract_custom_trace_attributes, }; +pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY}; + +use opentelemetry::trace::get_active_span; +use opentelemetry::KeyValue; + +/// Sets the service name override on the current active OpenTelemetry span. +/// +/// This function adds the `service.name.override` attribute to the active +/// OpenTelemetry span, which allows observability backends to filter and group +/// spans by their logical service (e.g., `plano(llm)`, `plano(filter)`). +/// +/// # Arguments +/// * `service_name` - The service name to use (e.g., `operation_component::LLM`) +/// +/// # Example +/// ```rust,ignore +/// use brightstaff::tracing::{set_service_name, operation_component}; +/// +/// // Inside a traced function: +/// set_service_name(operation_component::LLM); +/// ``` +pub fn set_service_name(service_name: &str) { + get_active_span(|span| { + span.set_attribute(KeyValue::new( + SERVICE_NAME_OVERRIDE_KEY, + service_name.to_string(), + )); + }); +} diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 55045450..f4e2b7b4 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -90,6 +90,8 @@ pub struct Overrides { pub struct Tracing { pub sampling_rate: Option, pub trace_arch_internal: Option, + pub random_sampling: Option, + pub opentracing_grpc_endpoint: Option, pub span_attributes: Option, } diff --git a/demos/agent_orchestration/travel_agents/config.yaml b/demos/agent_orchestration/travel_agents/config.yaml index ce9b8f44..911baf89 100644 --- a/demos/agent_orchestration/travel_agents/config.yaml +++ b/demos/agent_orchestration/travel_agents/config.yaml @@ -57,4 +57,4 @@ tracing: random_sampling: 100 span_attributes: header_prefixes: - - x-katanemo- + - x-acme- diff --git a/demos/agent_orchestration/travel_agents/test.rest b/demos/agent_orchestration/travel_agents/test.rest index 7d7c5759..b6348f28 100644 --- a/demos/agent_orchestration/travel_agents/test.rest +++ b/demos/agent_orchestration/travel_agents/test.rest @@ -3,12 +3,13 @@ ### Travel Agent Chat Completion Request POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1 Content-Type: application/json -X-Katanemo-Workspace-Id: ws_7e2c5d91b4224f59b0e6a4e0125c21b3 -X-Katanemo-Tenant-Id: ten_4102a8c7fa6542b084b395d2df184a9a -X-Katanemo-User-Id: usr_19df7e6751b846f9ba026776e3c12abe -X-Katanemo-Admin-Level: 3 -X-Katanemo-Is-Internal: true -X-Katanemo-Budget: 42.5 +X-Acme-Workspace-Id: ws_7e2c5d91b4224f59b0e6a4e0125c21b3 +X-Acme-Tenant-Id: ten_4102a8c7fa6542b084b395d2df184a9a +X-Acme-User-Id: usr_19df7e6751b846f9ba026776e3c12abe +X-Acme-Admin-Level: 3 +X-Acme-Environment: production +X-Acme-Is-Internal: false +X-Acme-Cost-Center: HD100 { "model": "gpt-5.2",