adding canonical tracing support via bright-staff

This commit is contained in:
Salman Paracha 2025-12-09 17:45:22 -08:00
parent 09c0b999b2
commit c748ea0a71
27 changed files with 1803 additions and 273 deletions

View file

@ -55,7 +55,7 @@ static_resources:
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: ingress_traffic
stat_prefix: plano(inbound)
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -95,21 +95,6 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: ingress_traffic
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: ingress_traffic
codec_type: AUTO
scheme_header_transformation:
@ -240,7 +225,7 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: outbound_api_traffic
service_name: tool
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
@ -413,7 +398,7 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: archgw(outbound)
service_name: plano(outbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}

3
crates/Cargo.lock generated
View file

@ -379,12 +379,15 @@ dependencies = [
"pretty_assertions",
"proxy-wasm",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"serde_with",
"serde_yaml",
"thiserror 1.0.69",
"tiktoken-rs",
"tokio",
"tracing",
"url",
"urlencoding",
]

View file

@ -7,7 +7,7 @@ edition = "2021"
async-openai = "0.30.1"
bytes = "1.10.1"
chrono = "0.4"
common = { version = "0.1.0", path = "../common" }
common = { version = "0.1.0", path = "../common", features = ["trace-collection"] }
eventsource-client = "0.15.0"
eventsource-stream = "0.2.3"
futures = "0.3.31"

View file

@ -1,6 +1,7 @@
pub mod agent_chat_completions;
pub mod agent_selector;
pub mod router;
pub mod router_chat;
pub mod models;
pub mod function_calling;
pub mod pipeline_processor;

View file

@ -1,8 +1,7 @@
use bytes::Bytes;
use common::configuration::{ModelAlias, ModelUsagePreference};
use common::configuration::{LlmProvider, ModelAlias};
use common::consts::{ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER};
use hermesllm::apis::openai::ChatCompletionsRequest;
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use common::traces::TraceCollector;
use hermesllm::clients::SupportedAPIsFromClient;
use hermesllm::{ProviderRequest, ProviderRequestType};
use http_body_util::combinators::BoxBody;
@ -11,10 +10,13 @@ use hyper::header::{self};
use hyper::{Request, Response, StatusCode};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, info, warn};
use tokio::sync::RwLock;
use tracing::{debug, warn};
use crate::router::llm_router::RouterService;
use crate::handlers::utils::{create_streaming_response, PassthroughProcessor};
use crate::handlers::utils::{create_streaming_response, PassthroughProcessor, truncate_message};
use crate::handlers::router_chat::router_chat_get_upstream_model;
use crate::tracing::operation_component;
fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
Full::new(chunk.into())
@ -22,14 +24,26 @@ fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
.boxed()
}
pub async fn router_chat(
pub async fn chat(
request: Request<hyper::body::Incoming>,
router_service: Arc<RouterService>,
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<Vec<LlmProvider>>>,
trace_collector: Arc<TraceCollector>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let request_path = request.uri().path().to_string();
let mut request_headers = request.headers().clone();
let request_headers = request.headers().clone();
// Extract traceparent header early (Envoy should have added this)
let traceparent = request_headers
.get("traceparent")
.and_then(|h| h.to_str().ok())
.unwrap_or("00-00000000000000000000000000000000-0000000000000000-01")
.to_string();
let mut request_headers = request_headers;
let chat_request_bytes = request.collect().await?.to_bytes();
debug!(
@ -55,142 +69,52 @@ pub async fn router_chat(
// This ensures all downstream objects use the resolved model
let model_from_request = client_request.model().to_string();
let is_streaming_request = client_request.is_streaming();
let resolved_model = if let Some(model_aliases) = model_aliases.as_ref() {
if let Some(model_alias) = model_aliases.get(&model_from_request) {
debug!(
"Model Alias: 'From {}' -> 'To {}'",
model_from_request, model_alias.target
);
model_alias.target.clone()
} else {
model_from_request.clone()
}
} else {
model_from_request.clone()
};
let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
// Extract tool names and user message preview for span attributes
let tool_names = client_request.get_tool_names();
let user_message_preview = client_request.get_recent_user_message()
.map(|msg| truncate_message(&msg, 50));
client_request.set_model(resolved_model.clone());
// Clone metadata for routing and remove archgw_preference_config from original
let routing_metadata = client_request.metadata().clone();
if client_request.remove_metadata_key("archgw_preference_config") {
debug!("Removed archgw_preference_config from metadata");
}
let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();
// Convert to ChatCompletionsRequest regardless of input type (clone to avoid moving original)
let chat_completions_request_for_arch_router: ChatCompletionsRequest =
match ProviderRequestType::try_from((
client_request,
&SupportedUpstreamAPIs::OpenAIChatCompletions(
hermesllm::apis::OpenAIApi::ChatCompletions,
),
)) {
Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
Ok(
ProviderRequestType::MessagesRequest(_)
| ProviderRequestType::BedrockConverse(_)
| ProviderRequestType::BedrockConverseStream(_)
| ProviderRequestType::ResponsesAPIRequest(_),
) => {
// This should not happen after conversion to OpenAI format
warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
let err_msg = "Request conversion failed".to_string();
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
Err(err) => {
warn!(
"Failed to convert request to ChatCompletionsRequest: {}",
err
);
let err_msg = format!("Failed to convert request: {}", err);
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
};
debug!(
"[ARCH_ROUTER REQ]: {}",
&serde_json::to_string(&chat_completions_request_for_arch_router).unwrap()
);
let trace_parent = request_headers
.iter()
.find(|(ty, _)| ty.as_str() == "traceparent")
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
let usage_preferences_str: Option<String> = routing_metadata.as_ref().and_then(|metadata| {
metadata
.get("archgw_preference_config")
.map(|value| value.to_string())
});
let usage_preferences: Option<Vec<ModelUsagePreference>> = usage_preferences_str
.as_ref()
.and_then(|s| serde_yaml::from_str(s).ok());
let latest_message_for_log = chat_completions_request_for_arch_router
.messages
.last()
.map_or("None".to_string(), |msg| {
msg.content.to_string().replace('\n', "\\n")
});
const MAX_MESSAGE_LENGTH: usize = 50;
let latest_message_for_log = if latest_message_for_log.chars().count() > MAX_MESSAGE_LENGTH {
let truncated: String = latest_message_for_log
.chars()
.take(MAX_MESSAGE_LENGTH)
.collect();
format!("{}...", truncated)
} else {
latest_message_for_log
};
info!(
"request received, request type: chat_completion, usage preferences from request: {}, request path: {}, latest message: {}",
usage_preferences.is_some(),
request_path,
latest_message_for_log
);
debug!("usage preferences from request: {:?}", usage_preferences);
let model_name = match router_service
.determine_route(
&chat_completions_request_for_arch_router.messages,
trace_parent.clone(),
usage_preferences,
)
.await
// Determine routing using the dedicated router_chat module
let routing_result = match router_chat_get_upstream_model(
router_service,
client_request, // Pass the original request - router_chat will convert it
&request_headers,
trace_collector.clone(),
&traceparent,
&request_path,
)
.await
{
Ok(route) => match route {
Some((_, model_name)) => model_name,
None => {
info!(
"No route determined, using default model from request: {}",
chat_completions_request_for_arch_router.model
);
chat_completions_request_for_arch_router.model.clone()
}
},
Ok(result) => result,
Err(err) => {
let err_msg = format!("Failed to determine route: {}", err);
let mut internal_error = Response::new(full(err_msg));
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
let mut internal_error = Response::new(full(err.message));
*internal_error.status_mut() = err.status_code;
return Ok(internal_error);
}
};
let model_name = routing_result.model_name;
debug!(
"[ARCH_ROUTER] URL: {}, Resolved Model: {}",
full_qualified_llm_provider_url, model_name
);
// Extract trace_parent for upstream request headers
let trace_parent = request_headers
.iter()
.find(|(ty, _)| ty.as_str() == "traceparent")
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
request_headers.insert(
ARCH_PROVIDER_HINT_HEADER,
header::HeaderValue::from_str(&model_name).unwrap(),
@ -210,6 +134,10 @@ pub async fn router_chat(
// remove content-length header if it exists
request_headers.remove(header::CONTENT_LENGTH);
// Capture start time right before sending request to upstream
let request_start_time = std::time::Instant::now();
let request_start_system_time = std::time::SystemTime::now();
let llm_response = match reqwest::Client::new()
.post(full_qualified_llm_provider_url)
.headers(request_headers)
@ -235,9 +163,31 @@ pub async fn router_chat(
headers.insert(header_name, header_value.clone());
}
// Use the streaming utility with a passthrough processor (no modification of chunks)
// Build LLM span with actual status code using constants
let byte_stream = llm_response.bytes_stream();
let processor = PassthroughProcessor;
// Build the LLM span (will be finalized after streaming completes)
let llm_span = build_llm_span(
&traceparent,
&request_path,
&resolved_model,
&model_name,
upstream_status.as_u16(),
is_streaming_request,
request_start_system_time,
tool_names,
user_message_preview,
&llm_providers,
).await;
// Use PassthroughProcessor to track streaming metrics and finalize the span
let processor = PassthroughProcessor::new(
trace_collector,
operation_component::LLM,
llm_span,
request_start_time,
);
let streaming_response = create_streaming_response(byte_stream, processor, 16);
match response.body(streaming_response.body) {
@ -250,3 +200,140 @@ pub async fn router_chat(
}
}
}
/// Resolves model aliases by looking up the requested model in the model_aliases map.
/// Returns the target model if an alias is found, otherwise returns the original model.
fn resolve_model_alias(
model_from_request: &str,
model_aliases: &Arc<Option<HashMap<String, ModelAlias>>>,
) -> String {
if let Some(aliases) = model_aliases.as_ref() {
if let Some(model_alias) = aliases.get(model_from_request) {
debug!(
"Model Alias: 'From {}' -> 'To {}'",
model_from_request, model_alias.target
);
return model_alias.target.clone();
}
}
model_from_request.to_string()
}
/// Builds the LLM span with all required and optional attributes.
async fn build_llm_span(
traceparent: &str,
request_path: &str,
resolved_model: &str,
model_name: &str,
status_code: u16,
is_streaming: bool,
start_time: std::time::SystemTime,
tool_names: Option<Vec<String>>,
user_message_preview: Option<String>,
llm_providers: &Arc<RwLock<Vec<LlmProvider>>>,
) -> common::traces::Span {
use common::traces::{SpanBuilder, SpanKind, parse_traceparent};
use crate::tracing::{http, llm, OperationNameBuilder};
// Calculate the upstream path based on provider configuration
let upstream_path = get_upstream_path(
llm_providers,
model_name,
request_path,
resolved_model,
is_streaming,
).await;
// Build operation name showing path transformation if different
let operation_name = if request_path != upstream_path {
OperationNameBuilder::new()
.with_method("POST")
.with_path(&format!("{} >> {}", request_path, upstream_path))
.with_target(resolved_model)
.build()
} else {
OperationNameBuilder::new()
.with_method("POST")
.with_path(request_path)
.with_target(resolved_model)
.build()
};
let (trace_id, parent_span_id) = parse_traceparent(traceparent);
let mut span_builder = SpanBuilder::new(&operation_name)
.with_trace_id(&trace_id)
.with_parent_span_id(&parent_span_id)
.with_kind(SpanKind::Client)
.with_start_time(start_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::STATUS_CODE, status_code.to_string())
.with_attribute(http::TARGET, request_path.to_string())
.with_attribute(http::UPSTREAM_TARGET, upstream_path)
.with_attribute(llm::MODEL_NAME, resolved_model.to_string())
.with_attribute(llm::IS_STREAMING, is_streaming.to_string());
// Add optional attributes
if let Some(tools) = tool_names {
span_builder = span_builder.with_attribute(llm::TOOLS, tools.join("\n"));
}
if let Some(preview) = user_message_preview {
span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview);
}
span_builder.build()
}
/// Calculates the upstream path for the provider based on the model name.
/// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
/// then uses target_endpoint_for_provider to calculate the correct upstream path.
async fn get_upstream_path(
llm_providers: &Arc<RwLock<Vec<LlmProvider>>>,
model_name: &str,
request_path: &str,
resolved_model: &str,
is_streaming: bool,
) -> String {
let providers_lock = llm_providers.read().await;
// First, try to find by model name or provider name
let provider = providers_lock.iter().find(|p| {
p.model.as_ref().map(|m| m == model_name).unwrap_or(false)
|| p.name == model_name
});
let (provider_id, base_url_path_prefix) = if let Some(provider) = provider {
let provider_id = provider.provider_interface.to_provider_id();
let prefix = provider.base_url_path_prefix.clone();
(provider_id, prefix)
} else {
let default_provider = providers_lock.iter().find(|p| {
p.default.unwrap_or(false)
});
if let Some(provider) = default_provider {
let provider_id = provider.provider_interface.to_provider_id();
let prefix = provider.base_url_path_prefix.clone();
(provider_id, prefix)
} else {
// Last resort: use OpenAI as hardcoded fallback
warn!("No default provider found, falling back to OpenAI");
(hermesllm::ProviderId::OpenAI, None)
}
};
drop(providers_lock);
// Calculate the upstream path using the proper API
let client_api = SupportedAPIsFromClient::from_endpoint(request_path)
.expect("Should have valid API endpoint");
client_api.target_endpoint_for_provider(
&provider_id,
request_path,
resolved_model,
is_streaming,
base_url_path_prefix.as_deref(),
)
}

View file

@ -0,0 +1,239 @@
use common::configuration::ModelUsagePreference;
use common::traces::{TraceCollector, SpanKind, SpanBuilder, parse_traceparent};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::{ProviderRequest, ProviderRequestType};
use hyper::StatusCode;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, info, warn};
use crate::router::llm_router::RouterService;
use crate::tracing::{OperationNameBuilder, operation_component, http, routing};
pub struct RoutingResult {
pub model_name: String
}
pub struct RoutingError {
pub message: String,
pub status_code: StatusCode,
}
impl RoutingError {
pub fn internal_error(message: String) -> Self {
Self {
message,
status_code: StatusCode::INTERNAL_SERVER_ERROR
}
}
}
/// Determines the routing decision if
///
/// # Returns
/// * `Ok(RoutingResult)` - Contains the selected model name and span ID
/// * `Err(RoutingError)` - Contains error details and optional span ID
pub async fn router_chat_get_upstream_model(
router_service: Arc<RouterService>,
client_request: ProviderRequestType,
request_headers: &hyper::HeaderMap,
trace_collector: Arc<TraceCollector>,
traceparent: &str,
request_path: &str,
) -> Result<RoutingResult, RoutingError> {
// Clone metadata for routing before converting (which consumes client_request)
let routing_metadata = client_request.metadata().clone();
// Convert to ChatCompletionsRequest for routing (regardless of input type)
let chat_request = match ProviderRequestType::try_from((
client_request,
&SupportedUpstreamAPIs::OpenAIChatCompletions(
hermesllm::apis::OpenAIApi::ChatCompletions,
),
)) {
Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
Ok(
ProviderRequestType::MessagesRequest(_)
| ProviderRequestType::BedrockConverse(_)
| ProviderRequestType::BedrockConverseStream(_)
| ProviderRequestType::ResponsesAPIRequest(_),
) => {
warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
return Err(RoutingError::internal_error(
"Request conversion failed".to_string(),
));
}
Err(err) => {
warn!("Failed to convert request to ChatCompletionsRequest: {}", err);
return Err(RoutingError::internal_error(format!(
"Failed to convert request: {}",
err
)));
}
};
debug!(
"[ARCH_ROUTER REQ]: {}",
&serde_json::to_string(&chat_request).unwrap()
);
// Extract trace_parent from headers
let trace_parent = request_headers
.iter()
.find(|(ty, _)| ty.as_str() == "traceparent")
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
// Extract usage preferences from metadata
let usage_preferences_str: Option<String> = routing_metadata.as_ref().and_then(|metadata| {
metadata
.get("archgw_preference_config")
.map(|value| value.to_string())
});
let usage_preferences: Option<Vec<ModelUsagePreference>> = usage_preferences_str
.as_ref()
.and_then(|s| serde_yaml::from_str(s).ok());
// Prepare log message with latest message from chat request
let latest_message_for_log = chat_request
.messages
.last()
.map_or("None".to_string(), |msg| {
msg.content.to_string().replace('\n', "\\n")
});
const MAX_MESSAGE_LENGTH: usize = 50;
let latest_message_for_log = if latest_message_for_log.chars().count() > MAX_MESSAGE_LENGTH {
let truncated: String = latest_message_for_log
.chars()
.take(MAX_MESSAGE_LENGTH)
.collect();
format!("{}...", truncated)
} else {
latest_message_for_log
};
info!(
"request received, request type: chat_completion, usage preferences from request: {}, request path: {}, latest message: {}",
usage_preferences.is_some(),
request_path,
latest_message_for_log
);
debug!("usage preferences from request: {:?}", usage_preferences);
// Capture start time for routing span
let routing_start_time = std::time::Instant::now();
let routing_start_system_time = std::time::SystemTime::now();
// Attempt to determine route using the router service
let routing_result = router_service
.determine_route(&chat_request.messages, trace_parent, usage_preferences)
.await;
match routing_result {
Ok(route) => match route {
Some((_, model_name)) => {
// Record successful routing span
let mut attrs: HashMap<String, String> = HashMap::new();
attrs.insert("route.selected_model".to_string(), model_name.clone());
record_routing_span(
trace_collector,
traceparent,
routing_start_time,
routing_start_system_time,
attrs,
)
.await;
Ok(RoutingResult {
model_name
})
}
None => {
// No route determined, use default model from request
info!(
"No route determined, using default model from request: {}",
chat_request.model
);
let default_model = chat_request.model.clone();
let mut attrs = HashMap::new();
attrs.insert("route.selected_model".to_string(), default_model.clone());
record_routing_span(
trace_collector,
traceparent,
routing_start_time,
routing_start_system_time,
attrs,
)
.await;
Ok(RoutingResult {
model_name: default_model
})
}
},
Err(err) => {
// Record failed routing span
let mut attrs = HashMap::new();
attrs.insert("route.selected_model".to_string(), "unknown".to_string());
attrs.insert("error.message".to_string(), err.to_string());
record_routing_span(
trace_collector,
traceparent,
routing_start_time,
routing_start_system_time,
attrs,
)
.await;
Err(RoutingError::internal_error(
format!("Failed to determine route: {}", err)
))
}
}
}
/// Helper function to record a routing span with the given attributes.
/// Reduces code duplication across different routing outcomes.
async fn record_routing_span(
trace_collector: Arc<TraceCollector>,
traceparent: &str,
start_time: std::time::Instant,
start_system_time: std::time::SystemTime,
attrs: HashMap<String, String>,
) {
// The routing always uses OpenAI Chat Completions format internally,
// so we log that as the actual API being used for routing
let routing_api_path = "/v1/chat/completions";
let routing_operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path(routing_api_path)
.with_target("Arch-Router-1.5B")
.build();
let (trace_id, parent_span_id) = parse_traceparent(traceparent);
// Build the routing span directly using constants
let mut span_builder = SpanBuilder::new(&routing_operation_name)
.with_trace_id(&trace_id)
.with_parent_span_id(&parent_span_id)
.with_kind(SpanKind::Client)
.with_start_time(start_system_time)
.with_end_time(std::time::SystemTime::now())
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, routing_api_path.to_string())
.with_attribute(routing::ROUTE_DETERMINATION_MS, start_time.elapsed().as_millis().to_string());
// Add all custom attributes
for (key, value) in attrs {
span_builder = span_builder.with_attribute(key, value);
}
let span = span_builder.build();
// Record the span directly to the collector
trace_collector.record_span(operation_component::ROUTING, span);
}

View file

@ -1,18 +1,27 @@
use bytes::Bytes;
use common::traces::{Span, Attribute, AttributeValue, TraceCollector, Event};
use http_body_util::combinators::BoxBody;
use http_body_util::StreamBody;
use hyper::body::Frame;
use std::sync::Arc;
use std::time::{Instant, SystemTime};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tracing::warn;
// Import tracing constants
use crate::tracing::{llm, error};
/// Trait for processing streaming chunks
/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
pub trait StreamProcessor: Send + 'static {
/// Process an incoming chunk of bytes
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String>;
/// Called when the first bytes are received (for time-to-first-token tracking)
fn on_first_bytes(&mut self) {}
/// Called when streaming completes successfully
fn on_complete(&mut self) {}
@ -20,13 +29,152 @@ pub trait StreamProcessor: Send + 'static {
fn on_error(&mut self, _error: &str) {}
}
/// A no-op processor that just forwards chunks as-is
pub struct PassthroughProcessor;
/// A processor that tracks streaming metrics and finalizes the span
pub struct PassthroughProcessor {
collector: Arc<TraceCollector>,
service_name: String,
span: Span,
total_bytes: usize,
chunk_count: usize,
start_time: Instant,
time_to_first_token: Option<u128>,
}
impl PassthroughProcessor {
/// Create a new passthrough processor
///
/// # Arguments
/// * `collector` - The trace collector to record the span to
/// * `service_name` - The service name for this span (e.g., "archgw(llm)")
/// * `span` - The span to finalize after streaming completes
/// * `start_time` - When the request started (for duration calculation)
pub fn new(
collector: Arc<TraceCollector>,
service_name: impl Into<String>,
span: Span,
start_time: Instant,
) -> Self {
Self {
collector,
service_name: service_name.into(),
span,
total_bytes: 0,
chunk_count: 0,
start_time,
time_to_first_token: None,
}
}
}
impl StreamProcessor for PassthroughProcessor {
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String> {
self.total_bytes += chunk.len();
self.chunk_count += 1;
Ok(Some(chunk))
}
fn on_first_bytes(&mut self) {
// Record time to first token (only for streaming)
if self.time_to_first_token.is_none() {
self.time_to_first_token = Some(self.start_time.elapsed().as_millis());
}
}
fn on_complete(&mut self) {
// Update span with streaming metrics and end time
let end_time_nanos = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
self.span.end_time_unix_nano = format!("{}", end_time_nanos);
// Add streaming metrics as attributes using constants
self.span.attributes.push(Attribute {
key: llm::RESPONSE_BYTES.to_string(),
value: AttributeValue {
string_value: Some(self.total_bytes.to_string()),
},
});
self.span.attributes.push(Attribute {
key: llm::DURATION_MS.to_string(),
value: AttributeValue {
string_value: Some(self.start_time.elapsed().as_millis().to_string()),
},
});
// Add time to first token if available (streaming only)
if let Some(ttft) = self.time_to_first_token {
self.span.attributes.push(Attribute {
key: llm::TIME_TO_FIRST_TOKEN_MS.to_string(),
value: AttributeValue {
string_value: Some(ttft.to_string()),
},
});
// Add time to first token as a span event
// Calculate the timestamp by adding ttft duration to span start time
if let Ok(start_time_nanos) = self.span.start_time_unix_nano.parse::<u128>() {
// Convert ttft from milliseconds to nanoseconds and add to start time
let event_timestamp = start_time_nanos + (ttft * 1_000_000);
let mut event = Event::new(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), event_timestamp);
event.add_attribute(
llm::TIME_TO_FIRST_TOKEN_MS.to_string(),
ttft.to_string(),
);
// Initialize events vector if needed
if self.span.events.is_none() {
self.span.events = Some(Vec::new());
}
if let Some(ref mut events) = self.span.events {
events.push(event);
}
}
}
// Record the finalized span
self.collector.record_span(&self.service_name, self.span.clone());
}
fn on_error(&mut self, error_msg: &str) {
warn!("Stream error in PassthroughProcessor: {}", error_msg);
// Update span with error info and end time
let end_time_nanos = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
self.span.end_time_unix_nano = format!("{}", end_time_nanos);
self.span.attributes.push(Attribute {
key: error::ERROR.to_string(),
value: AttributeValue {
string_value: Some("true".to_string()),
},
});
self.span.attributes.push(Attribute {
key: error::MESSAGE.to_string(),
value: AttributeValue {
string_value: Some(error_msg.to_string()),
},
});
self.span.attributes.push(Attribute {
key: llm::DURATION_MS.to_string(),
value: AttributeValue {
string_value: Some(self.start_time.elapsed().as_millis().to_string()),
},
});
// Record the error span
self.collector.record_span(&self.service_name, self.span.clone());
}
}
/// Result of creating a streaming response
@ -48,6 +196,8 @@ where
// Spawn a task to process and forward chunks
let processor_handle = tokio::spawn(async move {
let mut is_first_chunk = true;
while let Some(item) = byte_stream.next().await {
let chunk = match item {
Ok(chunk) => chunk,
@ -59,6 +209,12 @@ where
}
};
// Call on_first_bytes for the first chunk
if is_first_chunk {
processor.on_first_bytes();
is_first_chunk = false;
}
// Process the chunk
match processor.process_chunk(chunk) {
Ok(Some(processed_chunk)) => {
@ -91,3 +247,13 @@ where
processor_handle,
}
}
/// Truncates a message to the specified maximum length, adding "..." if truncated.
pub fn truncate_message(message: &str, max_length: usize) -> String {
if message.chars().count() > max_length {
let truncated: String = message.chars().take(max_length).collect();
format!("{}...", truncated)
} else {
message.to_string()
}
}

View file

@ -1,3 +1,4 @@
pub mod handlers;
pub mod router;
pub mod tracing;
pub mod utils;

View file

@ -1,5 +1,5 @@
use brightstaff::handlers::agent_chat_completions::agent_chat;
use brightstaff::handlers::router::router_chat;
use brightstaff::handlers::router::chat;
use brightstaff::handlers::models::list_models;
use brightstaff::handlers::function_calling::{function_calling_chat_handler};
use brightstaff::router::llm_router::RouterService;
@ -7,6 +7,7 @@ use brightstaff::utils::tracing::init_tracer;
use bytes::Bytes;
use common::configuration::Configuration;
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
use common::traces::TraceCollector;
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
use hyper::body::Incoming;
use hyper::server::conn::http1;
@ -46,10 +47,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let _tracer_provider = init_tracer();
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
info!(
"current working directory: {}",
env::current_dir().unwrap().display()
);
// loading arch_config.yaml file
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
@ -66,19 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone()));
let agents_list = Arc::new(RwLock::new(arch_config.agents.clone()));
let listeners = Arc::new(RwLock::new(arch_config.listeners.clone()));
debug!(
"arch_config: {:?}",
&serde_json::to_string(arch_config.as_ref()).unwrap()
);
let llm_provider_url =
env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string());
info!("llm provider url: {}", llm_provider_url);
info!("listening on http://{}", bind_address);
let listener = TcpListener::bind(bind_address).await?;
let routing_model_name: String = arch_config
.routing
.as_ref()
@ -100,18 +88,24 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let model_aliases = Arc::new(arch_config.model_aliases.clone());
// Initialize trace collector and start background flusher
let trace_collector = Arc::new(TraceCollector::from_env());
let _flusher_handle = trace_collector.clone().start_background_flusher();
info!("Trace collector initialized and background flusher started");
loop {
let (stream, _) = listener.accept().await?;
let peer_addr = stream.peer_addr()?;
let io = TokioIo::new(stream);
let router_service: Arc<RouterService> = Arc::clone(&router_service);
let model_aliases = Arc::clone(&model_aliases);
let model_aliases: Arc<Option<std::collections::HashMap<String, common::configuration::ModelAlias>>> = Arc::clone(&model_aliases);
let llm_provider_url = llm_provider_url.clone();
let llm_providers = llm_providers.clone();
let agents_list = agents_list.clone();
let listeners = listeners.clone();
let trace_collector = trace_collector.clone();
let service = service_fn(move |req| {
let router_service = Arc::clone(&router_service);
let parent_cx = extract_context_from_request(&req);
@ -120,13 +114,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let model_aliases = Arc::clone(&model_aliases);
let agents_list = agents_list.clone();
let listeners = listeners.clone();
let trace_collector = trace_collector.clone();
async move {
match (req.method(), req.uri().path()) {
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
let fully_qualified_url =
format!("{}{}", llm_provider_url, req.uri().path());
router_chat(req, router_service, fully_qualified_url, model_aliases)
chat(req, router_service, fully_qualified_url, model_aliases, llm_providers, trace_collector)
.with_context(parent_cx)
.await
}

View file

@ -0,0 +1,319 @@
/// OpenTelemetry Semantic Conventions
///
/// This module defines standard attribute keys following OTEL semantic conventions.
/// See: https://opentelemetry.io/docs/specs/semconv/
// =============================================================================
// Span Attributes - HTTP
// =============================================================================
/// Semantic conventions for HTTP-related span attributes
pub mod http {
/// HTTP request method
/// Example: "GET", "POST", "PUT"
pub const METHOD: &str = "http.method";
/// HTTP response status code
/// Example: "200", "404", "500"
pub const STATUS_CODE: &str = "http.status_code";
/// Full HTTP request URL
pub const URL: &str = "http.url";
/// HTTP request target (path + query)
/// Example: "/v1/chat/completions?stream=true"
pub const TARGET: &str = "http.target";
/// Upstream target path after routing transformation
/// Example: "/api/paas/v4/chat/completions" (for Zhipu provider)
pub const UPSTREAM_TARGET: &str = "http.upstream_target";
/// HTTP request scheme
/// Example: "http", "https"
pub const SCHEME: &str = "http.scheme";
/// Value of the HTTP User-Agent header
pub const USER_AGENT: &str = "http.user_agent";
/// Size of the request payload body in bytes
pub const REQUEST_CONTENT_LENGTH: &str = "http.request_content_length";
/// Size of the response payload body in bytes
pub const RESPONSE_CONTENT_LENGTH: &str = "http.response_content_length";
}
// =============================================================================
// Span Attributes - LLM Specific
// =============================================================================
/// Custom attributes for LLM operations
/// These follow the emerging OTEL GenAI semantic conventions
pub mod llm {
/// Name of the LLM model being called
/// Example: "gpt-4", "claude-3-sonnet", "llama-2-70b"
pub const MODEL_NAME: &str = "llm.model";
/// Provider of the LLM
/// Example: "openai", "anthropic", "azure-openai"
pub const PROVIDER: &str = "llm.provider";
/// Type of LLM operation
/// Example: "chat", "completion", "embedding"
pub const OPERATION_TYPE: &str = "llm.operation_type";
/// Whether the request is streaming
pub const IS_STREAMING: &str = "llm.is_streaming";
/// Total bytes received in the response
pub const RESPONSE_BYTES: &str = "llm.response_bytes";
/// Duration of the LLM call in milliseconds
pub const DURATION_MS: &str = "llm.duration_ms";
/// Time to first token in milliseconds (streaming only)
pub const TIME_TO_FIRST_TOKEN_MS: &str = "llm.time_to_first_token";
/// Number of prompt tokens used
pub const PROMPT_TOKENS: &str = "llm.usage.prompt_tokens";
/// Number of completion tokens generated
pub const COMPLETION_TOKENS: &str = "llm.usage.completion_tokens";
/// Total tokens used (prompt + completion)
pub const TOTAL_TOKENS: &str = "llm.usage.total_tokens";
/// Temperature parameter used
pub const TEMPERATURE: &str = "llm.request.temperature";
/// Max tokens parameter used
pub const MAX_TOKENS: &str = "llm.request.max_tokens";
/// Top-p parameter used
pub const TOP_P: &str = "llm.request.top_p";
/// List of tool names provided in the request
pub const TOOLS: &str = "llm.tools";
/// Preview of the user message (truncated)
pub const USER_MESSAGE_PREVIEW: &str = "llm.request.user_message_preview";
}
// =============================================================================
// Span Attributes - Routing & Gateway
// =============================================================================
/// Attributes specific to LLM routing and gateway operations
pub mod routing {
/// Strategy used to select the LLM endpoint
/// Example: "round-robin", "least-latency", "cost-optimized"
pub const STRATEGY: &str = "routing.strategy";
/// Selected upstream endpoint
pub const UPSTREAM_ENDPOINT: &str = "routing.upstream_endpoint";
/// Time taken to determine the route in milliseconds
pub const ROUTE_DETERMINATION_MS: &str = "routing.determination_ms";
/// Whether a fallback endpoint was used
pub const IS_FALLBACK: &str = "routing.is_fallback";
/// Reason for route selection
pub const SELECTION_REASON: &str = "routing.selection_reason";
}
// =============================================================================
// Span Attributes - Error Handling
// =============================================================================
/// Attributes for error and exception tracking
pub mod error {
/// Whether an error occurred
pub const ERROR: &str = "error";
/// Type/class of the error
/// Example: "TimeoutError", "AuthenticationError"
pub const TYPE: &str = "error.type";
/// Error message
pub const MESSAGE: &str = "error.message";
/// Stack trace of the error
pub const STACK_TRACE: &str = "error.stack_trace";
}
// =============================================================================
// Operation Names
// =============================================================================
/// Canonical operation name components for Arch Gateway
pub mod operation_component {
/// Inbound request handling
pub const INBOUND: &str = "plano(inbound)";
/// Routing decision phase
pub const ROUTING: &str = "plano(routing)";
/// Handoff to upstream service
pub const HANDOFF: &str = "plano(handoff)";
/// Agent filter execution
pub const AGENT_FILTER: &str = "plano(agent filter)";
/// Agent execution
pub const AGENT: &str = "plano(agent)";
/// LLM call
pub const LLM: &str = "plano(llm)";
}
/// Builder for constructing standardized operation names
///
/// Format: `{method} {path} {target}`
///
/// The operation component (e.g., "archgw(llm)") is now part of the service name,
/// so the operation name focuses on the HTTP request details and target.
///
/// # Examples
/// ```
/// use brightstaff::tracing::OperationNameBuilder;
///
/// // LLM call operation: "POST /v1/chat/completions gpt-4"
/// // (service name will be "archgw(llm)")
/// let op = OperationNameBuilder::new()
/// .with_method("POST")
/// .with_path("/v1/chat/completions")
/// .with_target("gpt-4")
/// .build();
///
/// // Agent filter operation: "POST /agents/v1/chat/completions hallucination-detector"
/// // (service name will be "archgw(agent filter)")
/// let op = OperationNameBuilder::new()
/// .with_method("POST")
/// .with_path("/agents/v1/chat/completions")
/// .with_target("hallucination-detector")
/// .build();
///
/// // Routing operation: "POST /v1/chat/completions"
/// // (service name will be "archgw(routing)")
/// let op = OperationNameBuilder::new()
/// .with_method("POST")
/// .with_path("/v1/chat/completions")
/// .build();
/// ```
pub struct OperationNameBuilder {
method: Option<String>,
path: Option<String>,
target: Option<String>,
}
impl OperationNameBuilder {
/// Create a new operation name builder
pub fn new() -> Self {
Self {
method: None,
path: None,
target: None,
}
}
/// Set the HTTP method
///
/// # Arguments
/// * `method` - HTTP method (e.g., "GET", "POST", "PUT")
pub fn with_method(mut self, method: impl Into<String>) -> Self {
self.method = Some(method.into());
self
}
/// Set the request path
///
/// # Arguments
/// * `path` - Request path (e.g., "/v1/chat/completions", "/agents/v1/chat/completions")
pub fn with_path(mut self, path: impl Into<String>) -> Self {
self.path = Some(path.into());
self
}
/// Set the target (model name, agent name, or filter name)
///
/// # Arguments
/// * `target` - Target identifier (e.g., "gpt-4", "my-agent", "hallucination-detector")
pub fn with_target(mut self, target: impl Into<String>) -> Self {
self.target = Some(target.into());
self
}
/// Build the operation name string
///
/// # Format
/// - With all components: `{method} {path} {target}`
/// - Without target: `{method} {path}`
/// - Without path: `{method}`
/// - Empty: returns empty string
pub fn build(self) -> String {
let mut parts = Vec::new();
if let Some(method) = self.method {
parts.push(method);
}
if let Some(path) = self.path {
parts.push(path);
}
if let Some(target) = self.target {
parts.push(target);
}
parts.join(" ")
}
}
impl Default for OperationNameBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_operation_name_full() {
let op = OperationNameBuilder::new()
.with_method("POST")
.with_path("/v1/chat/completions")
.with_target("gpt-4")
.build();
assert_eq!(op, "POST /v1/chat/completions gpt-4");
}
#[test]
fn test_operation_name_no_target() {
let op = OperationNameBuilder::new()
.with_method("POST")
.with_path("/v1/chat/completions")
.build();
assert_eq!(op, "POST /v1/chat/completions");
}
#[test]
fn test_operation_name_agent_filter() {
let op = OperationNameBuilder::new()
.with_method("POST")
.with_path("/agents/v1/chat/completions")
.with_target("content-filter")
.build();
assert_eq!(op, "POST /agents/v1/chat/completions content-filter");
}
#[test]
fn test_operation_name_minimal() {
let op = OperationNameBuilder::new().build();
assert_eq!(op, "");
}
}

View file

@ -0,0 +1,3 @@
mod constants;
pub use constants::{OperationNameBuilder, operation_component, http, llm, error, routing};

View file

@ -21,6 +21,15 @@ url = "2.5.4"
hermesllm = { version = "0.1.0", path = "../hermesllm" }
serde_with = "3.13.0"
# Optional dependencies for trace collection (not available in WASM)
tokio = { version = "1.44", features = ["sync", "time"], optional = true }
reqwest = { version = "0.12", features = ["json"], optional = true }
tracing = { version = "0.1", optional = true }
[features]
default = []
trace-collection = ["tokio", "reqwest", "tracing"]
[dev-dependencies]
pretty_assertions = "1.4.1"
serde_json = "1.0.64"

View file

@ -11,4 +11,5 @@ pub mod routing;
pub mod stats;
pub mod tokenizer;
pub mod tracing;
pub mod traces;
pub mod utils;

View file

@ -0,0 +1,259 @@
use super::shapes::Span;
use super::resource_span_builder::ResourceSpanBuilder;
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{interval, Duration};
use tracing::{debug, error, warn};
/// Parse W3C traceparent header into trace_id and parent_span_id
/// Format: "00-{trace_id}-{parent_span_id}-01"
///
/// Returns (trace_id, parent_span_id) as strings
pub fn parse_traceparent(traceparent: &str) -> (String, String) {
let parts: Vec<&str> = traceparent.split('-').collect();
if parts.len() == 4 {
(parts[1].to_string(), parts[2].to_string())
} else {
warn!("Invalid traceparent format: {}", traceparent);
// Generate empty IDs if parsing fails
(String::new(), String::new())
}
}
/// Collects and batches spans, flushing them to an OTEL collector
///
/// Supports multiple services, with each service (e.g., "archgw(routing)", "archgw(llm)")
/// maintaining its own span queue. Flushes all services together periodically.
pub struct TraceCollector {
/// Spans grouped by service name
/// Key: service name (e.g., "archgw(routing)", "archgw(llm)")
/// Value: queue of spans for that service
spans_by_service: Arc<Mutex<HashMap<String, VecDeque<Span>>>>,
flush_interval: Duration,
otel_url: String,
}
impl TraceCollector {
/// Create a new trace collector
/// # Arguments
/// * `flush_interval` - How often to flush buffered spans
/// * `otel_url` - OTEL collector endpoint URL
pub fn new(
flush_interval: Duration,
otel_url: String,
) -> Self {
Self {
spans_by_service: Arc::new(Mutex::new(HashMap::new())),
flush_interval,
otel_url,
}
}
/// Create with defaults from environment or sensible defaults
pub fn from_env() -> Self {
let batch_size = std::env::var("TRACE_BATCH_SIZE")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(100);
let flush_interval_secs = std::env::var("TRACE_FLUSH_INTERVAL_SECS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(1);
let otel_url = std::env::var("OTEL_COLLECTOR_URL")
.unwrap_or_else(|_| "http://host.docker.internal:4318/v1/traces".to_string());
debug!(
"TraceCollector initialized: batch_size={}, flush_interval={}s, url={}",
batch_size, flush_interval_secs, otel_url
);
Self::new(
Duration::from_secs(flush_interval_secs),
otel_url,
)
}
/// Record a span for a specific service
///
/// # Arguments
/// * `service_name` - Name of the service (e.g., "archgw(routing)", "archgw(llm)")
/// * `span` - The span to record
pub fn record_span(&self, service_name: impl Into<String>, span: Span) {
let service_name = service_name.into();
// Use try_lock to avoid blocking in async contexts
// If the lock is held, we skip recording (telemetry shouldn't block the app)
if let Ok(mut spans_by_service) = self.spans_by_service.try_lock() {
// Get or create the queue for this service
let spans = spans_by_service
.entry(service_name)
.or_insert_with(VecDeque::new);
spans.push_back(span);
} else {
// Lock contention - skip recording this span
debug!("Skipped span recording due to lock contention");
}
// Flushing is handled by the periodic background flusher (see `start_background_flusher`).
}
/// Flush all buffered spans to the OTEL collector
/// Builds ResourceSpans for each service with spans
pub async fn flush(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let mut spans_by_service = self.spans_by_service.lock().await;
if spans_by_service.is_empty() {
return Ok(());
}
// Snapshot and drain all services' spans
let service_batches: Vec<(String, Vec<Span>)> = spans_by_service
.iter_mut()
.filter_map(|(service_name, spans)| {
if spans.is_empty() {
None
} else {
Some((service_name.clone(), spans.drain(..).collect()))
}
})
.collect();
drop(spans_by_service); // Release lock before HTTP call
if service_batches.is_empty() {
return Ok(());
}
let total_spans: usize = service_batches.iter().map(|(_, spans)| spans.len()).sum();
debug!("Flushing {} spans across {} services to OTEL collector", total_spans, service_batches.len());
// Build canonical OTEL payload structure - one ResourceSpan per service
let resource_spans = self.build_resource_spans(service_batches);
match self.send_to_otel(resource_spans).await {
Ok(_) => {
debug!("Successfully flushed {} spans", total_spans);
Ok(())
}
Err(e) => {
warn!("Failed to send spans to OTEL collector: {:?}", e);
Err(e)
}
}
}
/// Build OTEL-compliant resource spans from collected spans, one ResourceSpan per service
fn build_resource_spans(&self, service_batches: Vec<(String, Vec<Span>)>) -> Vec<super::shapes::ResourceSpan> {
service_batches
.into_iter()
.map(|(service_name, spans)| {
ResourceSpanBuilder::new(&service_name)
.add_spans(spans)
.build()
})
.collect()
}
/// Send resource spans to OTEL collector
/// Serializes as {"resourceSpans": [...]} per OTEL spec
async fn send_to_otel(
&self,
resource_spans: Vec<super::shapes::ResourceSpan>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let client = reqwest::Client::new();
// Create OTEL payload with proper structure
let payload = serde_json::json!({
"resourceSpans": resource_spans
});
let response = client
.post(&self.otel_url)
.header("Content-Type", "application/json")
.json(&payload)
.timeout(Duration::from_secs(5))
.send()
.await?;
if !response.status().is_success() {
warn!(
"OTEL collector returned non-success status: {}",
response.status()
);
return Err(format!("OTEL collector error: {}", response.status()).into());
}
Ok(())
}
/// Start a background task that periodically flushes traces
/// Returns a join handle that can be used to stop the flusher
pub fn start_background_flusher(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
let flush_interval = self.flush_interval;
tokio::spawn(async move {
let mut ticker = interval(flush_interval);
loop {
ticker.tick().await;
if let Err(e) = self.flush().await {
error!("Background trace flush failed: {:?}", e);
}
}
})
}
/// Get current number of buffered spans across all services (for testing/monitoring)
pub async fn buffered_count(&self) -> usize {
self.spans_by_service
.lock()
.await
.values()
.map(|spans| spans.len())
.sum()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::traces::SpanBuilder;
#[tokio::test]
async fn test_collector_basic() {
let collector = TraceCollector::new(
Duration::from_secs(60),
"http://test:4318/v1/traces".to_string(),
);
let span = SpanBuilder::new("test_operation")
.with_trace_id("abc123")
.build();
collector.record_span("test-service", span);
assert_eq!(collector.buffered_count().await, 1);
}
#[tokio::test]
async fn test_collector_auto_flush() {
// Since batch-triggered flush behavior was removed, record two spans and verify both are buffered
let collector = Arc::new(TraceCollector::new(
Duration::from_secs(60),
"http://test:4318/v1/traces".to_string(),
));
let span1 = SpanBuilder::new("test1").build();
let span2 = SpanBuilder::new("test2").build();
collector.record_span("test-service", span1);
collector.record_span("test-service", span2);
// With no batch-triggered flush, both spans should remain buffered
assert_eq!(collector.buffered_count().await, 2);
}
}

View file

@ -0,0 +1,27 @@
/// OpenTelemetry semantic convention constants for tracing
///
/// These constants ensure consistency across the codebase and prevent typos
/// Resource attribute keys following OTEL semantic conventions
pub mod resource {
/// Logical name of the service
pub const SERVICE_NAME: &str = "service.name";
/// Version of the service
pub const SERVICE_VERSION: &str = "service.version";
/// Service namespace/environment
pub const SERVICE_NAMESPACE: &str = "service.namespace";
/// Service instance ID
pub const SERVICE_INSTANCE_ID: &str = "service.instance.id";
}
/// Instrumentation scope defaults
pub mod scope {
/// Default scope name for tracing instrumentation
pub const DEFAULT_NAME: &str = "brightstaff.tracing";
/// Default scope version
pub const DEFAULT_VERSION: &str = "1.0.0";
}

View file

@ -0,0 +1,23 @@
// Original tracing types (OTEL structures)
mod shapes;
// New tracing utilities
mod span_builder;
mod resource_span_builder;
mod constants;
#[cfg(feature = "trace-collection")]
mod collector;
// Re-export original types
pub use shapes::{
Span, Event, Traceparent, TraceparentNewError,
ResourceSpan, Resource, ScopeSpan, Scope, Attribute, AttributeValue,
};
// Re-export new utilities
pub use span_builder::{SpanBuilder, SpanKind};
pub use resource_span_builder::ResourceSpanBuilder;
pub use constants::*;
#[cfg(feature = "trace-collection")]
pub use collector::{TraceCollector, parse_traceparent};

View file

@ -0,0 +1,121 @@
use super::shapes::{ResourceSpan, Resource, ScopeSpan, Scope, Span, Attribute, AttributeValue};
use super::constants::{resource, scope};
use std::collections::HashMap;
/// Builder for creating OTEL ResourceSpan structures
///
/// Provides a fluent API for building the resource/scope/span hierarchy
pub struct ResourceSpanBuilder {
service_name: String,
resource_attributes: HashMap<String, String>,
scope_name: String,
scope_version: String,
spans: Vec<Span>,
}
impl ResourceSpanBuilder {
/// Create a new ResourceSpan builder with service name
pub fn new(service_name: impl Into<String>) -> Self {
Self {
service_name: service_name.into(),
resource_attributes: HashMap::new(),
scope_name: scope::DEFAULT_NAME.to_string(),
scope_version: scope::DEFAULT_VERSION.to_string(),
spans: Vec::new(),
}
}
/// Add a resource attribute (e.g., deployment.environment, host.name)
pub fn with_resource_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.resource_attributes.insert(key.into(), value.into());
self
}
/// Set the instrumentation scope name
pub fn with_scope_name(mut self, name: impl Into<String>) -> Self {
self.scope_name = name.into();
self
}
/// Set the instrumentation scope version
pub fn with_scope_version(mut self, version: impl Into<String>) -> Self {
self.scope_version = version.into();
self
}
/// Add a single span
pub fn add_span(mut self, span: Span) -> Self {
self.spans.push(span);
self
}
/// Add multiple spans
pub fn add_spans(mut self, spans: Vec<Span>) -> Self {
self.spans.extend(spans);
self
}
/// Build the ResourceSpan
pub fn build(self) -> ResourceSpan {
// Build resource attributes
let mut attributes = vec![
Attribute {
key: resource::SERVICE_NAME.to_string(),
value: AttributeValue {
string_value: Some(self.service_name),
},
}
];
// Add custom resource attributes
for (key, value) in self.resource_attributes {
attributes.push(Attribute {
key,
value: AttributeValue {
string_value: Some(value),
},
});
}
let resource = Resource { attributes };
let scope = Scope {
name: self.scope_name,
version: self.scope_version,
attributes: Vec::new(),
};
let scope_span = ScopeSpan {
scope,
spans: self.spans,
};
ResourceSpan {
resource,
scope_spans: vec![scope_span],
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::traces::SpanBuilder;
#[test]
fn test_resource_span_builder() {
let span1 = SpanBuilder::new("operation1").build();
let span2 = SpanBuilder::new("operation2").build();
let resource_span = ResourceSpanBuilder::new("test-service")
.with_resource_attribute("deployment.environment", "production")
.with_scope_name("test-scope")
.add_span(span1)
.add_span(span2)
.build();
assert_eq!(resource_span.resource.attributes.len(), 2); // service.name + custom
assert_eq!(resource_span.scope_spans.len(), 1);
assert_eq!(resource_span.scope_spans[0].spans.len(), 2);
}
}

View file

@ -0,0 +1,123 @@
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug)]
pub struct ResourceSpan {
pub resource: Resource,
#[serde(rename = "scopeSpans")]
pub scope_spans: Vec<ScopeSpan>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Resource {
pub attributes: Vec<Attribute>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct ScopeSpan {
pub scope: Scope,
pub spans: Vec<Span>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Scope {
pub name: String,
pub version: String,
pub attributes: Vec<Attribute>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Span {
#[serde(rename = "traceId")]
pub trace_id: String,
#[serde(rename = "spanId")]
pub span_id: String,
#[serde(rename = "parentSpanId")]
pub parent_span_id: Option<String>, // Optional in case there's no parent span
pub name: String,
#[serde(rename = "startTimeUnixNano")]
pub start_time_unix_nano: String,
#[serde(rename = "endTimeUnixNano")]
pub end_time_unix_nano: String,
pub kind: u32,
pub attributes: Vec<Attribute>,
pub events: Option<Vec<Event>>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Event {
#[serde(rename = "timeUnixNano")]
pub time_unix_nano: String,
pub name: String,
pub attributes: Vec<Attribute>,
}
impl Event {
pub fn new(name: String, time_unix_nano: u128) -> Self {
Event {
time_unix_nano: format!("{}", time_unix_nano),
name,
attributes: Vec::new(),
}
}
pub fn add_attribute(&mut self, key: String, value: String) {
self.attributes.push(Attribute {
key,
value: AttributeValue {
string_value: Some(value),
},
});
}
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Attribute {
pub key: String,
pub value: AttributeValue,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct AttributeValue {
#[serde(rename = "stringValue")]
pub string_value: Option<String>, // Use Option to handle different value types
}
pub struct Traceparent {
pub version: String,
pub trace_id: String,
pub parent_id: String,
pub flags: String,
}
impl std::fmt::Display for Traceparent {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}-{}-{}-{}",
self.version, self.trace_id, self.parent_id, self.flags
)
}
}
#[derive(thiserror::Error, Debug)]
pub enum TraceparentNewError {
#[error("Invalid traceparent: \'{0}\'")]
InvalidTraceparent(String),
}
impl TryFrom<String> for Traceparent {
type Error = TraceparentNewError;
fn try_from(traceparent: String) -> Result<Self, Self::Error> {
let traceparent_tokens: Vec<&str> = traceparent.split("-").collect::<Vec<&str>>();
if traceparent_tokens.len() != 4 {
return Err(TraceparentNewError::InvalidTraceparent(traceparent));
}
Ok(Traceparent {
version: traceparent_tokens[0].to_string(),
trace_id: traceparent_tokens[1].to_string(),
parent_id: traceparent_tokens[2].to_string(),
flags: traceparent_tokens[3].to_string(),
})
}
}

View file

@ -0,0 +1,193 @@
use super::shapes::{Span, Attribute, AttributeValue};
use std::collections::HashMap;
use std::time::SystemTime;
/// OpenTelemetry span kinds
/// https://opentelemetry.io/docs/specs/otel/trace/api/#spankind
#[derive(Debug, Clone, Copy)]
pub enum SpanKind {
/// Default value. Indicates that the span represents an internal operation within an application
Internal = 0,
/// Indicates that the span describes a request to some remote service
Client = 3,
}
/// Builder for creating OTEL-compliant spans with a fluent API
///
/// This is the recommended way to create spans with proper trace context.
///
/// # Example
/// ```no_run
/// use common::traces::{SpanBuilder, SpanKind};
/// use std::time::SystemTime;
///
/// let span = SpanBuilder::new("router_chat")
/// .with_trace_id("abc123")
/// .with_parent_span_id("parent456")
/// .with_kind(SpanKind::Internal)
/// .with_attribute("http.method", "POST")
/// .with_attribute("http.path", "/v1/chat/completions")
/// .build();
/// ```
pub struct SpanBuilder {
name: String,
trace_id: Option<String>,
parent_span_id: Option<String>,
start_time: SystemTime,
end_time: Option<SystemTime>,
kind: SpanKind,
attributes: HashMap<String, String>,
}
impl SpanBuilder {
/// Create a new span builder
///
/// # Arguments
/// * `name` - The operation name for this span (e.g., "router_chat", "determine_route")
pub fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
trace_id: None,
parent_span_id: None,
start_time: SystemTime::now(),
end_time: None,
kind: SpanKind::Internal,
attributes: HashMap::new(),
}
}
/// Set the trace ID (extracted from traceparent or OpenTelemetry context)
pub fn with_trace_id(mut self, trace_id: impl Into<String>) -> Self {
self.trace_id = Some(trace_id.into());
self
}
/// Set the parent span ID to link this span to its parent
pub fn with_parent_span_id(mut self, parent_span_id: impl Into<String>) -> Self {
self.parent_span_id = Some(parent_span_id.into());
self
}
/// Set the span kind (defaults to Internal)
pub fn with_kind(mut self, kind: SpanKind) -> Self {
self.kind = kind;
self
}
/// Set explicit start time (defaults to now)
pub fn with_start_time(mut self, start_time: SystemTime) -> Self {
self.start_time = start_time;
self
}
/// Set explicit end time (defaults to build time)
pub fn with_end_time(mut self, end_time: SystemTime) -> Self {
self.end_time = Some(end_time);
self
}
/// Add a single attribute to the span
pub fn with_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.attributes.insert(key.into(), value.into());
self
}
/// Add multiple attributes at once
pub fn with_attributes(mut self, attrs: HashMap<String, String>) -> Self {
self.attributes.extend(attrs);
self
}
/// Build the span, consuming the builder
///
/// Creates a complete OTEL-compliant span with all provided attributes,
/// generating span_id and using provided or random trace_id.
pub fn build(self) -> Span {
let end_time = self.end_time.unwrap_or_else(SystemTime::now);
let start_nanos = system_time_to_nanos(self.start_time);
let end_nanos = system_time_to_nanos(end_time);
// Generate trace_id if not provided
let trace_id = self.trace_id.unwrap_or_else(|| generate_random_trace_id());
// Create attributes in OTEL format
let attributes: Vec<Attribute> = self.attributes
.into_iter()
.map(|(key, value)| Attribute {
key,
value: AttributeValue {
string_value: Some(value),
},
})
.collect();
// Build span directly without going through Span::new()
Span {
trace_id,
span_id: generate_random_span_id(),
parent_span_id: self.parent_span_id,
name: self.name,
start_time_unix_nano: format!("{}", start_nanos),
end_time_unix_nano: format!("{}", end_nanos),
kind: self.kind as u32,
attributes,
events: None,
}
}
}
/// Convert SystemTime to nanoseconds since UNIX epoch for OTEL
fn system_time_to_nanos(time: SystemTime) -> u128 {
time.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos()
}
/// Generate a random span ID (16 hex characters = 8 bytes)
fn generate_random_span_id() -> String {
use rand::RngCore;
let mut rng = rand::thread_rng();
let mut random_bytes = [0u8; 8];
rng.fill_bytes(&mut random_bytes);
hex::encode(random_bytes)
}
/// Generate a random trace ID (32 hex characters = 16 bytes)
fn generate_random_trace_id() -> String {
use rand::RngCore;
let mut rng = rand::thread_rng();
let mut random_bytes = [0u8; 16];
rng.fill_bytes(&mut random_bytes);
hex::encode(random_bytes)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_span_builder_basic() {
let span = SpanBuilder::new("test_operation")
.with_trace_id("abc123")
.with_parent_span_id("parent123")
.with_attribute("key", "value")
.build();
assert_eq!(span.name, "test_operation");
assert_eq!(span.trace_id, "abc123");
assert_eq!(span.parent_span_id, Some("parent123".to_string()));
assert_eq!(span.attributes.len(), 1);
}
#[test]
fn test_span_builder_no_parent() {
let span = SpanBuilder::new("root_span")
.with_trace_id("xyz789")
.build();
assert_eq!(span.name, "root_span");
assert_eq!(span.trace_id, "xyz789");
assert_eq!(span.parent_span_id, None);
}
}

View file

@ -200,6 +200,17 @@ impl ProviderRequest for ConverseRequest {
})
}
fn get_tool_names(&self) -> Option<Vec<String>> {
self.tool_config.as_ref()?.tools.as_ref().map(|tools| {
tools
.iter()
.filter_map(|tool| match tool {
Tool::ToolSpec { tool_spec } => Some(tool_spec.name.clone()),
})
.collect()
})
}
fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
serde_json::to_vec(self).map_err(|e| ProviderRequestError {
message: format!("Failed to serialize Bedrock request: {}", e),

View file

@ -513,6 +513,12 @@ impl ProviderRequest for MessagesRequest {
None
}
fn get_tool_names(&self) -> Option<Vec<String>> {
self.tools.as_ref().map(|tools| {
tools.iter().map(|tool| tool.name.clone()).collect()
})
}
fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
serde_json::to_vec(self).map_err(|e| ProviderRequestError {
message: format!("Failed to serialize MessagesRequest: {}", e),

View file

@ -687,6 +687,32 @@ impl ProviderRequest for ChatCompletionsRequest {
})
}
fn get_tool_names(&self) -> Option<Vec<String>> {
// First check the 'tools' field (current API)
if let Some(tools) = &self.tools {
let names: Vec<String> = tools
.iter()
.map(|tool| tool.function.name.clone())
.collect();
if !names.is_empty() {
return Some(names);
}
}
// Fallback to 'functions' field (deprecated but still supported)
if let Some(functions) = &self.functions {
let names: Vec<String> = functions
.iter()
.map(|func| func.function.name.clone())
.collect();
if !names.is_empty() {
return Some(names);
}
}
None
}
fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
serde_json::to_vec(&self).map_err(|e| ProviderRequestError {
message: format!("Failed to serialize OpenAI request: {}", e),

View file

@ -1063,6 +1063,19 @@ impl ProviderRequest for ResponsesAPIRequest {
}
}
fn get_tool_names(&self) -> Option<Vec<String>> {
self.tools.as_ref().map(|tools| {
tools
.iter()
.filter_map(|tool| match tool {
Tool::Function { name, .. } => Some(name.clone()),
// Other tool types don't have user-defined names
_ => None,
})
.collect()
})
}
fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
serde_json::to_vec(&self).map_err(|e| ProviderRequestError {
message: format!("Failed to serialize Responses API request: {}", e),

View file

@ -35,6 +35,9 @@ pub trait ProviderRequest: Send + Sync {
/// Extract the user message for tracing/logging purposes
fn get_recent_user_message(&self) -> Option<String>;
/// Get tool names if tools are defined in the request
fn get_tool_names(&self) -> Option<Vec<String>>;
/// Convert the request to bytes for transmission
fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError>;
@ -95,6 +98,16 @@ impl ProviderRequest for ProviderRequestType {
}
}
fn get_tool_names(&self) -> Option<Vec<String>> {
match self {
Self::ChatCompletionsRequest(r) => r.get_tool_names(),
Self::MessagesRequest(r) => r.get_tool_names(),
Self::BedrockConverse(r) => r.get_tool_names(),
Self::BedrockConverseStream(r) => r.get_tool_names(),
Self::ResponsesAPIRequest(r) => r.get_tool_names(),
}
}
fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
match self {
Self::ChatCompletionsRequest(r) => r.to_bytes(),

View file

@ -2,26 +2,18 @@ use crate::metrics::Metrics;
use crate::stream_context::StreamContext;
use common::configuration::Configuration;
use common::configuration::Overrides;
use common::consts::OTEL_COLLECTOR_HTTP;
use common::consts::OTEL_POST_PATH;
use common::http::CallArgs;
use common::http::Client;
use common::llm_providers::LlmProviders;
use common::ratelimit;
use common::stats::Gauge;
use common::tracing::TraceData;
use log::trace;
use log::warn;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::cell::RefCell;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::rc::Rc;
use std::time::Duration;
use std::sync::{Arc, Mutex};
#[derive(Debug)]
pub struct CallContext {}
@ -31,7 +23,6 @@ pub struct FilterContext {
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: RefCell<HashMap<u32, CallContext>>,
llm_providers: Option<Rc<LlmProviders>>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
}
@ -41,7 +32,6 @@ impl FilterContext {
callouts: RefCell::new(HashMap::new()),
metrics: Rc::new(Metrics::new()),
llm_providers: None,
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
overrides: Rc::new(None),
}
}
@ -95,7 +85,6 @@ impl RootContext for FilterContext {
.as_ref()
.expect("LLM Providers must exist when Streams are being created"),
),
Arc::clone(&self.traces_queue),
Rc::clone(&self.overrides),
)))
}
@ -108,34 +97,6 @@ impl RootContext for FilterContext {
self.set_tick_period(Duration::from_secs(1));
true
}
fn on_tick(&mut self) {
let _ = self.traces_queue.try_lock().map(|mut traces_queue| {
while let Some(trace) = traces_queue.pop_front() {
let trace_str = serde_json::to_string(&trace).unwrap();
trace!("trace details: {}", trace_str);
let call_args = CallArgs::new(
OTEL_COLLECTOR_HTTP,
OTEL_POST_PATH,
vec![
(":method", http::Method::POST.as_str()),
(":path", OTEL_POST_PATH),
(":authority", OTEL_COLLECTOR_HTTP),
("content-type", "application/json"),
],
Some(trace_str.as_bytes()),
vec![],
Duration::from_secs(60),
);
if let Err(error) = self.http_call(call_args, CallContext {}) {
warn!(
"failed to schedule http call to otel-collector: {:?}",
error
);
}
}
});
}
}
impl Context for FilterContext {

View file

@ -4,10 +4,8 @@ use log::{debug, info, warn};
use proxy_wasm::hostcalls::get_current_time;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::collections::VecDeque;
use std::num::NonZero;
use std::rc::Rc;
use std::sync::{Arc, Mutex};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use crate::metrics::Metrics;
@ -20,7 +18,6 @@ use common::errors::ServerError;
use common::llm_providers::LlmProviders;
use common::ratelimit::Header;
use common::stats::{IncrementingMetric, RecordingMetric};
use common::tracing::{Event, Span, TraceData, Traceparent};
use common::{ratelimit, routing, tokenizer};
use hermesllm::apis::streaming_shapes::amazon_bedrock_binary_frame::BedrockBinaryFrameDecoder;
use hermesllm::apis::streaming_shapes::sse::{
@ -51,7 +48,6 @@ pub struct StreamContext {
ttft_time: Option<u128>,
traceparent: Option<String>,
request_body_sent_time: Option<u128>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
user_message: Option<String>,
upstream_status_code: Option<StatusCode>,
@ -65,7 +61,6 @@ impl StreamContext {
pub fn new(
metrics: Rc<Metrics>,
llm_providers: Rc<LlmProviders>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
) -> Self {
StreamContext {
@ -83,7 +78,6 @@ impl StreamContext {
ttft_duration: None,
traceparent: None,
ttft_time: None,
traces_queue,
request_body_sent_time: None,
user_message: None,
upstream_status_code: None,
@ -333,68 +327,6 @@ impl StreamContext {
self.metrics
.output_sequence_length
.record(self.response_tokens as u64);
if let Some(traceparent) = self.traceparent.as_ref() {
let current_time_ns = current_time_ns();
match Traceparent::try_from(traceparent.to_string()) {
Err(e) => {
warn!("traceparent header is invalid: {}", e);
}
Ok(traceparent) => {
let service_name = match &self.resolved_api {
Some(api) => {
let api_display = api.to_string();
format!("archgw.{}", api_display)
}
None => "archgw".to_string(),
};
let mut trace_data =
common::tracing::TraceData::new_with_service_name(service_name);
let mut llm_span = Span::new(
self.llm_provider().name.to_string(),
Some(traceparent.trace_id),
Some(traceparent.parent_id),
self.request_body_sent_time.unwrap(),
current_time_ns,
);
llm_span
.add_attribute("model".to_string(), self.llm_provider().name.to_string());
if let Some(user_message) = &self.user_message {
llm_span.add_attribute("message".to_string(), user_message.clone());
}
// Add HTTP attributes
if let Some(method) = &self.http_method {
llm_span.add_attribute("http.method".to_string(), method.clone());
}
if let Some(protocol) = &self.http_protocol {
llm_span.add_attribute("http.protocol".to_string(), protocol.clone());
}
if let Some(status_code) = &self.upstream_status_code {
llm_span.add_attribute(
"http.status_code".to_string(),
status_code.as_u16().to_string(),
);
}
// Add request ID attribute
llm_span
.add_attribute("http.request_id".to_string(), self.request_identifier());
if self.ttft_time.is_some() {
llm_span.add_event(Event::new(
"time_to_first_token".to_string(),
self.ttft_time.unwrap(),
));
}
trace_data.add_span(llm_span);
self.traces_queue.lock().unwrap().push_back(trace_data);
}
};
}
}
fn read_raw_response_body(&mut self, body_size: usize) -> Result<Vec<u8>, Action> {

View file

@ -8,8 +8,21 @@ listeners:
timeout: 30s
llm_providers:
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
endpoints:
frankfurther_api: