2025-12-11 15:21:57 -08:00
|
|
|
use common::configuration::ModelUsagePreference;
|
|
|
|
|
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
|
|
|
|
|
use hermesllm::{ProviderRequest, ProviderRequestType};
|
|
|
|
|
use hyper::StatusCode;
|
|
|
|
|
use std::sync::Arc;
|
|
|
|
|
use tracing::{debug, info, warn};
|
|
|
|
|
|
|
|
|
|
use crate::router::llm_router::RouterService;
|
2026-02-09 13:33:27 -08:00
|
|
|
use crate::tracing::routing;
|
2025-12-11 15:21:57 -08:00
|
|
|
|
|
|
|
|
/// The outcome of a successful routing decision for a chat request.
pub struct RoutingResult {
    // Name of the model selected by the router, or the sentinel "none"
    // when no route was determined (the caller then falls back to the
    // request's original model).
    pub model_name: String,
    // Name of the matched route, when one was determined.
    pub route_name: Option<String>,
}
|
|
|
|
|
|
|
|
|
|
/// Error produced when routing fails, carrying an HTTP status code
/// to surface to the client.
pub struct RoutingError {
    // Human-readable description of the failure.
    pub message: String,
    // HTTP status to return for this failure.
    pub status_code: StatusCode,
}
|
|
|
|
|
|
|
|
|
|
impl RoutingError {
    /// Builds a `RoutingError` with the given message and a fixed
    /// 500 (Internal Server Error) status code.
    pub fn internal_error(message: String) -> Self {
        Self {
            message,
            status_code: StatusCode::INTERNAL_SERVER_ERROR,
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// Determines the routing decision if
|
|
|
|
|
///
|
|
|
|
|
/// # Returns
|
|
|
|
|
/// * `Ok(RoutingResult)` - Contains the selected model name and span ID
|
|
|
|
|
/// * `Err(RoutingError)` - Contains error details and optional span ID
|
|
|
|
|
pub async fn router_chat_get_upstream_model(
|
|
|
|
|
router_service: Arc<RouterService>,
|
|
|
|
|
client_request: ProviderRequestType,
|
|
|
|
|
traceparent: &str,
|
|
|
|
|
request_path: &str,
|
2026-01-07 12:04:10 -08:00
|
|
|
request_id: &str,
|
2026-03-10 12:23:18 -07:00
|
|
|
inline_usage_preferences: Option<Vec<ModelUsagePreference>>,
|
2025-12-11 15:21:57 -08:00
|
|
|
) -> Result<RoutingResult, RoutingError> {
|
|
|
|
|
// Clone metadata for routing before converting (which consumes client_request)
|
|
|
|
|
let routing_metadata = client_request.metadata().clone();
|
|
|
|
|
|
|
|
|
|
// Convert to ChatCompletionsRequest for routing (regardless of input type)
|
|
|
|
|
let chat_request = match ProviderRequestType::try_from((
|
|
|
|
|
client_request,
|
2025-12-25 21:08:37 -08:00
|
|
|
&SupportedUpstreamAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions),
|
2025-12-11 15:21:57 -08:00
|
|
|
)) {
|
|
|
|
|
Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
|
|
|
|
|
Ok(
|
|
|
|
|
ProviderRequestType::MessagesRequest(_)
|
|
|
|
|
| ProviderRequestType::BedrockConverse(_)
|
|
|
|
|
| ProviderRequestType::BedrockConverseStream(_)
|
|
|
|
|
| ProviderRequestType::ResponsesAPIRequest(_),
|
|
|
|
|
) => {
|
2026-02-09 13:33:27 -08:00
|
|
|
warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
|
2025-12-11 15:21:57 -08:00
|
|
|
return Err(RoutingError::internal_error(
|
|
|
|
|
"Request conversion failed".to_string(),
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
Err(err) => {
|
2025-12-25 21:08:37 -08:00
|
|
|
warn!(
|
2026-02-09 13:33:27 -08:00
|
|
|
"failed to convert request to ChatCompletionsRequest: {}",
|
2025-12-25 21:08:37 -08:00
|
|
|
err
|
|
|
|
|
);
|
2025-12-11 15:21:57 -08:00
|
|
|
return Err(RoutingError::internal_error(format!(
|
|
|
|
|
"Failed to convert request: {}",
|
|
|
|
|
err
|
|
|
|
|
)));
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
debug!(
|
2026-02-09 13:33:27 -08:00
|
|
|
request = %serde_json::to_string(&chat_request).unwrap(),
|
|
|
|
|
"router request"
|
2025-12-11 15:21:57 -08:00
|
|
|
);
|
|
|
|
|
|
2026-03-10 12:23:18 -07:00
|
|
|
// Use inline preferences if provided, otherwise fall back to metadata extraction
|
|
|
|
|
let usage_preferences: Option<Vec<ModelUsagePreference>> = if inline_usage_preferences.is_some()
|
|
|
|
|
{
|
|
|
|
|
inline_usage_preferences
|
|
|
|
|
} else {
|
|
|
|
|
let usage_preferences_str: Option<String> =
|
|
|
|
|
routing_metadata.as_ref().and_then(|metadata| {
|
|
|
|
|
metadata
|
|
|
|
|
.get("plano_preference_config")
|
|
|
|
|
.map(|value| value.to_string())
|
|
|
|
|
});
|
|
|
|
|
usage_preferences_str
|
|
|
|
|
.as_ref()
|
|
|
|
|
.and_then(|s| serde_yaml::from_str(s).ok())
|
|
|
|
|
};
|
2025-12-11 15:21:57 -08:00
|
|
|
|
|
|
|
|
// Prepare log message with latest message from chat request
|
|
|
|
|
let latest_message_for_log = chat_request
|
|
|
|
|
.messages
|
|
|
|
|
.last()
|
|
|
|
|
.map_or("None".to_string(), |msg| {
|
2026-01-16 16:24:03 -08:00
|
|
|
msg.content
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map_or("None".to_string(), |c| c.to_string().replace('\n', "\\n"))
|
2025-12-11 15:21:57 -08:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
const MAX_MESSAGE_LENGTH: usize = 50;
|
|
|
|
|
let latest_message_for_log = if latest_message_for_log.chars().count() > MAX_MESSAGE_LENGTH {
|
|
|
|
|
let truncated: String = latest_message_for_log
|
|
|
|
|
.chars()
|
|
|
|
|
.take(MAX_MESSAGE_LENGTH)
|
|
|
|
|
.collect();
|
|
|
|
|
format!("{}...", truncated)
|
|
|
|
|
} else {
|
|
|
|
|
latest_message_for_log
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
info!(
|
2026-02-09 13:33:27 -08:00
|
|
|
has_usage_preferences = usage_preferences.is_some(),
|
|
|
|
|
path = %request_path,
|
|
|
|
|
latest_message = %latest_message_for_log,
|
|
|
|
|
"processing router request"
|
2025-12-11 15:21:57 -08:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Capture start time for routing span
|
|
|
|
|
let routing_start_time = std::time::Instant::now();
|
|
|
|
|
|
|
|
|
|
// Attempt to determine route using the router service
|
|
|
|
|
let routing_result = router_service
|
2026-01-07 12:04:10 -08:00
|
|
|
.determine_route(
|
|
|
|
|
&chat_request.messages,
|
|
|
|
|
traceparent,
|
|
|
|
|
usage_preferences,
|
|
|
|
|
request_id,
|
|
|
|
|
)
|
2025-12-11 15:21:57 -08:00
|
|
|
.await;
|
|
|
|
|
|
2026-02-09 13:33:27 -08:00
|
|
|
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
|
|
|
|
let current_span = tracing::Span::current();
|
|
|
|
|
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
|
|
|
|
|
2025-12-11 15:21:57 -08:00
|
|
|
match routing_result {
|
|
|
|
|
Ok(route) => match route {
|
2026-03-09 16:32:16 -07:00
|
|
|
Some((route_name, model_name)) => {
|
2026-02-09 13:33:27 -08:00
|
|
|
current_span.record("route.selected_model", model_name.as_str());
|
2026-03-09 16:32:16 -07:00
|
|
|
Ok(RoutingResult {
|
|
|
|
|
model_name,
|
|
|
|
|
route_name: Some(route_name),
|
|
|
|
|
})
|
2025-12-11 15:21:57 -08:00
|
|
|
}
|
|
|
|
|
None => {
|
2026-01-28 17:47:33 -08:00
|
|
|
// No route determined, return sentinel value "none"
|
|
|
|
|
// This signals to llm.rs to use the original validated request model
|
2026-02-09 13:33:27 -08:00
|
|
|
current_span.record("route.selected_model", "none");
|
|
|
|
|
info!("no route determined, using default model");
|
2025-12-11 15:21:57 -08:00
|
|
|
|
|
|
|
|
Ok(RoutingResult {
|
2026-01-28 17:47:33 -08:00
|
|
|
model_name: "none".to_string(),
|
2026-03-09 16:32:16 -07:00
|
|
|
route_name: None,
|
2025-12-11 15:21:57 -08:00
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
Err(err) => {
|
2026-02-09 13:33:27 -08:00
|
|
|
current_span.record("route.selected_model", "unknown");
|
2025-12-25 21:08:37 -08:00
|
|
|
Err(RoutingError::internal_error(format!(
|
|
|
|
|
"Failed to determine route: {}",
|
|
|
|
|
err
|
|
|
|
|
)))
|
2025-12-11 15:21:57 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|