2025-12-11 15:21:57 -08:00
|
|
|
use common::configuration::ModelUsagePreference;
|
|
|
|
|
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
|
|
|
|
|
use hermesllm::{ProviderRequest, ProviderRequestType};
|
|
|
|
|
use hyper::StatusCode;
|
|
|
|
|
use std::sync::Arc;
|
|
|
|
|
use tracing::{debug, info, warn};
|
|
|
|
|
|
|
|
|
|
use crate::router::llm_router::RouterService;
|
2026-02-09 13:33:27 -08:00
|
|
|
use crate::tracing::routing;
|
2025-12-11 15:21:57 -08:00
|
|
|
|
|
|
|
|
/// Outcome of a successful routing decision.
///
/// Produced by `router_chat_get_upstream_model`. When the router could not
/// pick a route, `model_name` holds the sentinel string `"none"` and
/// `route_name` is `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RoutingResult {
    /// Name of the model selected by the router, or the sentinel `"none"`
    /// when no route was determined.
    pub model_name: String,
    /// Name of the matched route, if the router selected one.
    pub route_name: Option<String>,
}
|
|
|
|
|
|
|
|
|
|
pub struct RoutingError {
|
|
|
|
|
pub message: String,
|
|
|
|
|
pub status_code: StatusCode,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl RoutingError {
|
|
|
|
|
pub fn internal_error(message: String) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
message,
|
2025-12-25 21:08:37 -08:00
|
|
|
status_code: StatusCode::INTERNAL_SERVER_ERROR,
|
2025-12-11 15:21:57 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Determines the routing decision if
|
|
|
|
|
///
|
|
|
|
|
/// # Returns
|
|
|
|
|
/// * `Ok(RoutingResult)` - Contains the selected model name and span ID
|
|
|
|
|
/// * `Err(RoutingError)` - Contains error details and optional span ID
|
|
|
|
|
pub async fn router_chat_get_upstream_model(
|
|
|
|
|
router_service: Arc<RouterService>,
|
|
|
|
|
client_request: ProviderRequestType,
|
|
|
|
|
traceparent: &str,
|
|
|
|
|
request_path: &str,
|
2026-01-07 12:04:10 -08:00
|
|
|
request_id: &str,
|
2025-12-11 15:21:57 -08:00
|
|
|
) -> Result<RoutingResult, RoutingError> {
|
|
|
|
|
// Clone metadata for routing before converting (which consumes client_request)
|
|
|
|
|
let routing_metadata = client_request.metadata().clone();
|
|
|
|
|
|
|
|
|
|
// Convert to ChatCompletionsRequest for routing (regardless of input type)
|
|
|
|
|
let chat_request = match ProviderRequestType::try_from((
|
|
|
|
|
client_request,
|
2025-12-25 21:08:37 -08:00
|
|
|
&SupportedUpstreamAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions),
|
2025-12-11 15:21:57 -08:00
|
|
|
)) {
|
|
|
|
|
Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
|
|
|
|
|
Ok(
|
|
|
|
|
ProviderRequestType::MessagesRequest(_)
|
|
|
|
|
| ProviderRequestType::BedrockConverse(_)
|
|
|
|
|
| ProviderRequestType::BedrockConverseStream(_)
|
|
|
|
|
| ProviderRequestType::ResponsesAPIRequest(_),
|
|
|
|
|
) => {
|
2026-02-09 13:33:27 -08:00
|
|
|
warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
|
2025-12-11 15:21:57 -08:00
|
|
|
return Err(RoutingError::internal_error(
|
|
|
|
|
"Request conversion failed".to_string(),
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
Err(err) => {
|
2025-12-25 21:08:37 -08:00
|
|
|
warn!(
|
2026-02-09 13:33:27 -08:00
|
|
|
"failed to convert request to ChatCompletionsRequest: {}",
|
2025-12-25 21:08:37 -08:00
|
|
|
err
|
|
|
|
|
);
|
2025-12-11 15:21:57 -08:00
|
|
|
return Err(RoutingError::internal_error(format!(
|
|
|
|
|
"Failed to convert request: {}",
|
|
|
|
|
err
|
|
|
|
|
)));
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
debug!(
|
2026-02-09 13:33:27 -08:00
|
|
|
request = %serde_json::to_string(&chat_request).unwrap(),
|
|
|
|
|
"router request"
|
2025-12-11 15:21:57 -08:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Extract usage preferences from metadata
|
|
|
|
|
let usage_preferences_str: Option<String> = routing_metadata.as_ref().and_then(|metadata| {
|
|
|
|
|
metadata
|
Rename all arch references to plano (#745)
* Rename all arch references to plano across the codebase
Complete rebrand from "Arch"/"archgw" to "Plano" including:
- Config files: arch_config_schema.yaml, workflow, demo configs
- Environment variables: ARCH_CONFIG_* → PLANO_CONFIG_*
- Python CLI: variables, functions, file paths, docker mounts
- Rust crates: config paths, log messages, metadata keys
- Docker/build: Dockerfile, supervisord, .dockerignore, .gitignore
- Docker Compose: volume mounts and env vars across all demos/tests
- GitHub workflows: job/step names
- Shell scripts: log messages
- Demos: Python code, READMEs, VS Code configs, Grafana dashboard
- Docs: RST includes, code comments, config references
- Package metadata: package.json, pyproject.toml, uv.lock
External URLs (docs.archgw.com, github.com/katanemo/archgw) left as-is.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Update remaining arch references in docs
- Rename RST cross-reference labels: arch_access_logging, arch_overview_tracing, arch_overview_threading → plano_*
- Update label references in request_lifecycle.rst
- Rename arch_config_state_storage_example.yaml → plano_config_state_storage_example.yaml
- Update config YAML comments: "Arch creates/uses" → "Plano creates/uses"
- Update "the Arch gateway" → "the Plano gateway" in configuration_reference.rst
- Update arch_config_schema.yaml reference in provider_models.py
- Rename arch_agent_router → plano_agent_router in config example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix remaining arch references found in second pass
- config/docker-compose.dev.yaml: ARCH_CONFIG_FILE → PLANO_CONFIG_FILE,
arch_config.yaml → plano_config.yaml, archgw_logs → plano_logs
- config/test_passthrough.yaml: container mount path
- tests/e2e/docker-compose.yaml: source file path (was still arch_config.yaml)
- cli/planoai/core.py: comment and log message
- crates/brightstaff/src/tracing/constants.rs: doc comment
- tests/{e2e,archgw}/common.py: get_arch_messages → get_plano_messages,
arch_state/arch_messages variables renamed
- tests/{e2e,archgw}/test_prompt_gateway.py: updated imports and usages
- demos/shared/test_runner/{common,test_demos}.py: same renames
- tests/e2e/test_model_alias_routing.py: docstring
- .dockerignore: archgw_modelserver → plano_modelserver
- demos/use_cases/claude_code_router/pretty_model_resolution.sh: container name
Note: x-arch-* HTTP header values and Rust constant names intentionally
preserved for backwards compatibility with existing deployments.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 15:16:56 -08:00
|
|
|
.get("plano_preference_config")
|
2025-12-11 15:21:57 -08:00
|
|
|
.map(|value| value.to_string())
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let usage_preferences: Option<Vec<ModelUsagePreference>> = usage_preferences_str
|
|
|
|
|
.as_ref()
|
|
|
|
|
.and_then(|s| serde_yaml::from_str(s).ok());
|
|
|
|
|
|
|
|
|
|
// Prepare log message with latest message from chat request
|
|
|
|
|
let latest_message_for_log = chat_request
|
|
|
|
|
.messages
|
|
|
|
|
.last()
|
|
|
|
|
.map_or("None".to_string(), |msg| {
|
2026-01-16 16:24:03 -08:00
|
|
|
msg.content
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map_or("None".to_string(), |c| c.to_string().replace('\n', "\\n"))
|
2025-12-11 15:21:57 -08:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
const MAX_MESSAGE_LENGTH: usize = 50;
|
|
|
|
|
let latest_message_for_log = if latest_message_for_log.chars().count() > MAX_MESSAGE_LENGTH {
|
|
|
|
|
let truncated: String = latest_message_for_log
|
|
|
|
|
.chars()
|
|
|
|
|
.take(MAX_MESSAGE_LENGTH)
|
|
|
|
|
.collect();
|
|
|
|
|
format!("{}...", truncated)
|
|
|
|
|
} else {
|
|
|
|
|
latest_message_for_log
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
info!(
|
2026-02-09 13:33:27 -08:00
|
|
|
has_usage_preferences = usage_preferences.is_some(),
|
|
|
|
|
path = %request_path,
|
|
|
|
|
latest_message = %latest_message_for_log,
|
|
|
|
|
"processing router request"
|
2025-12-11 15:21:57 -08:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Capture start time for routing span
|
|
|
|
|
let routing_start_time = std::time::Instant::now();
|
|
|
|
|
|
|
|
|
|
// Attempt to determine route using the router service
|
|
|
|
|
let routing_result = router_service
|
2026-01-07 12:04:10 -08:00
|
|
|
.determine_route(
|
|
|
|
|
&chat_request.messages,
|
|
|
|
|
traceparent,
|
|
|
|
|
usage_preferences,
|
|
|
|
|
request_id,
|
|
|
|
|
)
|
2025-12-11 15:21:57 -08:00
|
|
|
.await;
|
|
|
|
|
|
2026-02-09 13:33:27 -08:00
|
|
|
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
|
|
|
|
let current_span = tracing::Span::current();
|
|
|
|
|
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
|
|
|
|
|
2025-12-11 15:21:57 -08:00
|
|
|
match routing_result {
|
|
|
|
|
Ok(route) => match route {
|
2026-03-09 16:32:16 -07:00
|
|
|
Some((route_name, model_name)) => {
|
2026-02-09 13:33:27 -08:00
|
|
|
current_span.record("route.selected_model", model_name.as_str());
|
2026-03-09 16:32:16 -07:00
|
|
|
Ok(RoutingResult {
|
|
|
|
|
model_name,
|
|
|
|
|
route_name: Some(route_name),
|
|
|
|
|
})
|
2025-12-11 15:21:57 -08:00
|
|
|
}
|
|
|
|
|
None => {
|
2026-01-28 17:47:33 -08:00
|
|
|
// No route determined, return sentinel value "none"
|
|
|
|
|
// This signals to llm.rs to use the original validated request model
|
2026-02-09 13:33:27 -08:00
|
|
|
current_span.record("route.selected_model", "none");
|
|
|
|
|
info!("no route determined, using default model");
|
2025-12-11 15:21:57 -08:00
|
|
|
|
|
|
|
|
Ok(RoutingResult {
|
2026-01-28 17:47:33 -08:00
|
|
|
model_name: "none".to_string(),
|
2026-03-09 16:32:16 -07:00
|
|
|
route_name: None,
|
2025-12-11 15:21:57 -08:00
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
Err(err) => {
|
2026-02-09 13:33:27 -08:00
|
|
|
current_span.record("route.selected_model", "unknown");
|
2025-12-25 21:08:37 -08:00
|
|
|
Err(RoutingError::internal_error(format!(
|
|
|
|
|
"Failed to determine route: {}",
|
|
|
|
|
err
|
|
|
|
|
)))
|
2025-12-11 15:21:57 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|