Unified overrides for custom router and orchestrator models (#820)

* support configurable orchestrator model via orchestration config section

* add self-hosting docs and demo for Plano-Orchestrator

* list all Plano-Orchestrator model variants in docs

* use overrides for custom routing and orchestration model

* update docs

* update orchestrator model name

* rename arch provider to plano, use llm_routing_model and agent_orchestration_model

* regenerate rendered config reference
This commit is contained in:
Adil Hafeez 2026-03-15 09:36:11 -07:00 committed by GitHub
parent 785bf7e021
commit bc059aed4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 312 additions and 103 deletions

View file

@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc};
use common::{
configuration::{AgentUsagePreference, OrchestrationPreference},
consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER},
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
};
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
use hyper::header;
@ -19,6 +19,7 @@ pub struct OrchestratorService {
orchestrator_url: String,
client: reqwest::Client,
orchestrator_model: Arc<dyn OrchestratorModel>,
orchestrator_provider_name: String,
}
#[derive(Debug, Error)]
@ -36,7 +37,11 @@ pub enum OrchestrationError {
pub type Result<T> = std::result::Result<T, OrchestrationError>;
impl OrchestratorService {
pub fn new(orchestrator_url: String, orchestration_model_name: String) -> Self {
pub fn new(
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
) -> Self {
// Empty agent orchestrations - will be provided via usage_preferences in requests
let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
@ -50,6 +55,7 @@ impl OrchestratorService {
orchestrator_url,
client: reqwest::Client::new(),
orchestrator_model,
orchestrator_provider_name,
}
}
@ -75,12 +81,12 @@ impl OrchestratorService {
debug!(
model = %self.orchestrator_model.get_model_name(),
endpoint = %self.orchestrator_url,
"sending request to arch-orchestrator"
"sending request to plano-orchestrator"
);
debug!(
body = %serde_json::to_string(&orchestrator_request).unwrap(),
"arch orchestrator request"
"plano orchestrator request"
);
let mut orchestration_request_headers = header::HeaderMap::new();
@ -91,7 +97,7 @@ impl OrchestratorService {
orchestration_request_headers.insert(
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
);
// Inject OpenTelemetry trace context from current span
@ -110,7 +116,7 @@ impl OrchestratorService {
orchestration_request_headers.insert(
header::HeaderName::from_static("model"),
header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME),
header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
);
let start_time = std::time::Instant::now();