mirror of
https://github.com/katanemo/plano.git
synced 2026-05-08 23:32:43 +02:00
refactor brightstaff (#736)
This commit is contained in:
parent
1f23c573bf
commit
1ad3e0f64e
30 changed files with 1802 additions and 1700 deletions
48
crates/brightstaff/src/router/http.rs
Normal file
48
crates/brightstaff/src/router/http.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
use hermesllm::apis::openai::ChatCompletionsResponse;
|
||||
use hyper::header;
|
||||
use thiserror::Error;
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum HttpError {
|
||||
#[error("Failed to send request: {0}")]
|
||||
Request(#[from] reqwest::Error),
|
||||
|
||||
#[error("Failed to parse JSON response: {0}")]
|
||||
Json(serde_json::Error, String),
|
||||
}
|
||||
|
||||
/// Sends a POST request to the given URL and extracts the text content
|
||||
/// from the first choice of the `ChatCompletionsResponse`.
|
||||
///
|
||||
/// Returns `Some((content, elapsed))` on success, or `None` if the response
|
||||
/// had no choices or the first choice had no content.
|
||||
pub async fn post_and_extract_content(
|
||||
client: &reqwest::Client,
|
||||
url: &str,
|
||||
headers: header::HeaderMap,
|
||||
body: String,
|
||||
) -> Result<Option<(String, std::time::Duration)>, HttpError> {
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
let res = client.post(url).headers(headers).body(body).send().await?;
|
||||
|
||||
let body = res.text().await?;
|
||||
let elapsed = start_time.elapsed();
|
||||
|
||||
let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
|
||||
warn!(error = %err, body = %body, "failed to parse json response");
|
||||
HttpError::Json(err, format!("Failed to parse JSON: {}", body))
|
||||
})?;
|
||||
|
||||
if response.choices.is_empty() {
|
||||
warn!(body = %body, "no choices in response");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(response.choices[0]
|
||||
.message
|
||||
.content
|
||||
.as_ref()
|
||||
.map(|c| (c.clone(), elapsed)))
|
||||
}
|
||||
148
crates/brightstaff/src/router/llm.rs
Normal file
148
crates/brightstaff/src/router/llm.rs
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use common::{
|
||||
configuration::{LlmProvider, ModelUsagePreference, RoutingPreference},
|
||||
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER},
|
||||
};
|
||||
use hermesllm::apis::openai::Message;
|
||||
use hyper::header;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::http::{self, post_and_extract_content};
|
||||
use super::router_model::RouterModel;
|
||||
|
||||
use crate::router::router_model_v1;
|
||||
|
||||
pub struct RouterService {
|
||||
router_url: String,
|
||||
client: reqwest::Client,
|
||||
router_model: Arc<dyn RouterModel>,
|
||||
routing_provider_name: String,
|
||||
llm_usage_defined: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RoutingError {
|
||||
#[error(transparent)]
|
||||
Http(#[from] http::HttpError),
|
||||
|
||||
#[error("Router model error: {0}")]
|
||||
RouterModelError(#[from] super::router_model::RoutingModelError),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, RoutingError>;
|
||||
|
||||
impl RouterService {
|
||||
pub fn new(
|
||||
providers: Vec<LlmProvider>,
|
||||
router_url: String,
|
||||
routing_model_name: String,
|
||||
routing_provider_name: String,
|
||||
) -> Self {
|
||||
let providers_with_usage = providers
|
||||
.iter()
|
||||
.filter(|provider| provider.routing_preferences.is_some())
|
||||
.cloned()
|
||||
.collect::<Vec<LlmProvider>>();
|
||||
|
||||
let llm_routes: HashMap<String, Vec<RoutingPreference>> = providers_with_usage
|
||||
.iter()
|
||||
.filter_map(|provider| {
|
||||
provider
|
||||
.routing_preferences
|
||||
.as_ref()
|
||||
.map(|prefs| (provider.name.clone(), prefs.clone()))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
|
||||
llm_routes,
|
||||
routing_model_name,
|
||||
router_model_v1::MAX_TOKEN_LEN,
|
||||
));
|
||||
|
||||
RouterService {
|
||||
router_url,
|
||||
client: reqwest::Client::new(),
|
||||
router_model,
|
||||
routing_provider_name,
|
||||
llm_usage_defined: !providers_with_usage.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn determine_route(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
traceparent: &str,
|
||||
usage_preferences: Option<Vec<ModelUsagePreference>>,
|
||||
request_id: &str,
|
||||
) -> Result<Option<(String, String)>> {
|
||||
if messages.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if usage_preferences
|
||||
.as_ref()
|
||||
.is_none_or(|prefs| prefs.len() < 2)
|
||||
&& !self.llm_usage_defined
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let router_request = self
|
||||
.router_model
|
||||
.generate_request(messages, &usage_preferences);
|
||||
|
||||
debug!(
|
||||
model = %self.router_model.get_model_name(),
|
||||
endpoint = %self.router_url,
|
||||
"sending request to arch-router"
|
||||
);
|
||||
|
||||
let body = serde_json::to_string(&router_request)
|
||||
.map_err(super::router_model::RoutingModelError::from)?;
|
||||
debug!(body = %body, "arch router request");
|
||||
|
||||
let mut headers = header::HeaderMap::new();
|
||||
headers.insert(
|
||||
header::CONTENT_TYPE,
|
||||
header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
if let Ok(val) = header::HeaderValue::from_str(&self.routing_provider_name) {
|
||||
headers.insert(
|
||||
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
|
||||
val,
|
||||
);
|
||||
}
|
||||
if let Ok(val) = header::HeaderValue::from_str(traceparent) {
|
||||
headers.insert(header::HeaderName::from_static(TRACE_PARENT_HEADER), val);
|
||||
}
|
||||
if let Ok(val) = header::HeaderValue::from_str(request_id) {
|
||||
headers.insert(header::HeaderName::from_static(REQUEST_ID_HEADER), val);
|
||||
}
|
||||
headers.insert(
|
||||
header::HeaderName::from_static("model"),
|
||||
header::HeaderValue::from_static("arch-router"),
|
||||
);
|
||||
|
||||
let Some((content, elapsed)) =
|
||||
post_and_extract_content(&self.client, &self.router_url, headers, body).await?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let parsed = self
|
||||
.router_model
|
||||
.parse_response(&content, &usage_preferences)?;
|
||||
|
||||
info!(
|
||||
content = %content.replace("\n", "\\n"),
|
||||
selected_model = ?parsed,
|
||||
response_time_ms = elapsed.as_millis(),
|
||||
"arch-router determined route"
|
||||
);
|
||||
|
||||
Ok(parsed)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,187 +0,0 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use common::{
|
||||
configuration::{LlmProvider, ModelUsagePreference, RoutingPreference},
|
||||
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER},
|
||||
};
|
||||
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
|
||||
use hyper::header;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::router::router_model_v1::{self};
|
||||
|
||||
use super::router_model::RouterModel;
|
||||
|
||||
pub struct RouterService {
|
||||
router_url: String,
|
||||
client: reqwest::Client,
|
||||
router_model: Arc<dyn RouterModel>,
|
||||
#[allow(dead_code)]
|
||||
routing_provider_name: String,
|
||||
llm_usage_defined: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RoutingError {
|
||||
#[error("Failed to send request: {0}")]
|
||||
RequestError(#[from] reqwest::Error),
|
||||
|
||||
#[error("Failed to parse JSON: {0}, JSON: {1}")]
|
||||
JsonError(serde_json::Error, String),
|
||||
|
||||
#[error("Router model error: {0}")]
|
||||
RouterModelError(#[from] super::router_model::RoutingModelError),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, RoutingError>;
|
||||
|
||||
impl RouterService {
|
||||
pub fn new(
|
||||
providers: Vec<LlmProvider>,
|
||||
router_url: String,
|
||||
routing_model_name: String,
|
||||
routing_provider_name: String,
|
||||
) -> Self {
|
||||
let providers_with_usage = providers
|
||||
.iter()
|
||||
.filter(|provider| provider.routing_preferences.is_some())
|
||||
.cloned()
|
||||
.collect::<Vec<LlmProvider>>();
|
||||
|
||||
let llm_routes: HashMap<String, Vec<RoutingPreference>> = providers_with_usage
|
||||
.iter()
|
||||
.filter_map(|provider| {
|
||||
provider
|
||||
.routing_preferences
|
||||
.as_ref()
|
||||
.map(|prefs| (provider.name.clone(), prefs.clone()))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
|
||||
llm_routes,
|
||||
routing_model_name,
|
||||
router_model_v1::MAX_TOKEN_LEN,
|
||||
));
|
||||
|
||||
RouterService {
|
||||
router_url,
|
||||
client: reqwest::Client::new(),
|
||||
router_model,
|
||||
routing_provider_name,
|
||||
llm_usage_defined: !providers_with_usage.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn determine_route(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
traceparent: &str,
|
||||
usage_preferences: Option<Vec<ModelUsagePreference>>,
|
||||
request_id: &str,
|
||||
) -> Result<Option<(String, String)>> {
|
||||
if messages.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if (usage_preferences.is_none() || usage_preferences.as_ref().unwrap().len() < 2)
|
||||
&& !self.llm_usage_defined
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let router_request = self
|
||||
.router_model
|
||||
.generate_request(messages, &usage_preferences);
|
||||
|
||||
debug!(
|
||||
model = %self.router_model.get_model_name(),
|
||||
endpoint = %self.router_url,
|
||||
"sending request to arch-router"
|
||||
);
|
||||
|
||||
debug!(
|
||||
body = %serde_json::to_string(&router_request).unwrap(),
|
||||
"arch router request"
|
||||
);
|
||||
|
||||
let mut llm_route_request_headers = header::HeaderMap::new();
|
||||
llm_route_request_headers.insert(
|
||||
header::CONTENT_TYPE,
|
||||
header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
|
||||
llm_route_request_headers.insert(
|
||||
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
|
||||
header::HeaderValue::from_str(&self.routing_provider_name).unwrap(),
|
||||
);
|
||||
|
||||
llm_route_request_headers.insert(
|
||||
header::HeaderName::from_static(TRACE_PARENT_HEADER),
|
||||
header::HeaderValue::from_str(traceparent).unwrap(),
|
||||
);
|
||||
|
||||
llm_route_request_headers.insert(
|
||||
header::HeaderName::from_static(REQUEST_ID_HEADER),
|
||||
header::HeaderValue::from_str(request_id).unwrap(),
|
||||
);
|
||||
|
||||
llm_route_request_headers.insert(
|
||||
header::HeaderName::from_static("model"),
|
||||
header::HeaderValue::from_static("arch-router"),
|
||||
);
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
let res = self
|
||||
.client
|
||||
.post(&self.router_url)
|
||||
.headers(llm_route_request_headers)
|
||||
.body(serde_json::to_string(&router_request).unwrap())
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let body = res.text().await?;
|
||||
let router_response_time = start_time.elapsed();
|
||||
|
||||
let chat_completion_response: ChatCompletionsResponse = match serde_json::from_str(&body) {
|
||||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
error = %err,
|
||||
body = %serde_json::to_string(&body).unwrap(),
|
||||
"failed to parse json response"
|
||||
);
|
||||
return Err(RoutingError::JsonError(
|
||||
err,
|
||||
format!("Failed to parse JSON: {}", body),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
if chat_completion_response.choices.is_empty() {
|
||||
warn!(body = %body, "no choices in router response");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if let Some(content) = &chat_completion_response.choices[0].message.content {
|
||||
let parsed_response = self
|
||||
.router_model
|
||||
.parse_response(content, &usage_preferences)?;
|
||||
info!(
|
||||
content = %content.replace("\n", "\\n"),
|
||||
selected_model = ?parsed_response,
|
||||
response_time_ms = router_response_time.as_millis(),
|
||||
"arch-router determined route"
|
||||
);
|
||||
|
||||
if let Some(ref parsed_response) = parsed_response {
|
||||
return Ok(Some(parsed_response.clone()));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
pub mod llm_router;
|
||||
pub(crate) mod http;
|
||||
pub mod llm;
|
||||
pub mod orchestrator;
|
||||
pub mod orchestrator_model;
|
||||
pub mod orchestrator_model_v1;
|
||||
pub mod plano_orchestrator;
|
||||
pub mod router_model;
|
||||
pub mod router_model_v1;
|
||||
|
|
|
|||
139
crates/brightstaff/src/router/orchestrator.rs
Normal file
139
crates/brightstaff/src/router/orchestrator.rs
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use common::{
|
||||
configuration::{AgentUsagePreference, OrchestrationPreference},
|
||||
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
|
||||
};
|
||||
use hermesllm::apis::openai::Message;
|
||||
use hyper::header;
|
||||
use opentelemetry::global;
|
||||
use opentelemetry_http::HeaderInjector;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::http::{self, post_and_extract_content};
|
||||
use super::orchestrator_model::OrchestratorModel;
|
||||
|
||||
use crate::router::orchestrator_model_v1;
|
||||
|
||||
pub struct OrchestratorService {
|
||||
orchestrator_url: String,
|
||||
client: reqwest::Client,
|
||||
orchestrator_model: Arc<dyn OrchestratorModel>,
|
||||
orchestrator_provider_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum OrchestrationError {
|
||||
#[error(transparent)]
|
||||
Http(#[from] http::HttpError),
|
||||
|
||||
#[error("Orchestrator model error: {0}")]
|
||||
OrchestratorModelError(#[from] super::orchestrator_model::OrchestratorModelError),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, OrchestrationError>;
|
||||
|
||||
impl OrchestratorService {
|
||||
pub fn new(
|
||||
orchestrator_url: String,
|
||||
orchestration_model_name: String,
|
||||
orchestrator_provider_name: String,
|
||||
) -> Self {
|
||||
let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
|
||||
|
||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||
agent_orchestrations,
|
||||
orchestration_model_name.clone(),
|
||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
));
|
||||
|
||||
OrchestratorService {
|
||||
orchestrator_url,
|
||||
client: reqwest::Client::new(),
|
||||
orchestrator_model,
|
||||
orchestrator_provider_name,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn determine_orchestration(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
usage_preferences: Option<Vec<AgentUsagePreference>>,
|
||||
request_id: Option<String>,
|
||||
) -> Result<Option<Vec<(String, String)>>> {
|
||||
if messages.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if usage_preferences
|
||||
.as_ref()
|
||||
.is_none_or(|prefs| prefs.is_empty())
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let orchestrator_request = self
|
||||
.orchestrator_model
|
||||
.generate_request(messages, &usage_preferences);
|
||||
|
||||
debug!(
|
||||
model = %self.orchestrator_model.get_model_name(),
|
||||
endpoint = %self.orchestrator_url,
|
||||
"sending request to arch-orchestrator"
|
||||
);
|
||||
|
||||
let body = serde_json::to_string(&orchestrator_request)
|
||||
.map_err(super::orchestrator_model::OrchestratorModelError::from)?;
|
||||
debug!(body = %body, "arch orchestrator request");
|
||||
|
||||
let mut headers = header::HeaderMap::new();
|
||||
headers.insert(
|
||||
header::CONTENT_TYPE,
|
||||
header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
headers.insert(
|
||||
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
|
||||
header::HeaderValue::from_str(&self.orchestrator_provider_name)
|
||||
.unwrap_or_else(|_| header::HeaderValue::from_static("plano-orchestrator")),
|
||||
);
|
||||
|
||||
// Inject OpenTelemetry trace context from current span
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
let cx =
|
||||
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
|
||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut headers));
|
||||
});
|
||||
|
||||
if let Some(ref request_id) = request_id {
|
||||
if let Ok(val) = header::HeaderValue::from_str(request_id) {
|
||||
headers.insert(header::HeaderName::from_static(REQUEST_ID_HEADER), val);
|
||||
}
|
||||
}
|
||||
|
||||
headers.insert(
|
||||
header::HeaderName::from_static("model"),
|
||||
header::HeaderValue::from_str(&self.orchestrator_provider_name)
|
||||
.unwrap_or_else(|_| header::HeaderValue::from_static("plano-orchestrator")),
|
||||
);
|
||||
|
||||
let Some((content, elapsed)) =
|
||||
post_and_extract_content(&self.client, &self.orchestrator_url, headers, body).await?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let parsed = self
|
||||
.orchestrator_model
|
||||
.parse_response(&content, &usage_preferences)?;
|
||||
|
||||
info!(
|
||||
content = %content.replace("\n", "\\n"),
|
||||
selected_routes = ?parsed,
|
||||
response_time_ms = elapsed.as_millis(),
|
||||
"arch-orchestrator determined routes"
|
||||
);
|
||||
|
||||
Ok(parsed)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,174 +0,0 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use common::{
|
||||
configuration::{AgentUsagePreference, OrchestrationPreference},
|
||||
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
|
||||
};
|
||||
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
|
||||
use hyper::header;
|
||||
use opentelemetry::global;
|
||||
use opentelemetry_http::HeaderInjector;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::router::orchestrator_model_v1::{self};
|
||||
|
||||
use super::orchestrator_model::OrchestratorModel;
|
||||
|
||||
pub struct OrchestratorService {
|
||||
orchestrator_url: String,
|
||||
client: reqwest::Client,
|
||||
orchestrator_model: Arc<dyn OrchestratorModel>,
|
||||
orchestrator_provider_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum OrchestrationError {
|
||||
#[error("Failed to send request: {0}")]
|
||||
RequestError(#[from] reqwest::Error),
|
||||
|
||||
#[error("Failed to parse JSON: {0}, JSON: {1}")]
|
||||
JsonError(serde_json::Error, String),
|
||||
|
||||
#[error("Orchestrator model error: {0}")]
|
||||
OrchestratorModelError(#[from] super::orchestrator_model::OrchestratorModelError),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, OrchestrationError>;
|
||||
|
||||
impl OrchestratorService {
|
||||
pub fn new(
|
||||
orchestrator_url: String,
|
||||
orchestration_model_name: String,
|
||||
orchestrator_provider_name: String,
|
||||
) -> Self {
|
||||
// Empty agent orchestrations - will be provided via usage_preferences in requests
|
||||
let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
|
||||
|
||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||
agent_orchestrations,
|
||||
orchestration_model_name.clone(),
|
||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
));
|
||||
|
||||
OrchestratorService {
|
||||
orchestrator_url,
|
||||
client: reqwest::Client::new(),
|
||||
orchestrator_model,
|
||||
orchestrator_provider_name,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn determine_orchestration(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
usage_preferences: Option<Vec<AgentUsagePreference>>,
|
||||
request_id: Option<String>,
|
||||
) -> Result<Option<Vec<(String, String)>>> {
|
||||
if messages.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Require usage_preferences to be provided
|
||||
if usage_preferences.is_none() || usage_preferences.as_ref().unwrap().is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let orchestrator_request = self
|
||||
.orchestrator_model
|
||||
.generate_request(messages, &usage_preferences);
|
||||
|
||||
debug!(
|
||||
model = %self.orchestrator_model.get_model_name(),
|
||||
endpoint = %self.orchestrator_url,
|
||||
"sending request to plano-orchestrator"
|
||||
);
|
||||
|
||||
debug!(
|
||||
body = %serde_json::to_string(&orchestrator_request).unwrap(),
|
||||
"plano orchestrator request"
|
||||
);
|
||||
|
||||
let mut orchestration_request_headers = header::HeaderMap::new();
|
||||
orchestration_request_headers.insert(
|
||||
header::CONTENT_TYPE,
|
||||
header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
|
||||
orchestration_request_headers.insert(
|
||||
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
|
||||
header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
|
||||
);
|
||||
|
||||
// Inject OpenTelemetry trace context from current span
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
let cx =
|
||||
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
|
||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut orchestration_request_headers));
|
||||
});
|
||||
|
||||
if let Some(request_id) = request_id {
|
||||
orchestration_request_headers.insert(
|
||||
header::HeaderName::from_static(REQUEST_ID_HEADER),
|
||||
header::HeaderValue::from_str(&request_id).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
orchestration_request_headers.insert(
|
||||
header::HeaderName::from_static("model"),
|
||||
header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
|
||||
);
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
let res = self
|
||||
.client
|
||||
.post(&self.orchestrator_url)
|
||||
.headers(orchestration_request_headers)
|
||||
.body(serde_json::to_string(&orchestrator_request).unwrap())
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let body = res.text().await?;
|
||||
let orchestrator_response_time = start_time.elapsed();
|
||||
|
||||
let chat_completion_response: ChatCompletionsResponse = match serde_json::from_str(&body) {
|
||||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
error = %err,
|
||||
body = %serde_json::to_string(&body).unwrap(),
|
||||
"failed to parse json response"
|
||||
);
|
||||
return Err(OrchestrationError::JsonError(
|
||||
err,
|
||||
format!("Failed to parse JSON: {}", body),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
if chat_completion_response.choices.is_empty() {
|
||||
warn!(body = %body, "no choices in orchestrator response");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if let Some(content) = &chat_completion_response.choices[0].message.content {
|
||||
let parsed_response = self
|
||||
.orchestrator_model
|
||||
.parse_response(content, &usage_preferences)?;
|
||||
info!(
|
||||
content = %content.replace("\n", "\\n"),
|
||||
selected_routes = ?parsed_response,
|
||||
response_time_ms = orchestrator_response_time.as_millis(),
|
||||
"arch-orchestrator determined routes"
|
||||
);
|
||||
|
||||
if let Some(ref parsed_response) = parsed_response {
|
||||
return Ok(Some(parsed_response.clone()));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue