actually call archfc for orchestration

This commit is contained in:
Adil Hafeez 2025-12-19 02:40:29 -08:00
parent a611e1fc88
commit 1231706120
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
11 changed files with 97 additions and 97 deletions

View file

@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0
[program:envoy]
command=/bin/sh -c "python -m cli.config_generator && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log/envoy.log | while IFS= read -r line; do echo '[archgw_logs]' \"$line\"; done"
command=/bin/sh -c "python -m cli.config_generator && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log/envoy.log | while IFS= read -r line; do echo '[archgw_logs]' \"$line\"; done"
stdout_logfile=/dev/stdout
redirect_stderr=true
stdout_logfile_maxbytes=0

View file

@ -323,6 +323,15 @@ def validate_and_render_schema():
}
)
if "plano-orchestrator" not in model_provider_name_set:
updated_model_providers.append(
{
"name": "plano-orchestrator",
"provider_interface": "arch",
"model": "Plano-Orchestrator",
}
)
config_yaml["model_providers"] = deepcopy(updated_model_providers)
listeners_with_provider = 0

View file

@ -178,7 +178,6 @@ mod tests {
Arc::new(OrchestratorService::new(
"http://localhost:8080".to_string(),
"test-model".to_string(),
"test-provider".to_string(),
))
}

View file

@ -22,7 +22,6 @@ mod integration_tests {
Arc::new(OrchestratorService::new(
"http://localhost:8080".to_string(),
"test-model".to_string(),
"test-provider".to_string(),
))
}

View file

@ -103,7 +103,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let orchestrator_service: Arc<OrchestratorService> = Arc::new(OrchestratorService::new(
llm_provider_url.clone() + CHAT_COMPLETIONS_PATH,
PLANO_ORCHESTRATOR_MODEL_NAME.to_string(),
routing_llm_provider,
));

View file

@ -17,6 +17,7 @@ pub struct RouterService {
router_url: String,
client: reqwest::Client,
router_model: Arc<dyn RouterModel>,
#[allow(dead_code)]
routing_provider_name: String,
llm_usage_defined: bool,
}

View file

@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc};
use common::{
configuration::{AgentUsagePreference, OrchestrationPreference},
consts::ARCH_PROVIDER_HINT_HEADER,
consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME},
};
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
use hyper::header;
@ -17,7 +17,6 @@ pub struct OrchestratorService {
orchestrator_url: String,
client: reqwest::Client,
orchestrator_model: Arc<dyn OrchestratorModel>,
orchestration_provider_name: String,
}
#[derive(Debug, Error)]
@ -38,7 +37,6 @@ impl OrchestratorService {
pub fn new(
orchestrator_url: String,
orchestration_model_name: String,
orchestration_provider_name: String,
) -> Self {
// Empty agent orchestrations - will be provided via usage_preferences in requests
let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
@ -53,7 +51,6 @@ impl OrchestratorService {
orchestrator_url,
client: reqwest::Client::new(),
orchestrator_model,
orchestration_provider_name,
}
}
@ -95,7 +92,7 @@ impl OrchestratorService {
orchestration_request_headers.insert(
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
header::HeaderValue::from_str(&self.orchestration_provider_name).unwrap(),
header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
);
if let Some(trace_parent) = trace_parent {
@ -107,7 +104,7 @@ impl OrchestratorService {
orchestration_request_headers.insert(
header::HeaderName::from_static("model"),
header::HeaderValue::from_static("Plano-Orchestrator"),
header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME),
);
let start_time = std::time::Instant::now();

View file

@ -34,3 +34,4 @@ pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
/// Name of the `x-envoy-max-retries` HTTP header.
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
/// Service name string for brightstaff.
// NOTE(review): presumably used to tag telemetry emitted by brightstaff — confirm against callers.
pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";
/// Model name of the Plano orchestrator. The orchestrator service sends this as the
/// value of both the provider-hint header and the `model` header on its requests.
pub const PLANO_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
/// The string `"arch"`.
// NOTE(review): presumably the upstream cluster name for Arch-FC; no use site is visible here — confirm.
pub const ARCH_FC_CLUSTER: &str = "arch";

View file

@ -47,7 +47,7 @@ pub struct StreamContext {
ttft_time: Option<u128>,
traceparent: Option<String>,
request_body_sent_time: Option<u128>,
overrides: Rc<Option<Overrides>>,
_overrides: Rc<Option<Overrides>>,
user_message: Option<String>,
upstream_status_code: Option<StatusCode>,
binary_frame_decoder: Option<BedrockBinaryFrameDecoder<bytes::BytesMut>>,
@ -65,7 +65,7 @@ impl StreamContext {
) -> Self {
StreamContext {
metrics,
overrides,
_overrides: overrides,
ratelimit_selector: None,
streaming_response: false,
response_tokens: 0,
@ -133,6 +133,7 @@ impl StreamContext {
.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
.map(|llm_name| llm_name.into());
info!("llm_providers: {:?}", self.llm_providers);
self.llm_provider = Some(routing::get_llm_provider(
&self.llm_providers,
provider_hint,
@ -744,55 +745,37 @@ impl HttpContext for StreamContext {
.map(|val| val == "true")
.unwrap_or(false);
let use_agent_orchestrator = match self.overrides.as_ref() {
Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
None => false,
};
let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
if routing_header_value.is_some() && !routing_header_value.as_ref().unwrap().is_empty() {
let routing_header_value = routing_header_value.as_ref().unwrap();
info!("routing header already set: {}", routing_header_value);
self.llm_provider = Some(Rc::new(LlmProvider {
name: routing_header_value.to_string(),
provider_interface: LlmProviderType::OpenAI,
..Default::default() //TODO: THiS IS BROKEN. WHY ARE WE ASSUMING OPENAI FOR UPSTREAM?
}));
} else {
//TODO: Fix this brittle code path. We need to return values and have compile time
self.select_llm_provider();
// let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
self.select_llm_provider();
// Check if this is a supported API endpoint
if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));
return Action::Continue;
}
if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));
return Action::Continue;
}
// Get the SupportedApi for routing decisions
let supported_api: Option<SupportedAPIsFromClient> =
SupportedAPIsFromClient::from_endpoint(&request_path);
self.client_api = supported_api;
// Get the SupportedApi for routing decisions
let supported_api: Option<SupportedAPIsFromClient> =
SupportedAPIsFromClient::from_endpoint(&request_path);
self.client_api = supported_api;
// Debug: log provider, client API, resolved API, and request path
if let (Some(api), Some(provider)) =
(self.client_api.as_ref(), self.llm_provider.as_ref())
{
let provider_id = provider.to_provider_id();
self.resolved_api =
Some(provider_id.compatible_api_for_client(api, self.streaming_response));
// Debug: log provider, client API, resolved API, and request path
if let (Some(api), Some(provider)) =
(self.client_api.as_ref(), self.llm_provider.as_ref())
{
let provider_id = provider.to_provider_id();
self.resolved_api =
Some(provider_id.compatible_api_for_client(api, self.streaming_response));
debug!(
"[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
self.request_identifier(),
provider.to_provider_id(),
api,
self.resolved_api,
request_path
);
} else {
self.resolved_api = None;
}
debug!(
"[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
self.request_identifier(),
provider.to_provider_id(),
api,
self.resolved_api,
request_path
);
//We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc.
self.update_upstream_path(&request_path);
@ -816,7 +799,6 @@ impl HttpContext for StreamContext {
if let Err(error) = self.modify_auth_headers() {
// ensure that the provider has an endpoint if the access key is missing else return a bad request
if self.llm_provider.as_ref().unwrap().endpoint.is_none()
&& !use_agent_orchestrator
&& self.llm_provider.as_ref().unwrap().provider_interface
!= LlmProviderType::Arch
{
@ -918,11 +900,6 @@ impl HttpContext for StreamContext {
None => None,
};
let use_agent_orchestrator = match self.overrides.as_ref() {
Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
None => false,
};
// Store the original model for logging
let model_requested = deserialized_client_request.model().to_string();
@ -930,29 +907,25 @@ impl HttpContext for StreamContext {
let resolved_model = match model_name {
Some(model_name) => model_name.clone(),
None => {
if use_agent_orchestrator {
"agent_orchestrator".to_string()
} else {
warn!(
"[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
self.request_identifier(),
model_requested,
self.llm_provider().name,
self.llm_provider().model
);
self.send_server_error(
ServerError::BadRequest {
why: format!(
"No model specified in request and couldn't determine model name from arch_config. Model name in req: {}, arch_config, provider: {}, model: {:?}",
model_requested,
self.llm_provider().name,
self.llm_provider().model
),
},
Some(StatusCode::BAD_REQUEST),
);
return Action::Continue;
}
warn!(
"[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
self.request_identifier(),
model_requested,
self.llm_provider().name,
self.llm_provider().model
);
self.send_server_error(
ServerError::BadRequest {
why: format!(
"No model specified in request and couldn't determine model name from arch_config. Model name in req: {}, arch_config, provider: {}, model: {:?}",
model_requested,
self.llm_provider().name,
self.llm_provider().model
),
},
Some(StatusCode::BAD_REQUEST),
);
return Action::Continue;
}
};

View file

@ -9,18 +9,9 @@ agents:
url: http://host.docker.internal:10530
model_providers:
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
model_aliases:
fast-llm:
target: gpt-4o-mini
smart-llm:
target: gpt-4o
listeners:
- type: agent
name: travel_booking_service
@ -28,11 +19,11 @@ listeners:
router: plano_orchestrator_v1
agents:
- id: weather_agent
description: Get current weather and forecast information for any location worldwide
description: Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.
- id: flight_agent
description: Search and book flights between cities with pricing and availability
- id: hotel_agent
description: Search and reserve hotel rooms with preferences and pricing
description: Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.
- id: currency_agent
description: Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).
tracing:
random_sampling: 100

View file

@ -14,3 +14,34 @@ model: Plano-Orchestrator
"continue_final_message": false,
"add_generation_prompt": true
}
### test archfc with plano orchestrator for travel
POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
Content-Type: application/json
model: Plano-Orchestrator
{
"model": "Plano-Orchestrator",
"messages": [
{
"role": "user",
"content": "You are a helpful assistant that selects the most suitable routes based on user intent.\nYou are provided with a list of available routes enclosed within <routes></routes> XML tags:\n<routes>\n{\"name\": \"weather_agent\", \"description\": \"Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"flight_agent\", \"description\": \"Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"currency_agent\", \"description\": \"Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n</routes>\n\nYou are also given the conversation context enclosed within <conversation></conversation> XML tags:\n<conversation>\n[\n {\n \"role\": \"user\",\n \"content\": \"book a flight and hotel to seattle from LA if weather in sunny over the weekend\"\n }\n]\n</conversation>\n\n## Instructions\n1. Analyze the latest user intent from the conversation.\n2. Compare it against the available routes to find which routes can help fulfill the request.\n3. Respond only with the exact route names from <routes>.\n4. 
If no routes can help or the intent is already fulfilled, return an empty list.\n\n## Response Format\nReturn your answer strictly in JSON as follows:\n{\"route\": [\"route_name_1\", \"route_name_2\", \"...\"]}\nIf no routes are needed, return an empty list for `route`."
}
]
}
### local 12001 llm
POST http://localhost:12001/v1/chat/completions HTTP/1.1
Content-Type: application/json
x-arch-llm-provider-hint: Plano-Orchestrator
model: Plano-Orchestrator
{
"model": "Plano-Orchestrator",
"messages": [
{
"role": "user",
"content": "You are a helpful assistant that selects the most suitable routes based on user intent.\nYou are provided with a list of available routes enclosed within <routes></routes> XML tags:\n<routes>\n{\"name\": \"weather_agent\", \"description\": \"Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"flight_agent\", \"description\": \"Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"currency_agent\", \"description\": \"Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n</routes>\n\nYou are also given the conversation context enclosed within <conversation></conversation> XML tags:\n<conversation>\n[\n {\n \"role\": \"user\",\n \"content\": \"book a flight and hotel to seattle from LA if weather in sunny over the weekend\"\n }\n]\n</conversation>\n\n## Instructions\n1. Analyze the latest user intent from the conversation.\n2. Compare it against the available routes to find which routes can help fulfill the request.\n3. Respond only with the exact route names from <routes>.\n4. 
If no routes can help or the intent is already fulfilled, return an empty list.\n\n## Response Format\nReturn your answer strictly in JSON as follows:\n{\"route\": [\"route_name_1\", \"route_name_2\", \"...\"]}\nIf no routes are needed, return an empty list for `route`."
}
]
}