diff --git a/arch/supervisord.conf b/arch/supervisord.conf index 9761f779..9ffe1ce8 100644 --- a/arch/supervisord.conf +++ b/arch/supervisord.conf @@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0 stderr_logfile_maxbytes=0 [program:envoy] -command=/bin/sh -c "python -m cli.config_generator && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log/envoy.log | while IFS= read -r line; do echo '[archgw_logs]' \"$line\"; done" +command=/bin/sh -c "python -m cli.config_generator && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log/envoy.log | while IFS= read -r line; do echo '[archgw_logs]' \"$line\"; done" stdout_logfile=/dev/stdout redirect_stderr=true stdout_logfile_maxbytes=0 diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 3acdb932..d69473af 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -323,6 +323,15 @@ def validate_and_render_schema(): } ) + if "plano-orchestrator" not in model_provider_name_set: + updated_model_providers.append( + { + "name": "plano-orchestrator", + "provider_interface": "arch", + "model": "Plano-Orchestrator", + } + ) + config_yaml["model_providers"] = deepcopy(updated_model_providers) listeners_with_provider = 0 diff --git a/crates/brightstaff/src/handlers/agent_selector.rs b/crates/brightstaff/src/handlers/agent_selector.rs index 0733035b..e26f6391 100644 --- a/crates/brightstaff/src/handlers/agent_selector.rs +++ b/crates/brightstaff/src/handlers/agent_selector.rs @@ -178,7 +178,6 @@ mod tests { Arc::new(OrchestratorService::new( "http://localhost:8080".to_string(), "test-model".to_string(), - "test-provider".to_string(), )) } diff --git a/crates/brightstaff/src/handlers/integration_tests.rs b/crates/brightstaff/src/handlers/integration_tests.rs index c4542b9a..01ea1574 100644 --- a/crates/brightstaff/src/handlers/integration_tests.rs +++ b/crates/brightstaff/src/handlers/integration_tests.rs @@ -22,7 +22,6 @@ mod integration_tests { Arc::new(OrchestratorService::new( "http://localhost:8080".to_string(), "test-model".to_string(), - "test-provider".to_string(), )) } diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 41c6597b..8681b690 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -103,7 +103,6 @@ async fn main() -> Result<(), Box> { let orchestrator_service: Arc = Arc::new(OrchestratorService::new( llm_provider_url.clone() + CHAT_COMPLETIONS_PATH, PLANO_ORCHESTRATOR_MODEL_NAME.to_string(), - routing_llm_provider, )); diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs index 5f71f9c8..e0e85a8d 100644 --- a/crates/brightstaff/src/router/llm_router.rs +++ b/crates/brightstaff/src/router/llm_router.rs @@ -17,6 +17,7 @@ pub struct RouterService { router_url: String, client: reqwest::Client, router_model: Arc, + #[allow(dead_code)] routing_provider_name: String, llm_usage_defined: bool, } diff --git a/crates/brightstaff/src/router/plano_orchestrator.rs b/crates/brightstaff/src/router/plano_orchestrator.rs index 5a47a0d3..62efedce 100644 --- a/crates/brightstaff/src/router/plano_orchestrator.rs +++ b/crates/brightstaff/src/router/plano_orchestrator.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc}; use common::{ configuration::{AgentUsagePreference, OrchestrationPreference}, - consts::ARCH_PROVIDER_HINT_HEADER, + consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME}, }; use hermesllm::apis::openai::{ChatCompletionsResponse, Message}; use hyper::header; @@ -17,7 +17,6 @@ pub struct OrchestratorService { orchestrator_url: String, client: reqwest::Client, orchestrator_model: Arc, - orchestration_provider_name: String, } #[derive(Debug, Error)] @@ -38,7 +37,6 @@ impl OrchestratorService { pub fn new( orchestrator_url: String, orchestration_model_name: String, - orchestration_provider_name: String, ) -> Self { // Empty agent orchestrations - will be provided via usage_preferences in requests let agent_orchestrations: HashMap> = HashMap::new(); @@ -53,7 +51,6 @@ impl OrchestratorService { orchestrator_url, client: reqwest::Client::new(), orchestrator_model, - orchestration_provider_name, } } @@ -95,7 +92,7 @@ impl OrchestratorService { orchestration_request_headers.insert( header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER), - header::HeaderValue::from_str(&self.orchestration_provider_name).unwrap(), + header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(), ); if let Some(trace_parent) = trace_parent { @@ -107,7 +104,7 @@ impl OrchestratorService { orchestration_request_headers.insert( header::HeaderName::from_static("model"), - header::HeaderValue::from_static("Plano-Orchestrator"), + header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME), ); let start_time = std::time::Instant::now(); diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 73a11e0a..f9245af9 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -34,3 +34,4 @@ pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route"; pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries"; pub const BRIGHT_STAFF_SERVICE_NAME : &str = "brightstaff"; pub const PLANO_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator"; +pub const ARCH_FC_CLUSTER: &str = "arch"; \ No newline at end of file diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 1fa1a418..395ec980 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -47,7 +47,7 @@ pub struct StreamContext { ttft_time: Option, traceparent: Option, request_body_sent_time: Option, - overrides: Rc>, + _overrides: Rc>, user_message: Option, upstream_status_code: Option, binary_frame_decoder: Option>, @@ -65,7 +65,7 @@ impl StreamContext { ) -> Self { StreamContext { metrics, - overrides, + _overrides: overrides, ratelimit_selector: None, streaming_response: false, response_tokens: 0, @@ -133,6 +133,7 @@ impl StreamContext { .get_http_request_header(ARCH_PROVIDER_HINT_HEADER) .map(|llm_name| llm_name.into()); + info!("llm_providers: {:?}", self.llm_providers); self.llm_provider = Some(routing::get_llm_provider( &self.llm_providers, provider_hint, @@ -744,55 +745,37 @@ impl HttpContext for StreamContext { .map(|val| val == "true") .unwrap_or(false); - let use_agent_orchestrator = match self.overrides.as_ref() { - Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(), - None => false, - }; - let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER); - - if routing_header_value.is_some() && !routing_header_value.as_ref().unwrap().is_empty() { - let routing_header_value = routing_header_value.as_ref().unwrap(); - info!("routing header already set: {}", routing_header_value); - self.llm_provider = Some(Rc::new(LlmProvider { - name: routing_header_value.to_string(), - provider_interface: LlmProviderType::OpenAI, - ..Default::default() //TODO: THiS IS BROKEN. WHY ARE WE ASSUMING OPENAI FOR UPSTREAM? - })); - } else { - //TODO: Fix this brittle code path. We need to return values and have compile time - self.select_llm_provider(); + // let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER); + self.select_llm_provider(); // Check if this is a supported API endpoint - if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() { - self.send_http_response(404, vec![], Some(b"Unsupported endpoint")); - return Action::Continue; - } + if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() { + self.send_http_response(404, vec![], Some(b"Unsupported endpoint")); + return Action::Continue; + } - // Get the SupportedApi for routing decisions - let supported_api: Option = - SupportedAPIsFromClient::from_endpoint(&request_path); - self.client_api = supported_api; + // Get the SupportedApi for routing decisions + let supported_api: Option = + SupportedAPIsFromClient::from_endpoint(&request_path); + self.client_api = supported_api; - // Debug: log provider, client API, resolved API, and request path - if let (Some(api), Some(provider)) = - (self.client_api.as_ref(), self.llm_provider.as_ref()) - { - let provider_id = provider.to_provider_id(); - self.resolved_api = - Some(provider_id.compatible_api_for_client(api, self.streaming_response)); + // Debug: log provider, client API, resolved API, and request path + if let (Some(api), Some(provider)) = + (self.client_api.as_ref(), self.llm_provider.as_ref()) + { + let provider_id = provider.to_provider_id(); + self.resolved_api = + Some(provider_id.compatible_api_for_client(api, self.streaming_response)); - debug!( - "[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'", - self.request_identifier(), - provider.to_provider_id(), - api, - self.resolved_api, - request_path - ); - } else { - self.resolved_api = None; - } + debug!( + "[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'", + self.request_identifier(), + provider.to_provider_id(), + api, + self.resolved_api, + request_path + ); //We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc. self.update_upstream_path(&request_path); @@ -816,7 +799,6 @@ impl HttpContext for StreamContext { if let Err(error) = self.modify_auth_headers() { // ensure that the provider has an endpoint if the access key is missing else return a bad request if self.llm_provider.as_ref().unwrap().endpoint.is_none() - && !use_agent_orchestrator && self.llm_provider.as_ref().unwrap().provider_interface != LlmProviderType::Arch { @@ -918,11 +900,6 @@ impl HttpContext for StreamContext { None => None, }; - let use_agent_orchestrator = match self.overrides.as_ref() { - Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(), - None => false, - }; - // Store the original model for logging let model_requested = deserialized_client_request.model().to_string(); @@ -930,29 +907,25 @@ impl HttpContext for StreamContext { let resolved_model = match model_name { Some(model_name) => model_name.clone(), None => { - if use_agent_orchestrator { - "agent_orchestrator".to_string() - } else { - warn!( - "[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}", - self.request_identifier(), - model_requested, - self.llm_provider().name, - self.llm_provider().model - ); - self.send_server_error( - ServerError::BadRequest { - why: format!( - "No model specified in request and couldn't determine model name from arch_config. Model name in req: {}, arch_config, provider: {}, model: {:?}", - model_requested, - self.llm_provider().name, - self.llm_provider().model - ), - }, - Some(StatusCode::BAD_REQUEST), - ); - return Action::Continue; - } + warn!( + "[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}", + self.request_identifier(), + model_requested, + self.llm_provider().name, + self.llm_provider().model + ); + self.send_server_error( + ServerError::BadRequest { + why: format!( + "No model specified in request and couldn't determine model name from arch_config. Model name in req: {}, arch_config, provider: {}, model: {:?}", + model_requested, + self.llm_provider().name, + self.llm_provider().model + ), + }, + Some(StatusCode::BAD_REQUEST), + ); + return Action::Continue; } }; diff --git a/demos/use_cases/travel_booking/arch_config.yaml b/demos/use_cases/travel_booking/arch_config.yaml index 13e0325b..c4803428 100644 --- a/demos/use_cases/travel_booking/arch_config.yaml +++ b/demos/use_cases/travel_booking/arch_config.yaml @@ -9,18 +9,9 @@ agents: url: http://host.docker.internal:10530 model_providers: - - model: openai/gpt-4o-mini - access_key: $OPENAI_API_KEY - default: true - model: openai/gpt-4o access_key: $OPENAI_API_KEY -model_aliases: - fast-llm: - target: gpt-4o-mini - smart-llm: - target: gpt-4o - listeners: - type: agent name: travel_booking_service @@ -28,11 +19,11 @@ listeners: router: plano_orchestrator_v1 agents: - id: weather_agent - description: Get current weather and forecast information for any location worldwide + description: Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions. - id: flight_agent - description: Search and book flights between cities with pricing and availability - - id: hotel_agent - description: Search and reserve hotel rooms with preferences and pricing + description: Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data. + - id: currency_agent + description: Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY). tracing: random_sampling: 100 diff --git a/demos/use_cases/travel_booking/test.rest b/demos/use_cases/travel_booking/test.rest index 98abb405..9aa77e6e 100644 --- a/demos/use_cases/travel_booking/test.rest +++ b/demos/use_cases/travel_booking/test.rest @@ -14,3 +14,34 @@ model: Plano-Orchestrator "continue_final_message": false, "add_generation_prompt": true } + +### test archfc with plano orchestrator for travel +POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1 +Content-Type: application/json +model: Plano-Orchestrator + +{ + "model": "Plano-Orchestrator", + "messages": [ + { + "role": "user", + "content": "You are a helpful assistant that selects the most suitable routes based on user intent.\nYou are provided with a list of available routes enclosed within XML tags:\n\n{\"name\": \"weather_agent\", \"description\": \"Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"flight_agent\", \"description\": \"Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"currency_agent\", \"description\": \"Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n\n\nYou are also given the conversation context enclosed within XML tags:\n\n[\n {\n \"role\": \"user\",\n \"content\": \"book a flight and hotel to seattle from LA if weather in sunny over the weekend\"\n }\n]\n\n\n## Instructions\n1. Analyze the latest user intent from the conversation.\n2. Compare it against the available routes to find which routes can help fulfill the request.\n3. Respond only with the exact route names from .\n4. If no routes can help or the intent is already fulfilled, return an empty list.\n\n## Response Format\nReturn your answer strictly in JSON as follows:\n{\"route\": [\"route_name_1\", \"route_name_2\", \"...\"]}\nIf no routes are needed, return an empty list for `route`." + } + ] +} + +### local 12001 llm +POST http://localhost:12001/v1/chat/completions HTTP/1.1 +Content-Type: application/json +x-arch-llm-provider-hint: Plano-Orchestrator +model: Plano-Orchestrator + +{ + "model": "Plano-Orchestrator", + "messages": [ + { + "role": "user", + "content": "You are a helpful assistant that selects the most suitable routes based on user intent.\nYou are provided with a list of available routes enclosed within XML tags:\n\n{\"name\": \"weather_agent\", \"description\": \"Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"flight_agent\", \"description\": \"Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"currency_agent\", \"description\": \"Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n\n\nYou are also given the conversation context enclosed within XML tags:\n\n[\n {\n \"role\": \"user\",\n \"content\": \"book a flight and hotel to seattle from LA if weather in sunny over the weekend\"\n }\n]\n\n\n## Instructions\n1. Analyze the latest user intent from the conversation.\n2. Compare it against the available routes to find which routes can help fulfill the request.\n3. Respond only with the exact route names from .\n4. If no routes can help or the intent is already fulfilled, return an empty list.\n\n## Response Format\nReturn your answer strictly in JSON as follows:\n{\"route\": [\"route_name_1\", \"route_name_2\", \"...\"]}\nIf no routes are needed, return an empty list for `route`." + } + ] +}