actually call archfc for orchestration

2026-06-17 15:25:17 +02:00 · 2025-12-19 02:40:29 -08:00 · 2025-12-19 02:40:29 -08:00 · 1231706120
commit 1231706120
parent a611e1fc88
11 changed files with 97 additions and 97 deletions
--- a/arch/supervisord.conf
+++ b/arch/supervisord.conf
@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0
 stderr_logfile_maxbytes=0

 [program:envoy]
-command=/bin/sh -c "python -m cli.config_generator && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log/envoy.log | while IFS= read -r line; do echo '[archgw_logs]' \"$line\"; done"
+command=/bin/sh -c "python -m cli.config_generator && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log/envoy.log | while IFS= read -r line; do echo '[archgw_logs]' \"$line\"; done"
 stdout_logfile=/dev/stdout
 redirect_stderr=true
 stdout_logfile_maxbytes=0
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -323,6 +323,15 @@ def validate_and_render_schema():
            }
        )

+    if "plano-orchestrator" not in model_provider_name_set:
+        updated_model_providers.append(
+            {
+                "name": "plano-orchestrator",
+                "provider_interface": "arch",
+                "model": "Plano-Orchestrator",
+            }
+        )
+
    config_yaml["model_providers"] = deepcopy(updated_model_providers)

    listeners_with_provider = 0
--- a/crates/brightstaff/src/handlers/agent_selector.rs
+++ b/crates/brightstaff/src/handlers/agent_selector.rs
@ -178,7 +178,6 @@ mod tests {
        Arc::new(OrchestratorService::new(
            "http://localhost:8080".to_string(),
            "test-model".to_string(),
-            "test-provider".to_string(),
        ))
    }

--- a/crates/brightstaff/src/handlers/integration_tests.rs
+++ b/crates/brightstaff/src/handlers/integration_tests.rs
@ -22,7 +22,6 @@ mod integration_tests {
        Arc::new(OrchestratorService::new(
            "http://localhost:8080".to_string(),
            "test-model".to_string(),
-            "test-provider".to_string(),
        ))
    }

--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@ -103,7 +103,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let orchestrator_service: Arc<OrchestratorService> = Arc::new(OrchestratorService::new(
        llm_provider_url.clone() + CHAT_COMPLETIONS_PATH,
        PLANO_ORCHESTRATOR_MODEL_NAME.to_string(),
-        routing_llm_provider,
    ));


--- a/crates/brightstaff/src/router/llm_router.rs
+++ b/crates/brightstaff/src/router/llm_router.rs
@ -17,6 +17,7 @@ pub struct RouterService {
    router_url: String,
    client: reqwest::Client,
    router_model: Arc<dyn RouterModel>,
+    #[allow(dead_code)]
    routing_provider_name: String,
    llm_usage_defined: bool,
 }
--- a/crates/brightstaff/src/router/plano_orchestrator.rs
+++ b/crates/brightstaff/src/router/plano_orchestrator.rs
@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc};

 use common::{
    configuration::{AgentUsagePreference, OrchestrationPreference},
-    consts::ARCH_PROVIDER_HINT_HEADER,
+    consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME},
 };
 use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
 use hyper::header;
@ -17,7 +17,6 @@ pub struct OrchestratorService {
    orchestrator_url: String,
    client: reqwest::Client,
    orchestrator_model: Arc<dyn OrchestratorModel>,
-    orchestration_provider_name: String,
 }

 #[derive(Debug, Error)]
@ -38,7 +37,6 @@ impl OrchestratorService {
    pub fn new(
        orchestrator_url: String,
        orchestration_model_name: String,
-        orchestration_provider_name: String,
    ) -> Self {
        // Empty agent orchestrations - will be provided via usage_preferences in requests
        let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
@ -53,7 +51,6 @@ impl OrchestratorService {
            orchestrator_url,
            client: reqwest::Client::new(),
            orchestrator_model,
-            orchestration_provider_name,
        }
    }

@ -95,7 +92,7 @@ impl OrchestratorService {

        orchestration_request_headers.insert(
            header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
-            header::HeaderValue::from_str(&self.orchestration_provider_name).unwrap(),
+            header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
        );

        if let Some(trace_parent) = trace_parent {
@ -107,7 +104,7 @@ impl OrchestratorService {

        orchestration_request_headers.insert(
            header::HeaderName::from_static("model"),
-            header::HeaderValue::from_static("Plano-Orchestrator"),
+            header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME),
        );

        let start_time = std::time::Instant::now();
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@ -34,3 +34,4 @@ pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
 pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
 pub const BRIGHT_STAFF_SERVICE_NAME : &str = "brightstaff";
 pub const PLANO_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
+pub const ARCH_FC_CLUSTER: &str = "arch";
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -47,7 +47,7 @@ pub struct StreamContext {
    ttft_time: Option<u128>,
    traceparent: Option<String>,
    request_body_sent_time: Option<u128>,
-    overrides: Rc<Option<Overrides>>,
+    _overrides: Rc<Option<Overrides>>,
    user_message: Option<String>,
    upstream_status_code: Option<StatusCode>,
    binary_frame_decoder: Option<BedrockBinaryFrameDecoder<bytes::BytesMut>>,
@ -65,7 +65,7 @@ impl StreamContext {
    ) -> Self {
        StreamContext {
            metrics,
-            overrides,
+            _overrides: overrides,
            ratelimit_selector: None,
            streaming_response: false,
            response_tokens: 0,
@ -133,6 +133,7 @@ impl StreamContext {
            .get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
            .map(|llm_name| llm_name.into());

+        info!("llm_providers: {:?}", self.llm_providers);
        self.llm_provider = Some(routing::get_llm_provider(
            &self.llm_providers,
            provider_hint,
@ -744,55 +745,37 @@ impl HttpContext for StreamContext {
            .map(|val| val == "true")
            .unwrap_or(false);

-        let use_agent_orchestrator = match self.overrides.as_ref() {
-            Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
-            None => false,
-        };

-        let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
-
-        if routing_header_value.is_some() && !routing_header_value.as_ref().unwrap().is_empty() {
-            let routing_header_value = routing_header_value.as_ref().unwrap();
-            info!("routing header already set: {}", routing_header_value);
-            self.llm_provider = Some(Rc::new(LlmProvider {
-                name: routing_header_value.to_string(),
-                provider_interface: LlmProviderType::OpenAI,
-                ..Default::default() //TODO: THiS IS BROKEN. WHY ARE WE ASSUMING OPENAI FOR UPSTREAM?
-            }));
-        } else {
-            //TODO: Fix this brittle code path. We need to return values and have compile time
-            self.select_llm_provider();
+        // let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);

+        self.select_llm_provider();
            // Check if this is a supported API endpoint
-            if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
-                self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));
-                return Action::Continue;
-            }
+        if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
+            self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));
+            return Action::Continue;
+        }

-            // Get the SupportedApi for routing decisions
-            let supported_api: Option<SupportedAPIsFromClient> =
-                SupportedAPIsFromClient::from_endpoint(&request_path);
-            self.client_api = supported_api;
+        // Get the SupportedApi for routing decisions
+        let supported_api: Option<SupportedAPIsFromClient> =
+            SupportedAPIsFromClient::from_endpoint(&request_path);
+        self.client_api = supported_api;

-            // Debug: log provider, client API, resolved API, and request path
-            if let (Some(api), Some(provider)) =
-                (self.client_api.as_ref(), self.llm_provider.as_ref())
-            {
-                let provider_id = provider.to_provider_id();
-                self.resolved_api =
-                    Some(provider_id.compatible_api_for_client(api, self.streaming_response));
+        // Debug: log provider, client API, resolved API, and request path
+        if let (Some(api), Some(provider)) =
+            (self.client_api.as_ref(), self.llm_provider.as_ref())
+        {
+            let provider_id = provider.to_provider_id();
+            self.resolved_api =
+                Some(provider_id.compatible_api_for_client(api, self.streaming_response));

-                debug!(
-                    "[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
-                    self.request_identifier(),
-                    provider.to_provider_id(),
-                    api,
-                    self.resolved_api,
-                    request_path
-                );
-            } else {
-                self.resolved_api = None;
-            }
+            debug!(
+                "[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
+                self.request_identifier(),
+                provider.to_provider_id(),
+                api,
+                self.resolved_api,
+                request_path
+            );

            //We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc.
            self.update_upstream_path(&request_path);
@ -816,7 +799,6 @@ impl HttpContext for StreamContext {
            if let Err(error) = self.modify_auth_headers() {
                // ensure that the provider has an endpoint if the access key is missing else return a bad request
                if self.llm_provider.as_ref().unwrap().endpoint.is_none()
-                    && !use_agent_orchestrator
                    && self.llm_provider.as_ref().unwrap().provider_interface
                        != LlmProviderType::Arch
                {
@ -918,11 +900,6 @@ impl HttpContext for StreamContext {
            None => None,
        };

-        let use_agent_orchestrator = match self.overrides.as_ref() {
-            Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
-            None => false,
-        };
-
        // Store the original model for logging
        let model_requested = deserialized_client_request.model().to_string();

@ -930,29 +907,25 @@ impl HttpContext for StreamContext {
        let resolved_model = match model_name {
            Some(model_name) => model_name.clone(),
            None => {
-                if use_agent_orchestrator {
-                    "agent_orchestrator".to_string()
-                } else {
-                    warn!(
-                        "[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
-                        self.request_identifier(),
-                        model_requested,
-                        self.llm_provider().name,
-                        self.llm_provider().model
-                    );
-                    self.send_server_error(
-                        ServerError::BadRequest {
-                            why: format!(
-                                "No model specified in request and couldn't determine model name from arch_config. Model name in req: {}, arch_config, provider: {}, model: {:?}",
-                                model_requested,
-                                self.llm_provider().name,
-                                self.llm_provider().model
-                            ),
-                        },
-                        Some(StatusCode::BAD_REQUEST),
-                    );
-                    return Action::Continue;
-                }
+                warn!(
+                    "[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
+                    self.request_identifier(),
+                    model_requested,
+                    self.llm_provider().name,
+                    self.llm_provider().model
+                );
+                self.send_server_error(
+                    ServerError::BadRequest {
+                        why: format!(
+                            "No model specified in request and couldn't determine model name from arch_config. Model name in req: {}, arch_config, provider: {}, model: {:?}",
+                            model_requested,
+                            self.llm_provider().name,
+                            self.llm_provider().model
+                        ),
+                    },
+                    Some(StatusCode::BAD_REQUEST),
+                );
+                return Action::Continue;
            }
        };

--- a/demos/use_cases/travel_booking/arch_config.yaml
+++ b/demos/use_cases/travel_booking/arch_config.yaml
@ -9,18 +9,9 @@ agents:
    url: http://host.docker.internal:10530

 model_providers:
-  - model: openai/gpt-4o-mini
-    access_key: $OPENAI_API_KEY
-    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY

-model_aliases:
-  fast-llm:
-    target: gpt-4o-mini
-  smart-llm:
-    target: gpt-4o
-
 listeners:
  - type: agent
    name: travel_booking_service
@ -28,11 +19,11 @@ listeners:
    router: plano_orchestrator_v1
    agents:
      - id: weather_agent
-        description: Get current weather and forecast information for any location worldwide
+        description: Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.
      - id: flight_agent
-        description: Search and book flights between cities with pricing and availability
-      - id: hotel_agent
-        description: Search and reserve hotel rooms with preferences and pricing
+        description: Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.
+      - id: currency_agent
+        description: Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).

 tracing:
  random_sampling: 100
--- a/demos/use_cases/travel_booking/test.rest
+++ b/demos/use_cases/travel_booking/test.rest
@ -14,3 +14,34 @@ model: Plano-Orchestrator
  "continue_final_message": false,
  "add_generation_prompt": true
 }
+
+### test calling the hosted archfc endpoint directly with the plano orchestrator prompt (travel demo)
+POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+model: Plano-Orchestrator
+
+{
+  "model": "Plano-Orchestrator",
+  "messages": [
+    {
+      "role": "user",
+      "content": "You are a helpful assistant that selects the most suitable routes based on user intent.\nYou are provided with a list of available routes enclosed within <routes></routes> XML tags:\n<routes>\n{\"name\": \"weather_agent\", \"description\": \"Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"flight_agent\", \"description\": \"Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"currency_agent\", \"description\": \"Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n</routes>\n\nYou are also given the conversation context enclosed within <conversation></conversation> XML tags:\n<conversation>\n[\n    {\n        \"role\": \"user\",\n        \"content\": \"book a flight and hotel to seattle from LA if weather in sunny over the weekend\"\n    }\n]\n</conversation>\n\n## Instructions\n1. Analyze the latest user intent from the conversation.\n2. Compare it against the available routes to find which routes can help fulfill the request.\n3. Respond only with the exact route names from <routes>.\n4. 
If no routes can help or the intent is already fulfilled, return an empty list.\n\n## Response Format\nReturn your answer strictly in JSON as follows:\n{\"route\": [\"route_name_1\", \"route_name_2\", \"...\"]}\nIf no routes are needed, return an empty list for `route`."
+    }
+  ]
+}
+
+### test the same plano orchestrator prompt via the local listener on port 12001 (with provider hint header)
+POST http://localhost:12001/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+x-arch-llm-provider-hint: Plano-Orchestrator
+model: Plano-Orchestrator
+
+{
+  "model": "Plano-Orchestrator",
+  "messages": [
+    {
+      "role": "user",
+      "content": "You are a helpful assistant that selects the most suitable routes based on user intent.\nYou are provided with a list of available routes enclosed within <routes></routes> XML tags:\n<routes>\n{\"name\": \"weather_agent\", \"description\": \"Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed). Provides temperature, humidity, wind speed, precipitation, and detailed weather conditions.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"flight_agent\", \"description\": \"Get live flight information between airports using AviationStack API. Shows real-time flight status, scheduled/actual departure and arrival times, gate information, and whether flights are in the air. Uses IATA airport codes (e.g., JFK, LAX, LHR). Displays current and scheduled flights with live tracking data.\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n{\"name\": \"currency_agent\", \"description\": \"Get real-time currency exchange rates and perform currency conversions using Frankfurter API (free, no API key needed). Provides latest exchange rates, historical rates, and currency conversions between any supported currencies. Uses standard 3-letter currency codes (e.g., USD, EUR, GBP, JPY).\", \"parameters\": {\"type\": \"object\", \"properties\": {}, \"required\": []}}\n</routes>\n\nYou are also given the conversation context enclosed within <conversation></conversation> XML tags:\n<conversation>\n[\n    {\n        \"role\": \"user\",\n        \"content\": \"book a flight and hotel to seattle from LA if weather in sunny over the weekend\"\n    }\n]\n</conversation>\n\n## Instructions\n1. Analyze the latest user intent from the conversation.\n2. Compare it against the available routes to find which routes can help fulfill the request.\n3. Respond only with the exact route names from <routes>.\n4. 
If no routes can help or the intent is already fulfilled, return an empty list.\n\n## Response Format\nReturn your answer strictly in JSON as follows:\n{\"route\": [\"route_name_1\", \"route_name_2\", \"...\"]}\nIf no routes are needed, return an empty list for `route`."
+    }
+  ]
+}