From 60e489099df505becc3c75918fdf2e0578f18330 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Wed, 12 Nov 2025 14:45:14 -0800 Subject: [PATCH] removed model_server from envoy.template and forwarding traffic to bright_staff --- arch/envoy.template.yaml | 25 ++------------------- crates/common/src/api/open_ai.rs | 3 +-- crates/common/src/consts.rs | 2 +- crates/prompt_gateway/src/stream_context.rs | 25 ++++++++++++++++++--- 4 files changed, 26 insertions(+), 29 deletions(-) diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index ae9d0fbc..d6b95656 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -262,19 +262,16 @@ static_resources: domains: - "*" routes: - {% for internal_cluster in ["arch_fc", "model_server"] %} - match: prefix: "/" headers: - name: "x-arch-upstream" string_match: - exact: {{ internal_cluster }} + exact: bright_staff route: auto_host_rewrite: true - cluster: {{ internal_cluster }} + cluster: bright_staff timeout: 300s - {% endfor %} - {% for cluster_name, cluster in arch_clusters.items() %} - match: prefix: "/" @@ -868,24 +865,6 @@ static_resources: tls_params: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - - {% for internal_cluster in ["arch_fc", "model_server"] %} - - name: {{ internal_cluster }} - connect_timeout: 0.5s - type: STRICT_DNS - dns_lookup_family: V4_ONLY - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: {{ internal_cluster }} - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 51000 - hostname: {{ internal_cluster }} - {% endfor %} - name: mistral_7b_instruct connect_timeout: 0.5s type: STRICT_DNS diff --git a/crates/common/src/api/open_ai.rs b/crates/common/src/api/open_ai.rs index 080923c1..951bfaf5 100644 --- a/crates/common/src/api/open_ai.rs +++ b/crates/common/src/api/open_ai.rs @@ -4,7 +4,6 @@ use crate::{ }; use core::{panic, str}; use serde::{ser::SerializeMap, Deserialize, Serialize}; -use serde_yaml::Value; use std::{ collections::{HashMap, VecDeque}, fmt::Display, @@ -265,7 +264,7 @@ pub struct ToolCall { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FunctionCallDetail { pub name: String, - pub arguments: Option>, + pub arguments: String, } #[derive(Debug, Deserialize, Serialize)] diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 13624d8d..8edbff1a 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -7,7 +7,7 @@ pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const MODEL_SERVER_NAME: &str = "model_server"; +pub const MODEL_SERVER_NAME: &str = "bright_staff"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint"; diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 96caa378..9efbba21 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -371,7 +371,26 @@ impl StreamContext { let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone(); let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone(); - let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments; + let tool_params_str = &self.tool_calls.as_ref().unwrap()[0].function.arguments; + + // Parse arguments JSON string into HashMap + // Note: convert from serde_json::Value to serde_yaml::Value for compatibility + let tool_params: Option> = match serde_json::from_str::>(tool_params_str) { + Ok(json_params) => { + let yaml_params: HashMap = json_params + .into_iter() + .filter_map(|(k, v)| { + serde_yaml::to_value(&v).ok().map(|yaml_v| (k, yaml_v)) + }) + .collect(); + Some(yaml_params) + }, + Err(e) => { + warn!("Failed to parse tool call arguments: {}", e); + None + } + }; + let endpoint_details = prompt_target.endpoint.as_ref().unwrap(); let endpoint_path: String = endpoint_details .path @@ -384,7 +403,7 @@ impl StreamContext { let (path, api_call_body) = match compute_request_path_body( &endpoint_path, - tool_params, + &tool_params, &prompt_target_params, &http_method, ) { @@ -870,7 +889,7 @@ mod test { id: "1".to_string(), function: common::api::open_ai::FunctionCallDetail { name: "test".to_string(), - arguments: None, + arguments: "{}".to_string(), }, tool_type: common::api::open_ai::ToolType::Function, }]),