Removed model_server from envoy.template and forwarded its traffic to bright_staff

This commit is contained in:
Salman Paracha 2025-11-12 14:45:14 -08:00
parent 36eb877cbd
commit 60e489099d
4 changed files with 26 additions and 29 deletions

View file

@ -262,19 +262,16 @@ static_resources:
domains:
- "*"
routes:
{% for internal_cluster in ["arch_fc", "model_server"] %}
- match:
prefix: "/"
headers:
- name: "x-arch-upstream"
string_match:
exact: {{ internal_cluster }}
exact: bright_staff
route:
auto_host_rewrite: true
cluster: {{ internal_cluster }}
cluster: bright_staff
timeout: 300s
{% endfor %}
{% for cluster_name, cluster in arch_clusters.items() %}
- match:
prefix: "/"
@ -868,24 +865,6 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
{% for internal_cluster in ["arch_fc", "model_server"] %}
- name: {{ internal_cluster }}
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: {{ internal_cluster }}
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 51000
hostname: {{ internal_cluster }}
{% endfor %}
- name: mistral_7b_instruct
connect_timeout: 0.5s
type: STRICT_DNS

View file

@ -4,7 +4,6 @@ use crate::{
};
use core::{panic, str};
use serde::{ser::SerializeMap, Deserialize, Serialize};
use serde_yaml::Value;
use std::{
collections::{HashMap, VecDeque},
fmt::Display,
@ -265,7 +264,7 @@ pub struct ToolCall {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionCallDetail {
pub name: String,
pub arguments: Option<HashMap<String, Value>>,
pub arguments: String,
}
#[derive(Debug, Deserialize, Serialize)]

View file

@ -7,7 +7,7 @@ pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_NAME: &str = "model_server";
pub const MODEL_SERVER_NAME: &str = "bright_staff";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const MESSAGES_KEY: &str = "messages";
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";

View file

@ -371,7 +371,26 @@ impl StreamContext {
let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
let tool_params_str = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
// Parse the tool-call arguments (a JSON string) into a HashMap.
// Note: each value is converted from serde_json::Value to serde_yaml::Value for compatibility;
// entries that fail to convert are silently dropped.
let tool_params: Option<HashMap<String, serde_yaml::Value>> = match serde_json::from_str::<HashMap<String, serde_json::Value>>(tool_params_str) {
Ok(json_params) => {
let yaml_params: HashMap<String, serde_yaml::Value> = json_params
.into_iter()
.filter_map(|(k, v)| {
serde_yaml::to_value(&v).ok().map(|yaml_v| (k, yaml_v))
})
.collect();
Some(yaml_params)
},
Err(e) => {
warn!("Failed to parse tool call arguments: {}", e);
None
}
};
let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
let endpoint_path: String = endpoint_details
.path
@ -384,7 +403,7 @@ impl StreamContext {
let (path, api_call_body) = match compute_request_path_body(
&endpoint_path,
tool_params,
&tool_params,
&prompt_target_params,
&http_method,
) {
@ -870,7 +889,7 @@ mod test {
id: "1".to_string(),
function: common::api::open_ai::FunctionCallDetail {
name: "test".to_string(),
arguments: None,
arguments: "{}".to_string(),
},
tool_type: common::api::open_ai::ToolType::Function,
}]),