mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
removed model_server from envoy.template and forwarding traffic to bright_staff
This commit is contained in:
parent
36eb877cbd
commit
60e489099d
4 changed files with 26 additions and 29 deletions
|
|
@ -262,19 +262,16 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
{% for internal_cluster in ["arch_fc", "model_server"] %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
headers:
|
||||
- name: "x-arch-upstream"
|
||||
string_match:
|
||||
exact: {{ internal_cluster }}
|
||||
exact: bright_staff
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ internal_cluster }}
|
||||
cluster: bright_staff
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
|
||||
{% for cluster_name, cluster in arch_clusters.items() %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
|
|
@ -868,24 +865,6 @@ static_resources:
|
|||
tls_params:
|
||||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
|
||||
{% for internal_cluster in ["arch_fc", "model_server"] %}
|
||||
- name: {{ internal_cluster }}
|
||||
connect_timeout: 0.5s
|
||||
type: STRICT_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: {{ internal_cluster }}
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 51000
|
||||
hostname: {{ internal_cluster }}
|
||||
{% endfor %}
|
||||
- name: mistral_7b_instruct
|
||||
connect_timeout: 0.5s
|
||||
type: STRICT_DNS
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ use crate::{
|
|||
};
|
||||
use core::{panic, str};
|
||||
use serde::{ser::SerializeMap, Deserialize, Serialize};
|
||||
use serde_yaml::Value;
|
||||
use std::{
|
||||
collections::{HashMap, VecDeque},
|
||||
fmt::Display,
|
||||
|
|
@ -265,7 +264,7 @@ pub struct ToolCall {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FunctionCallDetail {
|
||||
pub name: String,
|
||||
pub arguments: Option<HashMap<String, Value>>,
|
||||
pub arguments: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
|||
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
pub const MODEL_SERVER_NAME: &str = "bright_staff";
|
||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
pub const MESSAGES_KEY: &str = "messages";
|
||||
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
||||
|
|
|
|||
|
|
@ -371,7 +371,26 @@ impl StreamContext {
|
|||
|
||||
let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
|
||||
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
|
||||
let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
|
||||
let tool_params_str = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
|
||||
|
||||
// Parse arguments JSON string into HashMap
|
||||
// Note: convert from serde_json::Value to serde_yaml::Value for compatibility
|
||||
let tool_params: Option<HashMap<String, serde_yaml::Value>> = match serde_json::from_str::<HashMap<String, serde_json::Value>>(tool_params_str) {
|
||||
Ok(json_params) => {
|
||||
let yaml_params: HashMap<String, serde_yaml::Value> = json_params
|
||||
.into_iter()
|
||||
.filter_map(|(k, v)| {
|
||||
serde_yaml::to_value(&v).ok().map(|yaml_v| (k, yaml_v))
|
||||
})
|
||||
.collect();
|
||||
Some(yaml_params)
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("Failed to parse tool call arguments: {}", e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
|
||||
let endpoint_path: String = endpoint_details
|
||||
.path
|
||||
|
|
@ -384,7 +403,7 @@ impl StreamContext {
|
|||
|
||||
let (path, api_call_body) = match compute_request_path_body(
|
||||
&endpoint_path,
|
||||
tool_params,
|
||||
&tool_params,
|
||||
&prompt_target_params,
|
||||
&http_method,
|
||||
) {
|
||||
|
|
@ -870,7 +889,7 @@ mod test {
|
|||
id: "1".to_string(),
|
||||
function: common::api::open_ai::FunctionCallDetail {
|
||||
name: "test".to_string(),
|
||||
arguments: None,
|
||||
arguments: "{}".to_string(),
|
||||
},
|
||||
tool_type: common::api::open_ai::ToolType::Function,
|
||||
}]),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue