removing model_server python module; moving function calling to brightstaff (#615)

* adding function_calling functionality via rust

* fixed rendered YAML file

* removed model_server from envoy.template; traffic is now forwarded to bright_staff

* fixed bugs in function_calling.rs that were breaking tests. All good now

* updating e2e test to clean up disk usage

* removing the use of Arch* models as the default model when one is not specified

* if the user sets arch-function base_url we should honor it

* fixing demos: we needed to pin to a particular version of huggingface_hub, otherwise the chatbot UI wouldn't build

* adding a constant for Arch-Function model name

* fixing some edge cases with calls made to Arch-Function

* fixed JSON parsing issues in function_calling.rs

* fixed bug where the raw response from Arch-Function was re-encoded

* removed debug from supervisord.conf

* commenting out disk cleanup

* adding back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-11-22 12:55:00 -08:00 committed by GitHub
parent 126b029345
commit 88c2bd1851
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 2517 additions and 1356 deletions

View file

@ -264,13 +264,6 @@ impl StreamContext {
.tool_calls
.clone_into(&mut self.tool_calls);
if self.tool_calls.as_ref().unwrap().len() > 1 {
warn!(
"multiple tool calls not supported yet, tool_calls count found: {}",
self.tool_calls.as_ref().unwrap().len()
);
}
if self.tool_calls.is_none() || self.tool_calls.as_ref().unwrap().is_empty() {
// This means that Arch FC did not have enough information to resolve the function call
// Arch FC probably responded with a message asking for more information.
@ -314,6 +307,14 @@ impl StreamContext {
);
}
// At this point, we know tool_calls is not None and not empty
if self.tool_calls.as_ref().unwrap().len() > 1 {
warn!(
"multiple tool calls not supported yet, tool_calls count found: {}",
self.tool_calls.as_ref().unwrap().len()
);
}
// update prompt target name from the tool call response
callout_context.prompt_target_name =
Some(self.tool_calls.as_ref().unwrap()[0].function.name.clone());
@ -371,7 +372,26 @@ impl StreamContext {
let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
let tool_params_str = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
// Parse arguments JSON string into HashMap
// Note: convert from serde_json::Value to serde_yaml::Value for compatibility
let tool_params: Option<HashMap<String, serde_yaml::Value>> = match serde_json::from_str::<HashMap<String, serde_json::Value>>(tool_params_str) {
Ok(json_params) => {
let yaml_params: HashMap<String, serde_yaml::Value> = json_params
.into_iter()
.filter_map(|(k, v)| {
serde_yaml::to_value(&v).ok().map(|yaml_v| (k, yaml_v))
})
.collect();
Some(yaml_params)
},
Err(e) => {
warn!("Failed to parse tool call arguments: {}", e);
None
}
};
let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
let endpoint_path: String = endpoint_details
.path
@ -384,7 +404,7 @@ impl StreamContext {
let (path, api_call_body) = match compute_request_path_body(
&endpoint_path,
tool_params,
&tool_params,
&prompt_target_params,
&http_method,
) {
@ -870,7 +890,7 @@ mod test {
id: "1".to_string(),
function: common::api::open_ai::FunctionCallDetail {
name: "test".to_string(),
arguments: None,
arguments: "{}".to_string(),
},
tool_type: common::api::open_ai::ToolType::Function,
}]),