removing model_server python module to brightstaff (function calling) (#615)

* adding function_calling functionality via rust

* fixed rendered YAML file

* removed model_server from envoy.template and forwarding traffic to bright_staff

* fixed bugs in function_calling.rs that were breaking tests. All good now

* updating e2e test to clean up disk usage

* removing Arch* models to be used as a default model if one is not specified

* if the user sets arch-function base_url we should honor it

* fixing demos as we needed to pin to a particular version of huggingface_hub else the chatbot ui wouldn't build

* adding a constant for Arch-Function model name

* fixing some edge cases with calls made to Arch-Function

* fixed JSON parsing issues in function_calling.rs

* fixed bug where the raw response from Arch-Function was re-encoded

* removed debug from supervisord.conf

* commenting out disk cleanup

* adding back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-11-22 12:55:00 -08:00 committed by GitHub
parent 126b029345
commit 88c2bd1851
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 2517 additions and 1356 deletions

View file

@ -4,7 +4,6 @@ use crate::{
};
use core::{panic, str};
use serde::{ser::SerializeMap, Deserialize, Serialize};
use serde_yaml::Value;
use std::{
collections::{HashMap, VecDeque},
fmt::Display,
@ -265,7 +264,7 @@ pub struct ToolCall {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionCallDetail {
pub name: String,
pub arguments: Option<HashMap<String, Value>>,
pub arguments: String,
}
#[derive(Debug, Deserialize, Serialize)]

View file

@ -7,7 +7,7 @@ pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_NAME: &str = "model_server";
pub const MODEL_SERVER_NAME: &str = "bright_staff";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const MESSAGES_KEY: &str = "messages";
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";

View file

@ -40,8 +40,14 @@ pub fn get_llm_provider(
let mut rng = thread_rng();
llm_providers
.iter()
.filter(|(_, provider)| {
provider.model
.as_ref()
.map(|m| !m.starts_with("Arch"))
.unwrap_or(true)
})
.choose(&mut rng)
.expect("There should always be at least one llm provider")
.expect("There should always be at least one non-Arch llm provider")
.1
.clone()
}