Remove model_server Python module; move function calling to brightstaff (#615)

* adding function_calling functionality via rust
* fixed rendered YAML file
* removed model_server from envoy.template and forwarding traffic to bright_staff
* fixed bugs in function_calling.rs that were breaking tests. All good now
* updating e2e test to clean up disk usage
* removing Arch* models to be used as a default model if one is not specified
* if the user sets arch-function base_url we should honor it
* fixing demos as we needed to pin to a particular version of huggingface_hub else the chatbot ui wouldn't build
* adding a constant for Arch-Function model name
* fixing some edge cases with calls made to Arch-Function
* fixed JSON parsing issues in function_calling.rs
* fixed bug where the raw response from Arch-Function was re-encoded
* removed debug from supervisord.conf
* commenting out disk cleanup
* adding back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
2026-04-30 03:16:28 +02:00 · 2025-11-22 12:55:00 -08:00 · 2025-11-22 12:55:00 -08:00 · 88c2bd1851
commit 88c2bd1851
parent 126b029345
40 changed files with 2517 additions and 1356 deletions
--- a/crates/common/src/api/open_ai.rs
+++ b/crates/common/src/api/open_ai.rs
@ -4,7 +4,6 @@ use crate::{
 };
 use core::{panic, str};
 use serde::{ser::SerializeMap, Deserialize, Serialize};
-use serde_yaml::Value;
 use std::{
    collections::{HashMap, VecDeque},
    fmt::Display,
@ -265,7 +264,7 @@ pub struct ToolCall {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FunctionCallDetail {
    pub name: String,
-    pub arguments: Option<HashMap<String, Value>>,
+    pub arguments: String,
 }

 #[derive(Debug, Deserialize, Serialize)]
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@ -7,7 +7,7 @@ pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
-pub const MODEL_SERVER_NAME: &str = "model_server";
+pub const MODEL_SERVER_NAME: &str = "bright_staff";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
--- a/crates/common/src/routing.rs
+++ b/crates/common/src/routing.rs
@ -40,8 +40,14 @@ pub fn get_llm_provider(
    let mut rng = thread_rng();
    llm_providers
        .iter()
+        .filter(|(_, provider)| {
+            provider.model
+                .as_ref()
+                .map(|m| !m.starts_with("Arch"))
+                .unwrap_or(true)
+        })
        .choose(&mut rng)
-        .expect("There should always be at least one llm provider")
+        .expect("There should always be at least one non-Arch llm provider")
        .1
        .clone()
 }