moving function calling from the model_server python module to brightstaff (#615)

* adding function_calling functionality via rust

* fixed rendered YAML file

* removed model_server from envoy.template and forwarding traffic to bright_staff

* fixed bugs in function_calling.rs that were breaking tests. All good now

* updating e2e test to clean up disk usage

* removing Arch* models from being used as the default model if one is not specified

* if the user sets arch-function base_url we should honor it

* fixing demos: we needed to pin to a particular version of huggingface_hub, otherwise the chatbot UI wouldn't build

* adding a constant for Arch-Function model name

* fixing some edge cases with calls made to Arch-Function

* fixed JSON parsing issues in function_calling.rs

* fixed bug where the raw response from Arch-Function was re-encoded

* removed debug from supervisord.conf

* commenting out disk cleanup

* adding back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-11-22 12:55:00 -08:00 committed by GitHub
parent 126b029345
commit 88c2bd1851
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 2517 additions and 1356 deletions

View file

@ -101,6 +101,12 @@ pub struct ChatCompletionsRequest {
pub top_logprobs: Option<u32>,
pub user: Option<String>,
// pub web_search: Option<bool>, // GOOD FIRST ISSUE: Future support for web search
// VLLM-specific parameters (used by Arch-Function)
pub top_k: Option<u32>,
pub stop_token_ids: Option<Vec<u32>>,
pub continue_final_message: Option<bool>,
pub add_generation_prompt: Option<bool>,
}
impl ChatCompletionsRequest {
@ -385,6 +391,8 @@ pub struct ChatCompletionsResponse {
pub usage: Usage,
pub system_fingerprint: Option<String>,
pub service_tier: Option<String>,
// This isn't a standard OpenAI field, but we include it for extensibility
pub metadata: Option<HashMap<String, Value>>,
}
impl Default for ChatCompletionsResponse {
@ -398,6 +406,7 @@ impl Default for ChatCompletionsResponse {
usage: Usage::default(),
system_fingerprint: None,
service_tier: None,
metadata: None,
}
}
}