Mirror of https://github.com/katanemo/plano.git (synced 2026-05-10 00:02:43 +02:00)
removing the model_server python module; moving function calling to brightstaff (#615)
* adding function_calling functionality via Rust
* fixed the rendered YAML file
* removed model_server from envoy.template; traffic is now forwarded to bright_staff
* fixed bugs in function_calling.rs that were breaking tests; all good now
* updated the e2e test to clean up disk usage
* removed Arch* models as the default when no model is specified
* if the user sets an arch-function base_url, honor it
* fixed demos by pinning huggingface_hub to a particular version; otherwise the chatbot UI wouldn't build
* added a constant for the Arch-Function model name
* fixed some edge cases in calls made to Arch-Function
* fixed JSON parsing issues in function_calling.rs
* fixed a bug where the raw response from Arch-Function was re-encoded
* removed debug from supervisord.conf
* commented out disk cleanup
* added back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent 126b029345
commit 88c2bd1851
40 changed files with 2517 additions and 1356 deletions
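Two items in the commit message — adding a constant for the Arch-Function model name, and honoring a user-supplied arch-function base_url — describe a simple resolution rule. Below is a minimal sketch of that rule; every name and URL in it is hypothetical, not taken from the repository:

```rust
// Hypothetical names/values - the real constant and default live in the repo.
const ARCH_FUNCTION_MODEL_NAME: &str = "Arch-Function";
const DEFAULT_ARCH_FUNCTION_BASE_URL: &str = "http://model-server.internal/v1";

/// If the user sets an arch-function base_url, honor it; otherwise fall back
/// to the built-in default.
fn resolve_base_url(user_base_url: Option<&str>) -> &str {
    user_base_url.unwrap_or(DEFAULT_ARCH_FUNCTION_BASE_URL)
}

fn main() {
    assert_eq!(resolve_base_url(None), DEFAULT_ARCH_FUNCTION_BASE_URL);
    assert_eq!(
        resolve_base_url(Some("http://localhost:8000/v1")),
        "http://localhost:8000/v1"
    );
    println!(
        "routing {} calls to {}",
        ARCH_FUNCTION_MODEL_NAME,
        resolve_base_url(None)
    );
}
```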
```diff
@@ -101,6 +101,12 @@ pub struct ChatCompletionsRequest {
     pub top_logprobs: Option<u32>,
     pub user: Option<String>,
     // pub web_search: Option<bool>, // GOOD FIRST ISSUE: Future support for web search
+
+    // VLLM-specific parameters (used by Arch-Function)
+    pub top_k: Option<u32>,
+    pub stop_token_ids: Option<Vec<u32>>,
+    pub continue_final_message: Option<bool>,
+    pub add_generation_prompt: Option<bool>,
 }

 impl ChatCompletionsRequest {
```
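The hunk above adds four vLLM-specific sampling fields to ChatCompletionsRequest. Here is a self-contained sketch of how such optional fields are commonly declared with serde so that unset options are omitted from the request body; the struct name and the skip_serializing_if attributes are assumptions, since the diff does not show the derives (requires the serde and serde_json crates):

```rust
use serde::Serialize;

// Trimmed-down stand-in for the request struct; not the crate's definition.
#[derive(Serialize, Default)]
struct VllmSamplingParams {
    #[serde(skip_serializing_if = "Option::is_none")]
    top_k: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop_token_ids: Option<Vec<u32>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    continue_final_message: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    add_generation_prompt: Option<bool>,
}

fn main() {
    let params = VllmSamplingParams {
        top_k: Some(50),
        stop_token_ids: Some(vec![151645]), // example token id only
        ..Default::default()
    };
    // Unset options are dropped: {"top_k":50,"stop_token_ids":[151645]}
    println!("{}", serde_json::to_string(&params).unwrap());
}
```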
```diff
@@ -385,6 +391,8 @@ pub struct ChatCompletionsResponse {
     pub usage: Usage,
     pub system_fingerprint: Option<String>,
     pub service_tier: Option<String>,
+    // This isn't a standard OpenAI field, but we include it for extensibility
+    pub metadata: Option<HashMap<String, Value>>,
 }

 impl Default for ChatCompletionsResponse {
```
```diff
@@ -398,6 +406,7 @@ impl Default for ChatCompletionsResponse {
             usage: Usage::default(),
             system_fingerprint: None,
             service_tier: None,
+            metadata: None,
         }
     }
 }
```
```diff
@@ -316,6 +316,17 @@ impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedUpstreamAPIs)> for SseEvent {
         // Create a new transformed event based on the original
         let mut transformed_event = sse_event;

+        // Handle [DONE] marker early - don't try to parse as JSON
+        if transformed_event.is_done() {
+            // For OpenAI client API, keep [DONE] as-is
+            // For Anthropic client API, it will be transformed via ProviderStreamResponseType
+            if matches!(client_api, SupportedAPIs::OpenAIChatCompletions(_)) {
+                // Keep the [DONE] marker as-is for OpenAI clients
+                transformed_event.sse_transform_buffer = "data: [DONE]".to_string();
+                return Ok(transformed_event);
+            }
+        }
+
         // If has data, parse the data as a provider stream response (business logic layer)
         if transformed_event.data.is_some() {
             let data_str = transformed_event.data.as_ref().unwrap();
```
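The special case exists because OpenAI-style streams terminate with a literal data: [DONE] event whose payload is not JSON, so it must be short-circuited before the JSON-parsing path runs. A minimal sketch of just that check; the SseEvent here is a stand-in for the crate's type:

```rust
// Stand-in for the crate's SseEvent; only the data payload is modeled.
struct SseEvent {
    data: Option<String>,
}

impl SseEvent {
    /// True when the event carries the stream-termination sentinel.
    fn is_done(&self) -> bool {
        self.data.as_deref().map(str::trim) == Some("[DONE]")
    }
}

fn main() {
    let done = SseEvent { data: Some("[DONE]".to_string()) };
    let chunk = SseEvent { data: Some(r#"{"choices":[]}"#.to_string()) };

    assert!(done.is_done());
    assert!(!chunk.is_done());
    // Only non-[DONE] payloads should reach serde_json; feeding "[DONE]"
    // to a JSON parser is exactly the failure the early return avoids.
    println!("[DONE] handled before JSON parsing");
}
```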
```diff
@@ -83,8 +83,7 @@ impl TryFrom<MessagesResponse> for ChatCompletionsResponse {
             model: resp.model,
             choices: vec![choice],
             usage,
-            system_fingerprint: None,
-            service_tier: None,
+            ..Default::default()
         })
     }
 }
```
```diff
@@ -169,8 +168,7 @@ impl TryFrom<ConverseResponse> for ChatCompletionsResponse {
             model,
             choices: vec![choice],
             usage,
-            system_fingerprint: None,
-            service_tier: None,
+            ..Default::default()
         })
     }
 }
```
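Both conversion hunks swap the explicit system_fingerprint: None / service_tier: None pairs for struct-update syntax. A compilable sketch of the pattern with stand-in types (UpstreamResponse is hypothetical): only the fields the upstream response actually provides are set, and ..Default::default() covers the rest, so newly added response fields such as metadata never need to be touched in each TryFrom impl:

```rust
use std::collections::HashMap;

// Trimmed-down stand-ins; the real structs carry many more fields.
#[derive(Default, Debug)]
struct ChatCompletionsResponse {
    model: String,
    system_fingerprint: Option<String>,
    service_tier: Option<String>,
    metadata: Option<HashMap<String, String>>, // simplified value type
}

struct UpstreamResponse {
    model: String,
}

impl TryFrom<UpstreamResponse> for ChatCompletionsResponse {
    type Error = String;

    fn try_from(resp: UpstreamResponse) -> Result<Self, Self::Error> {
        Ok(ChatCompletionsResponse {
            model: resp.model,
            // system_fingerprint, service_tier, metadata all default to None.
            ..Default::default()
        })
    }
}

fn main() {
    let resp = ChatCompletionsResponse::try_from(UpstreamResponse {
        model: "claude-3".into(),
    })
    .unwrap();
    println!("{resp:?}");
}
```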