removing model_server python module to brightstaff (function calling) (#615)

* adding function_calling functionality via rust

* fixed rendered YAML file

* removed model_server from envoy.template and forwarding traffic to bright_staff

* fixed bugs in function_calling.rs that were breaking tests. All good now

* updating e2e test to clean up disk usage

* removing Arch* models to be used as a default model if one is not specified

* if the user sets arch-function base_url we should honor it

* fixing demos as we needed to pin to a particular version of huggingface_hub else the chatbot ui wouldn't build

* adding a constant for Arch-Function model name

* fixing some edge cases with calls made to Arch-Function

* fixed JSON parsing issues in function_calling.rs

* fixed bug where the raw response from Arch-Function was re-encoded

* removed debug from supervisord.conf

* commenting out disk cleanup

* adding back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-11-22 12:55:00 -08:00 committed by GitHub
parent 126b029345
commit 88c2bd1851
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 2517 additions and 1356 deletions

View file

@ -0,0 +1,93 @@
use bytes::Bytes;
use http_body_util::combinators::BoxBody;
use http_body_util::StreamBody;
use hyper::body::Frame;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tracing::warn;
/// Trait for processing streaming chunks
/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
pub trait StreamProcessor: Send + 'static {
/// Process an incoming chunk of bytes
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String>;
/// Called when streaming completes successfully
fn on_complete(&mut self) {}
/// Called when streaming encounters an error
fn on_error(&mut self, _error: &str) {}
}
/// A no-op processor that just forwards chunks as-is
pub struct PassthroughProcessor;
impl StreamProcessor for PassthroughProcessor {
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String> {
Ok(Some(chunk))
}
}
/// Result of creating a streaming response
pub struct StreamingResponse {
pub body: BoxBody<Bytes, hyper::Error>,
pub processor_handle: tokio::task::JoinHandle<()>,
}
pub fn create_streaming_response<S, P>(
mut byte_stream: S,
mut processor: P,
buffer_size: usize,
) -> StreamingResponse
where
S: StreamExt<Item = Result<Bytes, reqwest::Error>> + Send + Unpin + 'static,
P: StreamProcessor,
{
let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);
// Spawn a task to process and forward chunks
let processor_handle = tokio::spawn(async move {
while let Some(item) = byte_stream.next().await {
let chunk = match item {
Ok(chunk) => chunk,
Err(err) => {
let err_msg = format!("Error receiving chunk: {:?}", err);
warn!("{}", err_msg);
processor.on_error(&err_msg);
break;
}
};
// Process the chunk
match processor.process_chunk(chunk) {
Ok(Some(processed_chunk)) => {
if tx.send(processed_chunk).await.is_err() {
warn!("Receiver dropped");
break;
}
}
Ok(None) => {
// Skip this chunk
continue;
}
Err(err) => {
warn!("Processor error: {}", err);
processor.on_error(&err);
break;
}
}
}
processor.on_complete();
});
// Convert channel receiver to HTTP stream
let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
let stream_body = BoxBody::new(StreamBody::new(stream));
StreamingResponse {
body: stream_body,
processor_handle,
}
}