mirror of
https://github.com/katanemo/plano.git
synced 2026-05-10 08:12:48 +02:00
removing model_server python module to brightstaff (function calling) (#615)
* adding function_calling functionality via rust * fixed rendered YAML file * removed model_server from envoy.template and forwarding traffic to bright_staff * fixed bugs in function_calling.rs that were breaking tests. All good now * updating e2e test to clean up disk usage * removing Arch* models to be used as a default model if one is not specified * if the user sets arch-function base_url we should honor it * fixing demos as we needed to pin to a particular version of huggingface_hub else the chatbot ui wouldn't build * adding a constant for Arch-Function model name * fixing some edge cases with calls made to Arch-Function * fixed JSON parsing issues in function_calling.rs * fixed bug where the raw response from Arch-Function was re-encoded * removed debug from supervisord.conf * commenting out disk cleanup * adding back disk space --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local> Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
126b029345
commit
88c2bd1851
40 changed files with 2517 additions and 1356 deletions
93
crates/brightstaff/src/handlers/utils.rs
Normal file
93
crates/brightstaff/src/handlers/utils.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
use bytes::Bytes;
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use http_body_util::StreamBody;
|
||||
use hyper::body::Frame;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::warn;
|
||||
|
||||
/// Trait for processing streaming chunks
|
||||
/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
|
||||
pub trait StreamProcessor: Send + 'static {
|
||||
/// Process an incoming chunk of bytes
|
||||
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String>;
|
||||
|
||||
/// Called when streaming completes successfully
|
||||
fn on_complete(&mut self) {}
|
||||
|
||||
/// Called when streaming encounters an error
|
||||
fn on_error(&mut self, _error: &str) {}
|
||||
}
|
||||
|
||||
/// A no-op processor that just forwards chunks as-is
|
||||
pub struct PassthroughProcessor;
|
||||
|
||||
impl StreamProcessor for PassthroughProcessor {
|
||||
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String> {
|
||||
Ok(Some(chunk))
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of creating a streaming response
|
||||
pub struct StreamingResponse {
|
||||
pub body: BoxBody<Bytes, hyper::Error>,
|
||||
pub processor_handle: tokio::task::JoinHandle<()>,
|
||||
}
|
||||
|
||||
pub fn create_streaming_response<S, P>(
|
||||
mut byte_stream: S,
|
||||
mut processor: P,
|
||||
buffer_size: usize,
|
||||
) -> StreamingResponse
|
||||
where
|
||||
S: StreamExt<Item = Result<Bytes, reqwest::Error>> + Send + Unpin + 'static,
|
||||
P: StreamProcessor,
|
||||
{
|
||||
let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);
|
||||
|
||||
// Spawn a task to process and forward chunks
|
||||
let processor_handle = tokio::spawn(async move {
|
||||
while let Some(item) = byte_stream.next().await {
|
||||
let chunk = match item {
|
||||
Ok(chunk) => chunk,
|
||||
Err(err) => {
|
||||
let err_msg = format!("Error receiving chunk: {:?}", err);
|
||||
warn!("{}", err_msg);
|
||||
processor.on_error(&err_msg);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// Process the chunk
|
||||
match processor.process_chunk(chunk) {
|
||||
Ok(Some(processed_chunk)) => {
|
||||
if tx.send(processed_chunk).await.is_err() {
|
||||
warn!("Receiver dropped");
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
// Skip this chunk
|
||||
continue;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Processor error: {}", err);
|
||||
processor.on_error(&err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
processor.on_complete();
|
||||
});
|
||||
|
||||
// Convert channel receiver to HTTP stream
|
||||
let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
|
||||
let stream_body = BoxBody::new(StreamBody::new(stream));
|
||||
|
||||
StreamingResponse {
|
||||
body: stream_body,
|
||||
processor_handle,
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue