// plano/crates/prompt_gateway/src/context.rs

use common::errors::ServerError;
use common::stats::IncrementingMetric;
use proxy_wasm::traits::Context;
use crate::stream_context::{ResponseHandlerType, StreamContext};
impl Context for StreamContext {
fn on_http_call_response(
&mut self,
token_id: u32,
_num_headers: usize,
body_size: usize,
_num_trailers: usize,
) {
let callout_context = self
.callouts
.get_mut()
.remove(&token_id)
.expect("invalid token_id");
self.metrics.active_http_calls.increment(-1);
/*
state transition
graph LR
on_http_request_body --> prompt received
prompt received --> get embeddings & arch guard
arch guard --> get embeddings
get embeddings --> zeroshot intent
on_http_request_body prompt received get embeddings zeroshot intent
2024-11-07 22:11:00 -06:00
arch guard
2024-10-21 15:04:15 -07:00
continue from zeroshot intent
graph LR
zeroshot intent --> arch_fc
zeroshot intent --> default prompt target
arch_fc --> developer api call & hallucination check
hallucination check --> parameter gathering & developer api call
developer api call --> resume request to llm
zeroshot intent arch_fc developer api call resume request to llm
default prompt target hallucination check parameter gathering
using https://mermaid-ascii.art/
*/
if let Some(body) = self.get_http_call_response_body(0, body_size) {
#[cfg_attr(any(), rustfmt::skip)]
2024-10-21 15:04:15 -07:00
match callout_context.response_handler_type {
ResponseHandlerType::ArchGuard => self.arch_guard_handler(body, callout_context),
ResponseHandlerType::Embeddings => self.embeddings_handler(body, callout_context),
ResponseHandlerType::ZeroShotIntent => self.zero_shot_intent_detection_resp_handler(body, callout_context),
2024-10-21 15:04:15 -07:00
ResponseHandlerType::ArchFC => self.arch_fc_response_handler(body, callout_context),
ResponseHandlerType::Hallucination => self.hallucination_classification_resp_handler(body, callout_context),
ResponseHandlerType::FunctionCall => self.api_call_response_handler(body, callout_context),
ResponseHandlerType::DefaultTarget =>self.default_target_handler(body, callout_context),
2024-10-21 15:04:15 -07:00
}
} else {
self.send_server_error(
ServerError::LogicError(String::from("No response body in inline HTTP request")),
None,
);
}
}
}