2024-10-21 15:04:15 -07:00
|
|
|
use common::errors::ServerError;
|
|
|
|
|
use common::stats::IncrementingMetric;
|
|
|
|
|
use proxy_wasm::traits::Context;
|
|
|
|
|
|
|
|
|
|
use crate::stream_context::{ResponseHandlerType, StreamContext};
|
|
|
|
|
|
|
|
|
|
impl Context for StreamContext {
|
|
|
|
|
fn on_http_call_response(
|
|
|
|
|
&mut self,
|
|
|
|
|
token_id: u32,
|
|
|
|
|
_num_headers: usize,
|
|
|
|
|
body_size: usize,
|
|
|
|
|
_num_trailers: usize,
|
|
|
|
|
) {
|
|
|
|
|
let callout_context = self
|
|
|
|
|
.callouts
|
|
|
|
|
.get_mut()
|
|
|
|
|
.remove(&token_id)
|
|
|
|
|
.expect("invalid token_id");
|
|
|
|
|
self.metrics.active_http_calls.increment(-1);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
state transition
|
|
|
|
|
|
|
|
|
|
graph LR
|
|
|
|
|
|
|
|
|
|
on_http_request_body --> prompt received
|
|
|
|
|
prompt received --> get embeddings & arch guard
|
|
|
|
|
arch guard --> get embeddings
|
|
|
|
|
get embeddings --> zeroshot intent
|
|
|
|
|
|
|
|
|
|
┌──────────────────────┐ ┌─────────────────┐ ┌────────────────┐ ┌─────────────────┐
|
|
|
|
|
│ │ │ │ │ │ │ │
|
|
|
|
|
│ on_http_request_body ├──►│ prompt received ├──►│ get embeddings ├──►│ zeroshot intent │
|
|
|
|
|
│ │ │ │ │ │ │ │
|
|
|
|
|
└──────────────────────┘ └────────┬────────┘ └────────────────┘ └─────────────────┘
|
2024-11-07 22:11:00 -06:00
|
|
|
│ ▲
|
|
|
|
|
│ │
|
|
|
|
|
│ │
|
|
|
|
|
│ ┌────────┴───────┐
|
|
|
|
|
│ │ │
|
|
|
|
|
└───────────►│ arch guard │
|
|
|
|
|
│ │
|
|
|
|
|
└────────────────┘
|
2024-10-21 15:04:15 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
continue from zeroshot intent
|
|
|
|
|
|
|
|
|
|
graph LR
|
|
|
|
|
|
|
|
|
|
zeroshot intent --> arch_fc
|
|
|
|
|
zeroshot intent --> default prompt target
|
|
|
|
|
arch_fc --> developer api call & hallucination check
|
|
|
|
|
hallucination check --> parameter gathering & developer api call
|
|
|
|
|
developer api call --> resume request to llm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
┌─────────────────┐ ┌───────────────────────┐ ┌─────────────────────┐ ┌───────────────────────┐
|
|
|
|
|
│ │ │ │ │ │ │ │
|
|
|
|
|
│ zeroshot intent ├──►│ arch_fc ├──►│ developer api call ├──►│ resume request to llm │
|
|
|
|
|
│ │ │ │ │ │ │ │
|
|
|
|
|
└────────┬────────┘ └───────────┬───────────┘ └─────────────────────┘ └───────────────────────┘
|
|
|
|
|
│ │ ▲
|
|
|
|
|
│ └─────────────┐ │
|
|
|
|
|
│ │ │
|
|
|
|
|
│ ┌───────────────────────┐ │ ┌──────────┴──────────┐ ┌───────────────────────┐
|
|
|
|
|
│ │ │ │ │ │ │ │
|
|
|
|
|
└───────────►│ default prompt target │ └▲│ hallucination check ├──►│ parameter gathering │
|
|
|
|
|
│ │ │ │ │ │
|
|
|
|
|
└───────────────────────┘ └─────────────────────┘ └───────────────────────┘
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using https://mermaid-ascii.art/
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
if let Some(body) = self.get_http_call_response_body(0, body_size) {
|
2024-10-22 12:07:40 -07:00
|
|
|
#[cfg_attr(any(), rustfmt::skip)]
|
2024-10-21 15:04:15 -07:00
|
|
|
match callout_context.response_handler_type {
|
|
|
|
|
ResponseHandlerType::ArchGuard => self.arch_guard_handler(body, callout_context),
|
2024-10-22 12:07:40 -07:00
|
|
|
ResponseHandlerType::Embeddings => self.embeddings_handler(body, callout_context),
|
|
|
|
|
ResponseHandlerType::ZeroShotIntent => self.zero_shot_intent_detection_resp_handler(body, callout_context),
|
2024-10-21 15:04:15 -07:00
|
|
|
ResponseHandlerType::ArchFC => self.arch_fc_response_handler(body, callout_context),
|
2024-10-22 12:07:40 -07:00
|
|
|
ResponseHandlerType::Hallucination => self.hallucination_classification_resp_handler(body, callout_context),
|
|
|
|
|
ResponseHandlerType::FunctionCall => self.api_call_response_handler(body, callout_context),
|
|
|
|
|
ResponseHandlerType::DefaultTarget =>self.default_target_handler(body, callout_context),
|
2024-10-21 15:04:15 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
self.send_server_error(
|
|
|
|
|
ServerError::LogicError(String::from("No response body in inline HTTP request")),
|
|
|
|
|
None,
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|