mirror of
https://github.com/katanemo/plano.git
synced 2026-05-11 00:32:42 +02:00
refactor prompt gateway (#204)
This commit is contained in:
parent
dced8a5708
commit
2f374df034
9 changed files with 500 additions and 441 deletions
103
crates/prompt_gateway/src/context.rs
Normal file
103
crates/prompt_gateway/src/context.rs
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
use common::errors::ServerError;
|
||||
use common::stats::IncrementingMetric;
|
||||
use proxy_wasm::traits::Context;
|
||||
|
||||
use crate::stream_context::{ResponseHandlerType, StreamContext};
|
||||
|
||||
impl Context for StreamContext {
|
||||
fn on_http_call_response(
|
||||
&mut self,
|
||||
token_id: u32,
|
||||
_num_headers: usize,
|
||||
body_size: usize,
|
||||
_num_trailers: usize,
|
||||
) {
|
||||
let callout_context = self
|
||||
.callouts
|
||||
.get_mut()
|
||||
.remove(&token_id)
|
||||
.expect("invalid token_id");
|
||||
self.metrics.active_http_calls.increment(-1);
|
||||
|
||||
/*
|
||||
state transition
|
||||
|
||||
graph LR
|
||||
|
||||
on_http_request_body --> prompt received
|
||||
prompt received --> get embeddings & arch guard
|
||||
arch guard --> get embeddings
|
||||
get embeddings --> zeroshot intent
|
||||
|
||||
┌──────────────────────┐ ┌─────────────────┐ ┌────────────────┐ ┌─────────────────┐
|
||||
│ │ │ │ │ │ │ │
|
||||
│ on_http_request_body ├──►│ prompt received ├──►│ get embeddings ├──►│ zeroshot intent │
|
||||
│ │ │ │ │ │ │ │
|
||||
└──────────────────────┘ └────────┬────────┘ └────────────────┘ └─────────────────┘
|
||||
│ ▲
|
||||
│ │
|
||||
│ │
|
||||
│ ┌────────┴───────┐
|
||||
│ │ │
|
||||
└───────────►│ arch guard │
|
||||
│ │
|
||||
└────────────────┘
|
||||
|
||||
|
||||
continue from zeroshot intent
|
||||
|
||||
graph LR
|
||||
|
||||
zeroshot intent --> arch_fc
|
||||
zeroshot intent --> default prompt target
|
||||
arch_fc --> developer api call & hallucination check
|
||||
hallucination check --> parameter gathering & developer api call
|
||||
developer api call --> resume request to llm
|
||||
|
||||
|
||||
┌─────────────────┐ ┌───────────────────────┐ ┌─────────────────────┐ ┌───────────────────────┐
|
||||
│ │ │ │ │ │ │ │
|
||||
│ zeroshot intent ├──►│ arch_fc ├──►│ developer api call ├──►│ resume request to llm │
|
||||
│ │ │ │ │ │ │ │
|
||||
└────────┬────────┘ └───────────┬───────────┘ └─────────────────────┘ └───────────────────────┘
|
||||
│ │ ▲
|
||||
│ └─────────────┐ │
|
||||
│ │ │
|
||||
│ ┌───────────────────────┐ │ ┌──────────┴──────────┐ ┌───────────────────────┐
|
||||
│ │ │ │ │ │ │ │
|
||||
└───────────►│ default prompt target │ └▲│ hallucination check ├──►│ parameter gathering │
|
||||
│ │ │ │ │ │
|
||||
└───────────────────────┘ └─────────────────────┘ └───────────────────────┘
|
||||
|
||||
|
||||
using https://mermaid-ascii.art/
|
||||
*/
|
||||
|
||||
if let Some(body) = self.get_http_call_response_body(0, body_size) {
|
||||
match callout_context.response_handler_type {
|
||||
ResponseHandlerType::GetEmbeddings => {
|
||||
self.embeddings_handler(body, callout_context)
|
||||
}
|
||||
ResponseHandlerType::ArchGuard => self.arch_guard_handler(body, callout_context),
|
||||
ResponseHandlerType::ZeroShotIntent => {
|
||||
self.zero_shot_intent_detection_resp_handler(body, callout_context)
|
||||
}
|
||||
ResponseHandlerType::ArchFC => self.arch_fc_response_handler(body, callout_context),
|
||||
ResponseHandlerType::HallucinationDetect => {
|
||||
self.hallucination_classification_resp_handler(body, callout_context)
|
||||
}
|
||||
ResponseHandlerType::FunctionCall => {
|
||||
self.function_call_response_handler(body, callout_context)
|
||||
}
|
||||
ResponseHandlerType::DefaultTarget => {
|
||||
self.default_target_handler(body, callout_context)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.send_server_error(
|
||||
ServerError::LogicError(String::from("No response body in inline HTTP request")),
|
||||
None,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue