mirror of
https://github.com/katanemo/plano.git
synced 2026-06-20 15:28:07 +02:00
add preliminary support for llm agents (#432)
This commit is contained in:
parent
8d66fefded
commit
84cd1df7bf
29 changed files with 1388 additions and 121 deletions
|
|
@ -189,7 +189,7 @@ pub struct ToolCall {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FunctionCallDetail {
|
||||
pub name: String,
|
||||
pub arguments: HashMap<String, Value>,
|
||||
pub arguments: Option<HashMap<String, Value>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ pub struct Configuration {
|
|||
pub struct Overrides {
|
||||
pub prompt_target_intent_matching_threshold: Option<f64>,
|
||||
pub optimize_context_window: Option<bool>,
|
||||
pub use_agent_orchestrator: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -159,7 +160,7 @@ pub struct LlmProvider {
|
|||
pub name: String,
|
||||
pub provider_interface: LlmProviderType,
|
||||
pub access_key: Option<String>,
|
||||
pub model: String,
|
||||
pub model: Option<String>,
|
||||
pub default: Option<bool>,
|
||||
pub stream: Option<bool>,
|
||||
pub endpoint: Option<String>,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use crate::metrics::Metrics;
|
||||
use crate::stream_context::StreamContext;
|
||||
use common::configuration::Configuration;
|
||||
use common::configuration::Overrides;
|
||||
use common::consts::OTEL_COLLECTOR_HTTP;
|
||||
use common::consts::OTEL_POST_PATH;
|
||||
use common::http::CallArgs;
|
||||
|
|
@ -31,6 +32,7 @@ pub struct FilterContext {
|
|||
callouts: RefCell<HashMap<u32, CallContext>>,
|
||||
llm_providers: Option<Rc<LlmProviders>>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
}
|
||||
|
||||
impl FilterContext {
|
||||
|
|
@ -40,6 +42,7 @@ impl FilterContext {
|
|||
metrics: Rc::new(Metrics::new()),
|
||||
llm_providers: None,
|
||||
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
|
||||
overrides: Rc::new(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -69,6 +72,7 @@ impl RootContext for FilterContext {
|
|||
};
|
||||
|
||||
ratelimit::ratelimits(Some(config.ratelimits.unwrap_or_default()));
|
||||
self.overrides = Rc::new(config.overrides);
|
||||
|
||||
match config.llm_providers.try_into() {
|
||||
Ok(llm_providers) => self.llm_providers = Some(Rc::new(llm_providers)),
|
||||
|
|
@ -93,6 +97,7 @@ impl RootContext for FilterContext {
|
|||
.expect("LLM Providers must exist when Streams are being created"),
|
||||
),
|
||||
Arc::clone(&self.traces_queue),
|
||||
Rc::clone(&self.overrides),
|
||||
)))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use common::api::open_ai::{
|
|||
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
|
||||
Message, StreamOptions,
|
||||
};
|
||||
use common::configuration::LlmProvider;
|
||||
use common::configuration::{LlmProvider, LlmProviderType, Overrides};
|
||||
use common::consts::{
|
||||
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
||||
RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
|
||||
|
|
@ -42,6 +42,7 @@ pub struct StreamContext {
|
|||
request_body_sent_time: Option<u128>,
|
||||
user_message: Option<Message>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
}
|
||||
|
||||
impl StreamContext {
|
||||
|
|
@ -50,10 +51,12 @@ impl StreamContext {
|
|||
metrics: Rc<Metrics>,
|
||||
llm_providers: Rc<LlmProviders>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
) -> Self {
|
||||
StreamContext {
|
||||
context_id,
|
||||
metrics,
|
||||
overrides,
|
||||
ratelimit_selector: None,
|
||||
streaming_response: false,
|
||||
response_tokens: 0,
|
||||
|
|
@ -91,7 +94,12 @@ impl StreamContext {
|
|||
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
|
||||
.unwrap_or_default(),
|
||||
self.llm_provider.as_ref().unwrap().name,
|
||||
self.llm_provider.as_ref().unwrap().model
|
||||
self.llm_provider
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.model
|
||||
.as_ref()
|
||||
.unwrap_or(&String::new())
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -151,11 +159,11 @@ impl StreamContext {
|
|||
// Tokenize and record token count.
|
||||
let token_count = tokenizer::token_count(model, json_string).unwrap_or(0);
|
||||
|
||||
trace!("Recorded input token count: {}", token_count);
|
||||
// Record the token count to metrics.
|
||||
self.metrics
|
||||
.input_sequence_length
|
||||
.record(token_count as u64);
|
||||
trace!("Recorded input token count: {}", token_count);
|
||||
|
||||
// Check if rate limiting needs to be applied.
|
||||
if let Some(selector) = self.ratelimit_selector.take() {
|
||||
|
|
@ -184,24 +192,41 @@ impl HttpContext for StreamContext {
|
|||
return Action::Continue;
|
||||
}
|
||||
|
||||
self.select_llm_provider();
|
||||
let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
|
||||
|
||||
// if endpoint is not set then use provider name as routing header so envoy can resolve the cluster name
|
||||
if self.llm_provider().endpoint.is_none() {
|
||||
let use_agent_orchestrator = match self.overrides.as_ref() {
|
||||
Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
|
||||
None => false,
|
||||
};
|
||||
|
||||
if let Some(routing_header_value) = routing_header_value.as_ref() {
|
||||
debug!("routing header already set: {}", routing_header_value);
|
||||
self.llm_provider = Some(Rc::new(LlmProvider {
|
||||
name: routing_header_value.to_string(),
|
||||
provider_interface: LlmProviderType::OpenAI,
|
||||
access_key: None,
|
||||
endpoint: None,
|
||||
model: None,
|
||||
default: None,
|
||||
stream: None,
|
||||
port: None,
|
||||
rate_limits: None,
|
||||
}));
|
||||
} else {
|
||||
self.select_llm_provider();
|
||||
self.add_http_request_header(
|
||||
ARCH_ROUTING_HEADER,
|
||||
&self.llm_provider().provider_interface.to_string(),
|
||||
);
|
||||
} else {
|
||||
self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name);
|
||||
}
|
||||
|
||||
if let Err(error) = self.modify_auth_headers() {
|
||||
// ensure that the provider has an endpoint if the access key is missing else return a bad request
|
||||
if self.llm_provider.as_ref().unwrap().endpoint.is_none() {
|
||||
self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
|
||||
if let Err(error) = self.modify_auth_headers() {
|
||||
// ensure that the provider has an endpoint if the access key is missing else return a bad request
|
||||
if self.llm_provider.as_ref().unwrap().endpoint.is_none() && !use_agent_orchestrator
|
||||
{
|
||||
self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.delete_content_length_header();
|
||||
self.save_ratelimit_header();
|
||||
|
||||
|
|
@ -230,34 +255,38 @@ impl HttpContext for StreamContext {
|
|||
return Action::Continue;
|
||||
}
|
||||
|
||||
let body_bytes = match self.get_http_request_body(0, body_size) {
|
||||
Some(body_bytes) => body_bytes,
|
||||
None => {
|
||||
self.send_server_error(
|
||||
ServerError::LogicError(format!(
|
||||
"Failed to obtain body bytes even though body_size is {}",
|
||||
body_size
|
||||
)),
|
||||
None,
|
||||
);
|
||||
return Action::Pause;
|
||||
}
|
||||
};
|
||||
|
||||
// Deserialize body into spec.
|
||||
// Currently OpenAI API.
|
||||
let mut deserialized_body: ChatCompletionsRequest =
|
||||
match self.get_http_request_body(0, body_size) {
|
||||
Some(body_bytes) => match serde_json::from_slice(&body_bytes) {
|
||||
Ok(deserialized) => deserialized,
|
||||
Err(e) => {
|
||||
self.send_server_error(
|
||||
ServerError::Deserialization(e),
|
||||
Some(StatusCode::BAD_REQUEST),
|
||||
);
|
||||
return Action::Pause;
|
||||
}
|
||||
},
|
||||
None => {
|
||||
match serde_json::from_slice(&body_bytes) {
|
||||
Ok(deserialized) => deserialized,
|
||||
Err(e) => {
|
||||
debug!("body str: {}", String::from_utf8_lossy(&body_bytes));
|
||||
self.send_server_error(
|
||||
ServerError::LogicError(format!(
|
||||
"Failed to obtain body bytes even though body_size is {}",
|
||||
body_size
|
||||
)),
|
||||
None,
|
||||
ServerError::Deserialization(e),
|
||||
Some(StatusCode::BAD_REQUEST),
|
||||
);
|
||||
return Action::Pause;
|
||||
}
|
||||
};
|
||||
|
||||
// remove metadata from the request body
|
||||
deserialized_body.metadata = None;
|
||||
//TODO: move this to prompt gateway
|
||||
// deserialized_body.metadata = None;
|
||||
// delete model key from message array
|
||||
for message in deserialized_body.messages.iter_mut() {
|
||||
message.model = None;
|
||||
|
|
@ -270,10 +299,16 @@ impl HttpContext for StreamContext {
|
|||
.last()
|
||||
.cloned();
|
||||
|
||||
// override model name from the llm provider
|
||||
deserialized_body
|
||||
.model
|
||||
.clone_from(&self.llm_provider.as_ref().unwrap().model);
|
||||
let model_name = match self.llm_provider.as_ref() {
|
||||
Some(llm_provider) => match llm_provider.model.as_ref() {
|
||||
Some(model) => model,
|
||||
None => "--",
|
||||
},
|
||||
None => "--",
|
||||
};
|
||||
|
||||
deserialized_body.model = model_name.to_string();
|
||||
|
||||
let chat_completion_request_str = serde_json::to_string(&deserialized_body).unwrap();
|
||||
|
||||
trace!(
|
||||
|
|
@ -469,6 +504,10 @@ impl HttpContext for StreamContext {
|
|||
};
|
||||
|
||||
if self.streaming_response {
|
||||
if body_utf8 == "data: [DONE]\n" {
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
let chat_completions_chunk_response_events =
|
||||
match ChatCompletionStreamResponseServerEvents::try_from(body_utf8.as_str()) {
|
||||
Ok(response) => response,
|
||||
|
|
@ -482,7 +521,10 @@ impl HttpContext for StreamContext {
|
|||
};
|
||||
|
||||
if chat_completions_chunk_response_events.events.is_empty() {
|
||||
debug!("empty streaming response");
|
||||
debug!(
|
||||
"cound't parse any streaming events: body str: {}",
|
||||
body_utf8
|
||||
);
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,11 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
|||
.call_proxy_on_request_headers(http_context, 0, false)
|
||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
|
||||
.returning(Some("/v1/chat/completions"))
|
||||
.expect_get_header_map_value(
|
||||
Some(MapType::HttpRequestHeaders),
|
||||
Some("x-arch-llm-provider"),
|
||||
)
|
||||
.returning(None)
|
||||
.expect_get_header_map_value(
|
||||
Some(MapType::HttpRequestHeaders),
|
||||
Some("x-arch-llm-provider-hint"),
|
||||
|
|
@ -36,6 +41,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
|||
Some("Authorization"),
|
||||
Some("Bearer secret_key"),
|
||||
)
|
||||
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
|
||||
.expect_get_header_map_value(
|
||||
Some(MapType::HttpRequestHeaders),
|
||||
Some("x-arch-llm-provider-hint"),
|
||||
|
|
@ -48,8 +54,6 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
|||
.returning(Some("selector-key"))
|
||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
|
||||
.returning(Some("selector-value"))
|
||||
.expect_get_header_map_pairs(Some(MapType::HttpRequestHeaders))
|
||||
.returning(None)
|
||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
|
||||
.returning(Some("/v1/chat/completions"))
|
||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
|
||||
|
|
@ -223,8 +227,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_metric_record("input_sequence_length", 21)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_metric_record("input_sequence_length", 21)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
|
||||
|
|
@ -266,7 +270,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
{\
|
||||
\"messages\": [\
|
||||
{\
|
||||
\"role\": \"system\",\
|
||||
\"role\": \"system\"\
|
||||
},\
|
||||
{\
|
||||
\"role\": \"user\",\
|
||||
|
|
@ -283,14 +287,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(incomplete_chat_completions_request_body))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_send_local_response(
|
||||
Some(StatusCode::BAD_REQUEST.as_u16().into()),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.execute_and_expect(ReturnType::Action(Action::Pause))
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_metric_record("input_sequence_length", 14)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
use crate::metrics::Metrics;
|
||||
use crate::stream_context::StreamContext;
|
||||
use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
|
||||
use common::configuration::{
|
||||
Configuration, Endpoint, Overrides, PromptGuards, PromptTarget, Tracing,
|
||||
};
|
||||
use common::http::Client;
|
||||
use common::stats::Gauge;
|
||||
use log::trace;
|
||||
|
|
@ -21,6 +23,7 @@ pub struct FilterContext {
|
|||
overrides: Rc<Option<Overrides>>,
|
||||
system_prompt: Rc<Option<String>>,
|
||||
prompt_targets: Rc<HashMap<String, PromptTarget>>,
|
||||
endpoints: Rc<Option<HashMap<String, Endpoint>>>,
|
||||
prompt_guards: Rc<PromptGuards>,
|
||||
tracing: Rc<Option<Tracing>>,
|
||||
}
|
||||
|
|
@ -34,6 +37,7 @@ impl FilterContext {
|
|||
prompt_targets: Rc::new(HashMap::new()),
|
||||
overrides: Rc::new(None),
|
||||
prompt_guards: Rc::new(PromptGuards::default()),
|
||||
endpoints: Rc::new(None),
|
||||
tracing: Rc::new(None),
|
||||
}
|
||||
}
|
||||
|
|
@ -73,6 +77,7 @@ impl RootContext for FilterContext {
|
|||
}
|
||||
self.system_prompt = Rc::new(config.system_prompt);
|
||||
self.prompt_targets = Rc::new(prompt_targets);
|
||||
self.endpoints = Rc::new(config.endpoints);
|
||||
|
||||
if let Some(prompt_guards) = config.prompt_guards {
|
||||
self.prompt_guards = Rc::new(prompt_guards)
|
||||
|
|
@ -94,6 +99,7 @@ impl RootContext for FilterContext {
|
|||
Rc::clone(&self.metrics),
|
||||
Rc::clone(&self.system_prompt),
|
||||
Rc::clone(&self.prompt_targets),
|
||||
Rc::clone(&self.endpoints),
|
||||
Rc::clone(&self.overrides),
|
||||
Rc::clone(&self.tracing),
|
||||
)))
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use common::{
|
|||
self, ArchState, ChatCompletionStreamResponse, ChatCompletionTool, ChatCompletionsRequest,
|
||||
},
|
||||
consts::{
|
||||
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER,
|
||||
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_ROUTING_HEADER, ARCH_STATE_HEADER,
|
||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
||||
MODEL_SERVER_NAME, MODEL_SERVER_REQUEST_TIMEOUT_MS, REQUEST_ID_HEADER, TOOL_ROLE,
|
||||
TRACE_PARENT_HEADER, USER_ROLE,
|
||||
|
|
@ -33,6 +33,27 @@ impl HttpContext for StreamContext {
|
|||
// manipulate the body in benign ways e.g., compression.
|
||||
self.set_http_request_header("content-length", None);
|
||||
|
||||
if let Some(overrides) = self.overrides.as_ref() {
|
||||
if overrides.use_agent_orchestrator.unwrap_or_default() {
|
||||
// get endpoint that has agent_orchestrator set to true
|
||||
if let Some(endpoints) = self.endpoints.as_ref() {
|
||||
if endpoints.len() == 1 {
|
||||
let (name, _) = endpoints.iter().next().unwrap();
|
||||
debug!("Setting ARCH_PROVIDER_HINT_HEADER to {}", name);
|
||||
self.set_http_request_header(ARCH_ROUTING_HEADER, Some(name));
|
||||
} else {
|
||||
warn!("Need single endpoint when use_agent_orchestrator is set");
|
||||
self.send_server_error(
|
||||
ServerError::LogicError(
|
||||
"Need single endpoint when use_agent_orchestrator is set".to_string(),
|
||||
),
|
||||
None,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let request_path = self.get_http_request_header(":path").unwrap_or_default();
|
||||
if request_path == HEALTHZ_PATH {
|
||||
self.send_http_response(200, vec![], None);
|
||||
|
|
@ -49,6 +70,7 @@ impl HttpContext for StreamContext {
|
|||
|
||||
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
|
||||
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
|
||||
|
||||
Action::Continue
|
||||
}
|
||||
|
||||
|
|
@ -152,6 +174,18 @@ impl HttpContext for StreamContext {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(overrides) = self.overrides.as_ref() {
|
||||
if overrides.use_agent_orchestrator.unwrap_or_default() {
|
||||
if metadata.is_none() {
|
||||
metadata = Some(HashMap::new());
|
||||
}
|
||||
metadata
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.insert("use_agent_orchestrator".to_string(), "true".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let arch_fc_chat_completion_request = ChatCompletionsRequest {
|
||||
messages: deserialized_body.messages.clone(),
|
||||
metadata,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use common::api::open_ai::{
|
|||
to_server_events, ArchState, ChatCompletionStreamResponse, ChatCompletionsRequest,
|
||||
ChatCompletionsResponse, Message, ToolCall,
|
||||
};
|
||||
use common::configuration::{Overrides, PromptTarget, Tracing};
|
||||
use common::configuration::{Endpoint, Overrides, PromptTarget, Tracing};
|
||||
use common::consts::{
|
||||
API_REQUEST_TIMEOUT_MS, ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME,
|
||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_TARGET_REQUEST_TIMEOUT_MS, MESSAGES_KEY,
|
||||
|
|
@ -46,6 +46,7 @@ pub struct StreamCallContext {
|
|||
pub struct StreamContext {
|
||||
system_prompt: Rc<Option<String>>,
|
||||
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
|
||||
pub endpoints: Rc<Option<HashMap<String, Endpoint>>>,
|
||||
pub overrides: Rc<Option<Overrides>>,
|
||||
pub metrics: Rc<Metrics>,
|
||||
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
|
||||
|
|
@ -72,6 +73,7 @@ impl StreamContext {
|
|||
metrics: Rc<Metrics>,
|
||||
system_prompt: Rc<Option<String>>,
|
||||
prompt_targets: Rc<HashMap<String, PromptTarget>>,
|
||||
endpoints: Rc<Option<HashMap<String, Endpoint>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
tracing: Rc<Option<Tracing>>,
|
||||
) -> Self {
|
||||
|
|
@ -80,6 +82,7 @@ impl StreamContext {
|
|||
metrics,
|
||||
system_prompt,
|
||||
prompt_targets,
|
||||
endpoints,
|
||||
callouts: RefCell::new(HashMap::new()),
|
||||
chat_completions_request: None,
|
||||
tool_calls: None,
|
||||
|
|
@ -312,12 +315,59 @@ impl StreamContext {
|
|||
callout_context.prompt_target_name =
|
||||
Some(self.tool_calls.as_ref().unwrap()[0].function.name.clone());
|
||||
|
||||
if let Some(overrides) = self.overrides.as_ref() {
|
||||
if overrides.use_agent_orchestrator.unwrap_or_default() {
|
||||
let mut metadata = HashMap::new();
|
||||
metadata.insert("use_agent_orchestrator".to_string(), "true".to_string());
|
||||
|
||||
metadata.insert(
|
||||
"agent-name".to_string(),
|
||||
callout_context
|
||||
.prompt_target_name
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
);
|
||||
|
||||
if let Some(overrides) = self.overrides.as_ref() {
|
||||
if overrides.optimize_context_window.unwrap_or_default() {
|
||||
metadata.insert("optimize_context_window".to_string(), "true".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(overrides) = self.overrides.as_ref() {
|
||||
if overrides.use_agent_orchestrator.unwrap_or_default() {
|
||||
metadata.insert("use_agent_orchestrator".to_string(), "true".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let messages = self.construct_llm_messages(&callout_context);
|
||||
|
||||
let chat_completion_request = ChatCompletionsRequest {
|
||||
model: callout_context.request_body.model.clone(),
|
||||
messages,
|
||||
tools: None,
|
||||
stream: callout_context.request_body.stream,
|
||||
stream_options: callout_context.request_body.stream_options.clone(),
|
||||
metadata: Some(metadata),
|
||||
};
|
||||
|
||||
let body_str = serde_json::to_string(&chat_completion_request).unwrap();
|
||||
debug!("sending request to llm agent: {}", body_str);
|
||||
self.set_http_request_body(0, self.request_body_size, body_str.as_bytes());
|
||||
self.resume_http_request();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.schedule_api_call_request(callout_context);
|
||||
}
|
||||
|
||||
fn schedule_api_call_request(&mut self, mut callout_context: StreamCallContext) {
|
||||
// Construct messages early to avoid mutable borrow conflicts
|
||||
|
||||
let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
|
||||
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap();
|
||||
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
|
||||
let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
|
||||
let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
|
||||
let endpoint_path: String = endpoint_details
|
||||
|
|
@ -329,7 +379,7 @@ impl StreamContext {
|
|||
let http_method = endpoint_details.method.clone().unwrap_or_default();
|
||||
let prompt_target_params = prompt_target.parameters.clone().unwrap_or_default();
|
||||
|
||||
let (path, body) = match compute_request_path_body(
|
||||
let (path, api_call_body) = match compute_request_path_body(
|
||||
&endpoint_path,
|
||||
tool_params,
|
||||
&prompt_target_params,
|
||||
|
|
@ -346,6 +396,8 @@ impl StreamContext {
|
|||
}
|
||||
};
|
||||
|
||||
debug!("api call body {:?}", api_call_body);
|
||||
|
||||
let timeout_str = API_REQUEST_TIMEOUT_MS.to_string();
|
||||
|
||||
let http_method_str = http_method.to_string();
|
||||
|
|
@ -375,11 +427,12 @@ impl StreamContext {
|
|||
headers.insert(key.as_str(), value.as_str());
|
||||
}
|
||||
|
||||
|
||||
let call_args = CallArgs::new(
|
||||
ARCH_INTERNAL_CLUSTER_NAME,
|
||||
&path,
|
||||
headers.into_iter().collect(),
|
||||
body.as_deref().map(|s| s.as_bytes()),
|
||||
api_call_body.as_deref().map(|s| s.as_bytes()),
|
||||
vec![],
|
||||
Duration::from_secs(5),
|
||||
);
|
||||
|
|
@ -406,6 +459,11 @@ impl StreamContext {
|
|||
"developer api call response received: status code: {}",
|
||||
http_status
|
||||
);
|
||||
let prompt_target = self
|
||||
.prompt_targets
|
||||
.get(callout_context.prompt_target_name.as_ref().unwrap())
|
||||
.unwrap()
|
||||
.clone();
|
||||
if http_status != StatusCode::OK.as_str() {
|
||||
warn!(
|
||||
"api server responded with non 2xx status code: {}",
|
||||
|
|
@ -441,6 +499,40 @@ impl StreamContext {
|
|||
}
|
||||
};
|
||||
|
||||
if !prompt_target
|
||||
.auto_llm_dispatch_on_response
|
||||
.unwrap_or(true)
|
||||
{
|
||||
let tool_call_response = self.tool_call_response.as_ref().unwrap().clone();
|
||||
|
||||
let direct_response_str = if self.streaming_response {
|
||||
let chunks = vec![
|
||||
ChatCompletionStreamResponse::new(
|
||||
None,
|
||||
Some(ASSISTANT_ROLE.to_string()),
|
||||
Some(ARCH_FC_MODEL_NAME.to_owned()),
|
||||
None,
|
||||
),
|
||||
ChatCompletionStreamResponse::new(
|
||||
Some(tool_call_response.clone()),
|
||||
None,
|
||||
Some(ARCH_FC_MODEL_NAME.to_owned()),
|
||||
None,
|
||||
),
|
||||
];
|
||||
|
||||
to_server_events(chunks)
|
||||
} else {
|
||||
tool_call_response
|
||||
};
|
||||
|
||||
return self.send_http_response(
|
||||
StatusCode::OK.as_u16().into(),
|
||||
vec![],
|
||||
Some(direct_response_str.as_bytes()),
|
||||
);
|
||||
}
|
||||
|
||||
let final_prompt = format!(
|
||||
"{}\ncontext: {}",
|
||||
user_message.content.unwrap(),
|
||||
|
|
@ -565,7 +657,7 @@ impl StreamContext {
|
|||
// check if the default target should be dispatched to the LLM provider
|
||||
if !prompt_target
|
||||
.auto_llm_dispatch_on_response
|
||||
.unwrap_or_default()
|
||||
.unwrap_or(true)
|
||||
{
|
||||
let default_target_response_str = if self.streaming_response {
|
||||
let chat_completion_response =
|
||||
|
|
|
|||
|
|
@ -4,8 +4,13 @@ use std::collections::HashMap;
|
|||
use serde_yaml::Value;
|
||||
|
||||
// only add params that are of string, number and bool type
|
||||
pub fn filter_tool_params(tool_params: &HashMap<String, Value>) -> HashMap<String, String> {
|
||||
pub fn filter_tool_params(tool_params: &Option<HashMap<String, Value>>) -> HashMap<String, String> {
|
||||
if tool_params.is_none() {
|
||||
return HashMap::new();
|
||||
}
|
||||
tool_params
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter(|(_, value)| value.is_number() || value.is_string() || value.is_bool())
|
||||
.map(|(key, value)| match value {
|
||||
|
|
@ -22,7 +27,7 @@ pub fn filter_tool_params(tool_params: &HashMap<String, Value>) -> HashMap<Strin
|
|||
|
||||
pub fn compute_request_path_body(
|
||||
endpoint_path: &str,
|
||||
tool_params: &HashMap<String, Value>,
|
||||
tool_params: &Option<HashMap<String, Value>>,
|
||||
prompt_target_params: &[Parameter],
|
||||
http_method: &HttpMethod,
|
||||
) -> Result<(String, Option<String>), String> {
|
||||
|
|
|
|||
|
|
@ -352,10 +352,10 @@ fn prompt_gateway_request_to_llm_gateway() {
|
|||
tool_type: ToolType::Function,
|
||||
function: FunctionCallDetail {
|
||||
name: String::from("weather_forecast"),
|
||||
arguments: HashMap::from([(
|
||||
arguments: Some(HashMap::from([(
|
||||
String::from("city"),
|
||||
Value::String(String::from("seattle")),
|
||||
)]),
|
||||
)])),
|
||||
},
|
||||
}]),
|
||||
model: None,
|
||||
|
|
@ -381,8 +381,8 @@ fn prompt_gateway_request_to_llm_gateway() {
|
|||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_http_call(
|
||||
Some("arch_internal"),
|
||||
Some(vec![
|
||||
|
|
@ -400,6 +400,7 @@ fn prompt_gateway_request_to_llm_gateway() {
|
|||
)
|
||||
.returning(Some(2))
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.execute_and_expect(ReturnType::None)
|
||||
.unwrap();
|
||||
|
||||
|
|
@ -499,10 +500,10 @@ fn prompt_gateway_request_no_intent_match() {
|
|||
tool_type: ToolType::Function,
|
||||
function: FunctionCallDetail {
|
||||
name: String::from("weather_forecast"),
|
||||
arguments: HashMap::from([(
|
||||
arguments: Some(HashMap::from([(
|
||||
String::from("city"),
|
||||
Value::String(String::from("seattle")),
|
||||
)]),
|
||||
)])),
|
||||
},
|
||||
}]),
|
||||
model: None,
|
||||
|
|
@ -655,10 +656,10 @@ fn prompt_gateway_request_no_intent_match_default_target() {
|
|||
tool_type: ToolType::Function,
|
||||
function: FunctionCallDetail {
|
||||
name: String::from("weather_forecast"),
|
||||
arguments: HashMap::from([(
|
||||
arguments: Some(HashMap::from([(
|
||||
String::from("city"),
|
||||
Value::String(String::from("seattle")),
|
||||
)]),
|
||||
)])),
|
||||
},
|
||||
}]),
|
||||
model: None,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue