diff --git a/arch/src/consts.rs b/arch/src/consts.rs index 805d2d35..ccee4640 100644 --- a/arch/src/consts.rs +++ b/arch/src/consts.rs @@ -9,3 +9,4 @@ pub const ARC_FC_CLUSTER: &str = "arch_fc"; pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes pub const MODEL_SERVER_NAME: &str = "model_server"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; +pub const ARCH_MESSAGES_KEY: &str = "arch_messages"; diff --git a/arch/src/stream_context.rs b/arch/src/stream_context.rs index e16ae160..bd6527e2 100644 --- a/arch/src/stream_context.rs +++ b/arch/src/stream_context.rs @@ -1,7 +1,7 @@ use crate::consts::{ - ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_ROUTING_HEADER, ARC_FC_CLUSTER, DEFAULT_EMBEDDING_MODEL, - DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, - RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, + ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_MESSAGES_KEY, ARCH_ROUTING_HEADER, ARC_FC_CLUSTER, + DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, + MODEL_SERVER_NAME, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, }; use crate::filter_context::{embeddings_store, WasmMetrics}; use crate::llm_providers::{LlmProvider, LlmProviders}; @@ -507,7 +507,11 @@ impl StreamContext { callout_context.similarity_scores ); //HACK: for now we only support one tool call, we will support multiple tool calls in the future - let tool_params = &tool_calls[0].function.arguments; + let mut tool_params = tool_calls[0].function.arguments.clone(); + tool_params.insert( + String::from(ARCH_MESSAGES_KEY), + serde_yaml::to_value(&callout_context.request_body.messages).unwrap(), + ); let tools_call_name = tool_calls[0].function.name.clone(); let tool_params_json_str = serde_json::to_string(&tool_params).unwrap(); diff --git a/demos/function_calling/arch_config.yaml b/demos/function_calling/arch_config.yaml index fc7fffb8..70ac9922 100644 --- a/demos/function_calling/arch_config.yaml +++ b/demos/function_calling/arch_config.yaml @@ -77,6 +77,7 @@ prompt_targets: system_prompt: | You are a helpful insurance claim details provider. Use insurance claim data that is provided to you. Please following following guidelines when responding to user queries: - Use policy number to retrieve insurance claim details + ratelimits: - provider: gpt-3.5-turbo selector: diff --git a/public_types/src/configuration.rs b/public_types/src/configuration.rs index 1abb7e46..a2c4fa73 100644 --- a/public_types/src/configuration.rs +++ b/public_types/src/configuration.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, time::Duration}; use duration_string::DurationString; -use serde::{Deserialize, Serialize, Deserializer}; +use serde::{Deserialize, Deserializer, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct Overrides {