update config (#93)

2026-06-23 15:38:07 +02:00 · 2024-09-30 17:49:05 -07:00 · 2024-09-30 17:49:05 -07:00 · cc35eb0cd7
commit cc35eb0cd7
parent 4182879717
13 changed files with 575 additions and 329 deletions
--- a/arch/Cargo.lock
+++ b/arch/Cargo.lock
@ -441,6 +441,15 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "duration-string"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fcc1d9ae294a15ed05aeae8e11ee5f2b3fe971c077d45a42fb20825fba6ee13"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "either"
 version = "1.13.0"
@ -1075,6 +1084,7 @@ dependencies = [
 name = "public_types"
 version = "0.1.0"
 dependencies = [
+ "duration-string",
 "serde",
 "serde_yaml",
 ]
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -176,7 +176,11 @@ static_resources:
                  hostname: "arch_fc"
 {% for _, cluster in arch_clusters.items() %}
    - name: {{ cluster.name }}
+      {% if cluster.connect_timeout -%}
+      connect_timeout: {{ cluster.connect_timeout }}
+      {% else -%}
      connect_timeout: 5s
+      {% endif -%}
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
@ -186,7 +190,7 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: {{ cluster.address }}
+                      address: {{ cluster.endpoint }}
                      port_value: {{ cluster.port }}
-                  hostname: {{ cluster.address }}
+                  hostname: {{ cluster.name }}
 {% endfor %}
--- a/arch/src/stream_context.rs
+++ b/arch/src/stream_context.rs
@ -23,7 +23,7 @@ use public_types::common_types::{
    EmbeddingType, PromptGuardRequest, PromptGuardResponse, PromptGuardTask,
    ZeroShotClassificationRequest, ZeroShotClassificationResponse,
 };
-use public_types::configuration::{Overrides, PromptGuards, PromptTarget, PromptType};
+use public_types::configuration::{Overrides, PromptGuards, PromptTarget};
 use public_types::embeddings::{
    CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse,
 };
@ -358,103 +358,97 @@ impl StreamContext {

        info!("prompt_target name: {:?}", prompt_target_name);

-        match prompt_target.prompt_type {
-            PromptType::FunctionResolver => {
-                let mut chat_completion_tools: Vec<ChatCompletionTool> = Vec::new();
-                for pt in self.prompt_targets.read().unwrap().values() {
-                    // only extract entity names
-                    let properties: HashMap<String, FunctionParameter> = match pt.parameters {
-                        // Clone is unavoidable here because we don't want to move the values out of the prompt target struct.
-                        Some(ref entities) => {
-                            let mut properties: HashMap<String, FunctionParameter> = HashMap::new();
-                            for entity in entities.iter() {
-                                let param = FunctionParameter {
-                                    parameter_type: ParameterType::from(
-                                        entity.parameter_type.clone().unwrap_or("str".to_string()),
-                                    ),
-                                    description: entity.description.clone(),
-                                    required: entity.required,
-                                    enum_values: entity.enum_values.clone(),
-                                    default: entity.default.clone(),
-                                };
-                                properties.insert(entity.name.clone(), param);
-                            }
-                            properties
-                        }
-                        None => HashMap::new(),
-                    };
-                    let tools_parameters = FunctionParameters { properties };
-
-                    chat_completion_tools.push({
-                        ChatCompletionTool {
-                            tool_type: ToolType::Function,
-                            function: FunctionDefinition {
-                                name: pt.name.clone(),
-                                description: pt.description.clone(),
-                                parameters: tools_parameters,
-                            },
-                        }
-                    });
+        //TODO: handle default function resolver type
+        let mut chat_completion_tools: Vec<ChatCompletionTool> = Vec::new();
+        for pt in self.prompt_targets.read().unwrap().values() {
+            // only extract entity names
+            let properties: HashMap<String, FunctionParameter> = match pt.parameters {
+                // Clone is unavoidable here because we don't want to move the values out of the prompt target struct.
+                Some(ref entities) => {
+                    let mut properties: HashMap<String, FunctionParameter> = HashMap::new();
+                    for entity in entities.iter() {
+                        let param = FunctionParameter {
+                            parameter_type: ParameterType::from(
+                                entity.parameter_type.clone().unwrap_or("str".to_string()),
+                            ),
+                            description: entity.description.clone(),
+                            required: entity.required,
+                            enum_values: entity.enum_values.clone(),
+                            default: entity.default.clone(),
+                        };
+                        properties.insert(entity.name.clone(), param);
+                    }
+                    properties
                }
+                None => HashMap::new(),
+            };
+            let tools_parameters = FunctionParameters { properties };

-                let chat_completions = ChatCompletionsRequest {
-                    model: GPT_35_TURBO.to_string(),
-                    messages: callout_context.request_body.messages.clone(),
-                    tools: Some(chat_completion_tools),
-                    stream: false,
-                    stream_options: None,
-                };
-
-                let msg_body = match serde_json::to_string(&chat_completions) {
-                    Ok(msg_body) => {
-                        debug!("arch_fc request body content: {}", msg_body);
-                        msg_body
-                    }
-                    Err(e) => {
-                        return self.send_server_error(
-                            format!("Error serializing request_params: {:?}", e),
-                            None,
-                        );
-                    }
-                };
-
-                let token_id = match self.dispatch_http_call(
-                    ARC_FC_CLUSTER,
-                    vec![
-                        (":method", "POST"),
-                        (":path", "/v1/chat/completions"),
-                        (":authority", ARC_FC_CLUSTER),
-                        ("content-type", "application/json"),
-                        ("x-envoy-max-retries", "3"),
-                        (
-                            "x-envoy-upstream-rq-timeout-ms",
-                            ARCH_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
-                        ),
-                    ],
-                    Some(msg_body.as_bytes()),
-                    vec![],
-                    Duration::from_secs(5),
-                ) {
-                    Ok(token_id) => token_id,
-                    Err(e) => {
-                        let error_msg =
-                            format!("Error dispatching HTTP call for function-call: {:?}", e);
-                        return self.send_server_error(error_msg, Some(StatusCode::BAD_REQUEST));
-                    }
-                };
-
-                debug!(
-                    "dispatched call to function {} token_id={}",
-                    ARC_FC_CLUSTER, token_id
-                );
-
-                self.metrics.active_http_calls.increment(1);
-                callout_context.response_handler_type = ResponseHandlerType::FunctionResolver;
-                callout_context.prompt_target_name = Some(prompt_target.name);
-                if self.callouts.insert(token_id, callout_context).is_some() {
-                    panic!("duplicate token_id")
+            chat_completion_tools.push({
+                ChatCompletionTool {
+                    tool_type: ToolType::Function,
+                    function: FunctionDefinition {
+                        name: pt.name.clone(),
+                        description: pt.description.clone(),
+                        parameters: tools_parameters,
+                    },
                }
+            });
+        }
+
+        let chat_completions = ChatCompletionsRequest {
+            model: GPT_35_TURBO.to_string(),
+            messages: callout_context.request_body.messages.clone(),
+            tools: Some(chat_completion_tools),
+            stream: false,
+            stream_options: None,
+        };
+
+        let msg_body = match serde_json::to_string(&chat_completions) {
+            Ok(msg_body) => {
+                debug!("arch_fc request body content: {}", msg_body);
+                msg_body
            }
+            Err(e) => {
+                return self
+                    .send_server_error(format!("Error serializing request_params: {:?}", e), None);
+            }
+        };
+
+        let token_id = match self.dispatch_http_call(
+            ARC_FC_CLUSTER,
+            vec![
+                (":method", "POST"),
+                (":path", "/v1/chat/completions"),
+                (":authority", ARC_FC_CLUSTER),
+                ("content-type", "application/json"),
+                ("x-envoy-max-retries", "3"),
+                (
+                    "x-envoy-upstream-rq-timeout-ms",
+                    ARCH_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
+                ),
+            ],
+            Some(msg_body.as_bytes()),
+            vec![],
+            Duration::from_secs(5),
+        ) {
+            Ok(token_id) => token_id,
+            Err(e) => {
+                let error_msg = format!("Error dispatching HTTP call for function-call: {:?}", e);
+                return self.send_server_error(error_msg, Some(StatusCode::BAD_REQUEST));
+            }
+        };
+
+        debug!(
+            "dispatched call to function {} token_id={}",
+            ARC_FC_CLUSTER, token_id
+        );
+
+        self.metrics.active_http_calls.increment(1);
+        callout_context.response_handler_type = ResponseHandlerType::FunctionResolver;
+        callout_context.prompt_target_name = Some(prompt_target.name);
+        if self.callouts.insert(token_id, callout_context).is_some() {
+            panic!("duplicate token_id")
        }
    }

@ -530,17 +524,32 @@ impl StreamContext {
        debug!("tool_params: {}", tool_params_json_str);

        let endpoint = prompt_target.endpoint.unwrap();
-        let path = endpoint.path.unwrap_or(String::from("/"));
+        let mut path = endpoint.path.unwrap_or(String::from("/"));
+        let method = endpoint
+            .method
+            .unwrap_or(public_types::configuration::Method::Post);
+        let mut body = Some(tool_params_json_str.as_bytes());
+        if method == public_types::configuration::Method::Post {
+            let mut query_params = vec![];
+            for (key, value) in tool_params {
+                query_params.push(format!("{}={}", key, format!("{:?}", value)));
+            }
+            let path_args = &query_params.join("&");
+            path.push_str("?");
+            path.push_str(path_args);
+        } else {
+            body = None;
+        }
        let token_id = match self.dispatch_http_call(
-            &endpoint.cluster,
+            &endpoint.name,
            vec![
-                (":method", "POST"),
+                (":method", method.to_string().as_str()),
                (":path", path.as_ref()),
-                (":authority", endpoint.cluster.as_str()),
+                (":authority", endpoint.name.as_str()),
                ("content-type", "application/json"),
                ("x-envoy-max-retries", "3"),
            ],
-            Some(tool_params_json_str.as_bytes()),
+            body,
            vec![],
            Duration::from_secs(5),
        ) {
@ -548,14 +557,14 @@ impl StreamContext {
            Err(e) => {
                let error_msg = format!(
                    "Error dispatching call to cluster: {}, path: {}, err: {:?}",
-                    &endpoint.cluster, path, e
+                    &endpoint.name, path, e
                );
                debug!("{}", error_msg);
                return self.send_server_error(error_msg, Some(StatusCode::BAD_REQUEST));
            }
        };

-        callout_context.up_stream_cluster = Some(endpoint.cluster);
+        callout_context.up_stream_cluster = Some(endpoint.name);
        callout_context.up_stream_cluster_path = Some(path);
        callout_context.response_handler_type = ResponseHandlerType::FunctionCall;
        if self.callouts.insert(token_id, callout_context).is_some() {
@ -682,27 +691,18 @@ impl StreamContext {
        if prompt_guard_resp.jailbreak_verdict.is_some()
            && prompt_guard_resp.jailbreak_verdict.unwrap()
        {
+            //TODO: handle other scenarios like forward to error target
            let default_err = "Jailbreak detected. Please refrain from discussing jailbreaking.";
            let error_msg = match self.prompt_guards.as_ref() {
-                Some(prompt_guards) => match prompt_guards.input_guards.jailbreak.as_ref() {
-                    Some(jailbreak) => match jailbreak.on_exception_message.as_ref() {
-                        Some(error_msg) => error_msg,
-                        None => default_err,
-                    },
-                    None => default_err,
-                },
-                None => default_err,
-            };
-
-            return self.send_server_error(error_msg.to_string(), Some(StatusCode::BAD_REQUEST));
-        }
-
-        if prompt_guard_resp.toxic_verdict.is_some() && prompt_guard_resp.toxic_verdict.unwrap() {
-            let default_err = "Toxicity detected. Please refrain from using toxic language.";
-            let error_msg = match self.prompt_guards.as_ref() {
-                Some(prompt_guards) => match prompt_guards.input_guards.toxicity.as_ref() {
-                    Some(toxicity) => match toxicity.on_exception_message.as_ref() {
-                        Some(error_msg) => error_msg,
+                Some(prompt_guards) => match prompt_guards
+                    .input_guards
+                    .get(&public_types::configuration::GuardType::Jailbreak)
+                {
+                    Some(jailbreak) => match jailbreak.on_exception.as_ref() {
+                        Some(on_exception_details) => match on_exception_details.message.as_ref() {
+                            Some(error_msg) => error_msg,
+                            None => default_err,
+                        },
                        None => default_err,
                    },
                    None => default_err,
@ -883,32 +883,27 @@ impl HttpContext for StreamContext {
            }
        };

-        let prompt_guard_task = match (
-            prompt_guards.input_guards.toxicity.is_some(),
-            prompt_guards.input_guards.jailbreak.is_some(),
-        ) {
-            (true, true) => PromptGuardTask::Both,
-            (true, false) => PromptGuardTask::Toxicity,
-            (false, true) => PromptGuardTask::Jailbreak,
-            (false, false) => {
-                info!("Input guards set but no prompt guards were found");
-                let callout_context = CallContext {
-                    response_handler_type: ResponseHandlerType::ArchGuard,
-                    user_message: Some(user_message),
-                    prompt_target_name: None,
-                    request_body: deserialized_body,
-                    similarity_scores: None,
-                    up_stream_cluster: None,
-                    up_stream_cluster_path: None,
-                };
-                self.get_embeddings(callout_context);
-                return Action::Pause;
-            }
-        };
+        let prompt_guard_jailbreak_task = prompt_guards
+            .input_guards
+            .contains_key(&public_types::configuration::GuardType::Jailbreak);
+        if !prompt_guard_jailbreak_task {
+            info!("Input guards set but no prompt guards were found");
+            let callout_context = CallContext {
+                response_handler_type: ResponseHandlerType::ArchGuard,
+                user_message: Some(user_message),
+                prompt_target_name: None,
+                request_body: deserialized_body,
+                similarity_scores: None,
+                up_stream_cluster: None,
+                up_stream_cluster_path: None,
+            };
+            self.get_embeddings(callout_context);
+            return Action::Pause;
+        }

        let get_prompt_guards_request = PromptGuardRequest {
            input: user_message.clone(),
-            task: prompt_guard_task,
+            task: PromptGuardTask::Jailbreak,
        };

        let json_data: String = match serde_json::to_string(&get_prompt_guards_request) {
--- a/arch/tests/integration.rs
+++ b/arch/tests/integration.rs
@ -175,27 +175,36 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {

 fn default_config() -> Configuration {
    let config: &str = r#"
-default_prompt_endpoint: "127.0.0.1"
-load_balancing: "round_robin"
-timeout_ms: 5000
+version: "0.1-beta"
+
+listener:
+  address: 0.0.0.0
+  port: 10000
+  message_format: huggingface
+  connect_timeout: 0.005s
+
+endpoints:
+  api_server:
+    endpoint: api_server:80
+    connect_timeout: 0.005s

 llm_providers:
-  - name: "open-ai-gpt-4"
-    api_key: "$OPEN_AI_API_KEY"
+  - name: open-ai-gpt-4
+    access_key: $OPEN_AI_API_KEY
    model: gpt-4
+    default: true
+
+overrides:
+  # confidence threshold for prompt target intent matching
+  prompt_target_intent_matching_threshold: 0.6

 system_prompt: |
-  You are a helpful weather forecaster. Please following following guidelines when responding to user queries:
-  - Use farenheight for temperature
-  - Use miles per hour for wind speed
+  You are a helpful assistant.

 prompt_targets:
-  - type: function_resolver
-    name: weather_forecast
-    description: This resolver provides weather forecast information.
-    endpoint:
-      cluster: weatherhost
-      path: /weather
+
+  - name: weather_forecast
+    description: This function provides realtime weather forecast information for a given city.
    parameters:
      - name: city
        required: true
@ -204,16 +213,32 @@ prompt_targets:
        description: The number of days for which the weather forecast is requested.
      - name: units
        description: The units in which the weather forecast is requested.
-
-  - type: function_resolver
-    name: weather_forecast_2
-    description: This resolver provides weather forecast information.
    endpoint:
-      cluster: weatherhost
+      name: api_server
      path: /weather
-    entities:
-      - name: city
+    system_prompt: |
+      You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
+      - Use farenheight for temperature
+      - Use miles per hour for wind speed

+  - name: insurance_claim_details
+    type: function_resolver
+    description: This function resolver provides insurance claim details for a given policy number.
+    parameters:
+      - name: policy_number
+        required: true
+        description: The policy number for which the insurance claim details are requested.
+        type: string
+      - name: include_expired
+        description: whether to include expired insurance claims in the response.
+        type: bool
+        required: true
+    endpoint:
+      name: api_server
+      path: /insurance_claim_details
+    system_prompt: |
+      You are a helpful insurance claim details provider. Use insurance claim data that is provided to you. Please following following guidelines when responding to user queries:
+      - Use policy number to retrieve insurance claim details
 ratelimits:
  - provider: gpt-3.5-turbo
    selector:
@ -222,7 +247,7 @@ ratelimits:
    limit:
      tokens: 1
      unit: minute
-  "#;
+"#;
    serde_yaml::from_str(config).unwrap()
 }

@ -442,7 +467,7 @@ fn request_ratelimited() {
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_http_call(Some("weatherhost"), None, None, None, None)
+        .expect_http_call(Some("api_server"), None, None, None, None)
        .returning(Some(4))
        .expect_metric_increment("active_http_calls", 1)
        .execute_and_expect(ReturnType::None)
@ -557,7 +582,7 @@ fn request_not_ratelimited() {
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_http_call(Some("weatherhost"), None, None, None, None)
+        .expect_http_call(Some("api_server"), None, None, None, None)
        .returning(Some(4))
        .expect_metric_increment("active_http_calls", 1)
        .execute_and_expect(ReturnType::None)