Use intent model from archfc to pick prompt gateway (#328)

This commit is contained in:
Shuguang Chen 2024-12-20 13:25:01 -08:00 committed by GitHub
parent 67b8fd635e
commit ba7279becb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
151 changed files with 8642 additions and 10932 deletions

View file

@ -21,7 +21,7 @@ pub struct ChatCompletionsRequest {
pub metadata: Option<HashMap<String, String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ToolType {
#[serde(rename = "function")]
Function,
@ -80,6 +80,8 @@ pub struct FunctionParameter {
pub enum_values: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub default: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub format: Option<String>,
}
impl Serialize for FunctionParameter {
@ -96,6 +98,9 @@ impl Serialize for FunctionParameter {
if let Some(default) = &self.default {
map.serialize_entry("default", default)?;
}
if let Some(format) = &self.format {
map.serialize_entry("format", format)?;
}
map.end()
}
}
@ -165,8 +170,8 @@ pub struct Message {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Choice {
pub finish_reason: String,
pub index: usize,
pub finish_reason: Option<String>,
pub index: Option<usize>,
pub message: Message,
}
@ -197,6 +202,18 @@ pub struct ToolCallState {
pub enum ArchState {
ToolCall(Vec<ToolCallState>),
}
#[derive(Deserialize, Serialize)]
#[serde(untagged)]
pub enum ModelServerResponse {
ChatCompletionsResponse(ChatCompletionsResponse),
ModelServerErrorResponse(ModelServerErrorResponse),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelServerErrorResponse {
pub result: String,
pub intent_latency: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionsResponse {
@ -217,8 +234,8 @@ impl ChatCompletionsResponse {
tool_calls: None,
tool_call_id: None,
},
index: 0,
finish_reason: "done".to_string(),
index: Some(0),
finish_reason: Some("done".to_string()),
}],
usage: None,
model: ARCH_FC_MODEL_NAME.to_string(),
@ -408,6 +425,7 @@ mod test {
required: Some(true),
enum_values: None,
default: Some("test".to_string()),
format: None,
},
);
@ -462,6 +480,7 @@ mod test {
required: Some(true),
enum_values: None,
default: Some("test".to_string()),
format: None,
},
)]);

View file

@ -2,6 +2,10 @@ use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt::Display;
use crate::api::open_ai::{
ChatCompletionTool, FunctionDefinition, FunctionParameter, FunctionParameters, ParameterType,
};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Configuration {
pub version: String,
@ -192,6 +196,7 @@ pub struct Parameter {
pub enum_values: Option<Vec<String>>,
pub default: Option<String>,
pub in_path: Option<bool>,
pub format: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)]
@ -231,11 +236,47 @@ pub struct PromptTarget {
pub auto_llm_dispatch_on_response: Option<bool>,
}
// convert PromptTarget to ChatCompletionTool
impl From<&PromptTarget> for ChatCompletionTool {
fn from(val: &PromptTarget) -> Self {
let properties: HashMap<String, FunctionParameter> = match val.parameters {
Some(ref entities) => {
let mut properties: HashMap<String, FunctionParameter> = HashMap::new();
for entity in entities.iter() {
let param = FunctionParameter {
parameter_type: ParameterType::from(
entity.parameter_type.clone().unwrap_or("str".to_string()),
),
description: entity.description.clone(),
required: entity.required,
enum_values: entity.enum_values.clone(),
default: entity.default.clone(),
format: entity.format.clone(),
};
properties.insert(entity.name.clone(), param);
}
properties
}
None => HashMap::new(),
};
ChatCompletionTool {
tool_type: crate::api::open_ai::ToolType::Function,
function: FunctionDefinition {
name: val.name.clone(),
description: val.description.clone(),
parameters: FunctionParameters { properties },
},
}
}
}
#[cfg(test)]
mod test {
use pretty_assertions::assert_eq;
use std::fs;
use crate::configuration::GuardType;
use crate::{api::open_ai::ToolType, configuration::GuardType};
#[test]
fn test_deserialize_configuration() {
@ -307,4 +348,76 @@ mod test {
let mode = config.mode.as_ref().unwrap_or(&super::GatewayMode::Prompt);
assert_eq!(*mode, super::GatewayMode::Prompt);
}
#[test]
fn test_tool_conversion() {
let ref_config = fs::read_to_string(
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
)
.expect("reference config file not found");
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();
let prompt_targets = &config.prompt_targets;
let prompt_target = prompt_targets
.as_ref()
.unwrap()
.iter()
.find(|p| p.name == "reboot_network_device")
.unwrap();
let chat_completion_tool: super::ChatCompletionTool = prompt_target.into();
assert_eq!(chat_completion_tool.tool_type, ToolType::Function);
assert_eq!(chat_completion_tool.function.name, "reboot_network_device");
assert_eq!(
chat_completion_tool.function.description,
"Reboot a specific network device"
);
assert_eq!(chat_completion_tool.function.parameters.properties.len(), 2);
assert_eq!(
chat_completion_tool
.function
.parameters
.properties
.contains_key("device_id"),
true
);
assert_eq!(
chat_completion_tool
.function
.parameters
.properties
.get("device_id")
.unwrap()
.parameter_type,
crate::api::open_ai::ParameterType::String
);
assert_eq!(
chat_completion_tool
.function
.parameters
.properties
.get("device_id")
.unwrap()
.description,
"Identifier of the network device to reboot.".to_string()
);
assert_eq!(
chat_completion_tool
.function
.parameters
.properties
.get("device_id")
.unwrap()
.required,
Some(true)
);
assert_eq!(
chat_completion_tool
.function
.parameters
.properties
.get("confirmation")
.unwrap()
.parameter_type,
crate::api::open_ai::ParameterType::Bool
);
}
}

View file

@ -1,7 +1,3 @@
pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5";
pub const DEFAULT_INTENT_MODEL: &str = "katanemo/bart-large-mnli";
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.25;
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
pub const SYSTEM_ROLE: &str = "system";
pub const USER_ROLE: &str = "user";
@ -9,11 +5,6 @@ pub const TOOL_ROLE: &str = "tool";
pub const ASSISTANT_ROLE: &str = "assistant";
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const MODEL_SERVER_NAME: &str = "model_server";
pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot";
pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc";
pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination";
pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings";
pub const GUARD_INTERNAL_HOST: &str = "guard";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const MESSAGES_KEY: &str = "messages";
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
@ -25,7 +16,6 @@ pub const REQUEST_ID_HEADER: &str = "x-request-id";
pub const TRACE_PARENT_HEADER: &str = "traceparent";
pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal";
pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream";
pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener";
pub const ARCH_MODEL_PREFIX: &str = "Arch";
pub const HALLUCINATION_TEMPLATE: &str =
"It seems I'm missing some information. Could you provide the following details ";

View file

@ -1,59 +0,0 @@
/*
* OMF Embeddings
*
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
*
* The version of the OpenAPI document: 1.0.0
*
* Generated by: https://openapi-generator.tech
*/
use crate::embeddings;
use serde::{Deserialize, Serialize};
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct CreateEmbeddingRequest {
#[serde(rename = "input")]
pub input: Box<embeddings::CreateEmbeddingRequestInput>,
/// ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.
#[serde(rename = "model")]
pub model: String,
/// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
#[serde(rename = "encoding_format", skip_serializing_if = "Option::is_none")]
pub encoding_format: Option<EncodingFormat>,
/// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.
#[serde(rename = "dimensions", skip_serializing_if = "Option::is_none")]
pub dimensions: Option<i32>,
/// A unique identifier representing your end-user, which can help to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
#[serde(rename = "user", skip_serializing_if = "Option::is_none")]
pub user: Option<String>,
}
impl CreateEmbeddingRequest {
pub fn new(
input: embeddings::CreateEmbeddingRequestInput,
model: String,
) -> CreateEmbeddingRequest {
CreateEmbeddingRequest {
input: Box::new(input),
model,
encoding_format: None,
dimensions: None,
user: None,
}
}
}
/// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub enum EncodingFormat {
#[serde(rename = "float")]
Float,
#[serde(rename = "base64")]
Base64,
}
impl Default for EncodingFormat {
fn default() -> EncodingFormat {
Self::Float
}
}

View file

@ -1,28 +0,0 @@
/*
* OMF Embeddings
*
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
*
* The version of the OpenAPI document: 1.0.0
*
* Generated by: https://openapi-generator.tech
*/
use serde::{Deserialize, Serialize};
/// CreateEmbeddingRequestInput : Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens.
/// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum CreateEmbeddingRequestInput {
/// The string that will be turned into an embedding.
String(String),
/// The array of integers that will be turned into an embedding.
Array(Vec<i32>),
}
impl Default for CreateEmbeddingRequestInput {
fn default() -> Self {
Self::String(Default::default())
}
}

View file

@ -1,55 +0,0 @@
/*
* OMF Embeddings
*
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
*
* The version of the OpenAPI document: 1.0.0
*
* Generated by: https://openapi-generator.tech
*/
use crate::embeddings;
use serde::{Deserialize, Serialize};
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct CreateEmbeddingResponse {
/// The list of embeddings generated by the model.
#[serde(rename = "data")]
pub data: Vec<embeddings::Embedding>,
/// The name of the model used to generate the embedding.
#[serde(rename = "model")]
pub model: String,
/// The object type, which is always \"list\".
#[serde(rename = "object")]
pub object: Object,
#[serde(rename = "usage")]
pub usage: Box<embeddings::CreateEmbeddingResponseUsage>,
}
impl CreateEmbeddingResponse {
pub fn new(
data: Vec<embeddings::Embedding>,
model: String,
object: Object,
usage: embeddings::CreateEmbeddingResponseUsage,
) -> CreateEmbeddingResponse {
CreateEmbeddingResponse {
data,
model,
object,
usage: Box::new(usage),
}
}
}
/// The object type, which is always \"list\".
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub enum Object {
#[serde(rename = "list")]
List,
}
impl Default for Object {
fn default() -> Object {
Self::List
}
}

View file

@ -1,32 +0,0 @@
/*
* OMF Embeddings
*
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
*
* The version of the OpenAPI document: 1.0.0
*
* Generated by: https://openapi-generator.tech
*/
use serde::{Deserialize, Serialize};
/// CreateEmbeddingResponseUsage : The usage information for the request.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct CreateEmbeddingResponseUsage {
/// The number of tokens used by the prompt.
#[serde(rename = "prompt_tokens")]
pub prompt_tokens: i32,
/// The total number of tokens used by the request.
#[serde(rename = "total_tokens")]
pub total_tokens: i32,
}
impl CreateEmbeddingResponseUsage {
/// The usage information for the request.
pub fn new(prompt_tokens: i32, total_tokens: i32) -> CreateEmbeddingResponseUsage {
CreateEmbeddingResponseUsage {
prompt_tokens,
total_tokens,
}
}
}

View file

@ -1,48 +0,0 @@
/*
* OMF Embeddings
*
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
*
* The version of the OpenAPI document: 1.0.0
*
* Generated by: https://openapi-generator.tech
*/
use serde::{Deserialize, Serialize};
/// Embedding : Represents an embedding vector returned by embedding endpoint.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct Embedding {
/// The index of the embedding in the list of embeddings.
#[serde(rename = "index")]
pub index: i32,
/// The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings).
#[serde(rename = "embedding")]
pub embedding: Vec<f64>,
/// The object type, which is always \"embedding\"
#[serde(rename = "object")]
pub object: Object,
}
impl Embedding {
/// Represents an embedding vector returned by embedding endpoint.
pub fn new(index: i32, embedding: Vec<f64>, object: Object) -> Embedding {
Embedding {
index,
embedding,
object,
}
}
}
/// The object type, which is always \"embedding\"
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub enum Object {
#[serde(rename = "embedding")]
Embedding,
}
impl Default for Object {
fn default() -> Object {
Self::Embedding
}
}

View file

@ -1,10 +0,0 @@
pub mod create_embedding_request;
pub use self::create_embedding_request::CreateEmbeddingRequest;
pub mod create_embedding_request_input;
pub use self::create_embedding_request_input::CreateEmbeddingRequestInput;
pub mod create_embedding_response;
pub use self::create_embedding_response::CreateEmbeddingResponse;
pub mod create_embedding_response_usage;
pub use self::create_embedding_response_usage::CreateEmbeddingResponseUsage;
pub mod embedding;
pub use self::embedding::Embedding;

View file

@ -1,14 +1,13 @@
pub mod api;
pub mod configuration;
pub mod consts;
pub mod embeddings;
pub mod errors;
pub mod http;
pub mod llm_providers;
pub mod path;
pub mod pii;
pub mod ratelimit;
pub mod routing;
pub mod stats;
pub mod tokenizer;
pub mod tracing;
pub mod path;

View file

@ -1,6 +1,9 @@
use std::collections::HashMap;
pub fn replace_params_in_path(path: &str, params: &HashMap<String, String>) -> Result<String, String> {
pub fn replace_params_in_path(
path: &str,
params: &HashMap<String, String>,
) -> Result<String, String> {
let mut result = String::new();
let mut in_param = false;
let mut current_param = String::new();
@ -17,12 +20,10 @@ pub fn replace_params_in_path(path: &str, params: &HashMap<String, String>) -> R
return Err(format!("Missing value for parameter `{}`", param_name));
}
current_param.clear();
} else if in_param {
current_param.push(c);
} else {
if in_param {
current_param.push(c);
} else {
result.push(c);
}
result.push(c);
}
}