diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index b4d4b999..96c3a955 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -211,7 +211,7 @@ static_resources: domains: - "*" routes: - {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination", "model_server"] %} + {% for internal_clustrer in ["arch_fc", "model_server"] %} - match: prefix: "/" headers: @@ -448,7 +448,7 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext sni: api.mistral.ai - {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination", "model_server"] %} + {% for internal_clustrer in ["arch_fc", "model_server"] %} - name: {{ internal_clustrer }} connect_timeout: 5s type: STRICT_DNS diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 87b661ca..561dbae3 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -1,6 +1,3 @@ -pub const DEFAULT_INTENT_MODEL: &str = "katanemo/bart-large-mnli"; -pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8; -pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.25; pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector"; pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; @@ -8,11 +5,6 @@ pub const TOOL_ROLE: &str = "tool"; pub const ASSISTANT_ROLE: &str = "assistant"; pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes pub const MODEL_SERVER_NAME: &str = "model_server"; -pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot"; -pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc"; -pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination"; -pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings"; -pub const GUARD_INTERNAL_HOST: &str = "guard"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint"; @@ -24,7 +16,6 @@ pub const REQUEST_ID_HEADER: &str = "x-request-id"; pub const TRACE_PARENT_HEADER: &str = "traceparent"; pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal"; pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream"; -pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener"; pub const ARCH_MODEL_PREFIX: &str = "Arch"; pub const HALLUCINATION_TEMPLATE: &str = "It seems I'm missing some information. Could you provide the following details "; diff --git a/crates/common/src/embeddings/create_embedding_request.rs b/crates/common/src/embeddings/create_embedding_request.rs deleted file mode 100644 index 21e52f8a..00000000 --- a/crates/common/src/embeddings/create_embedding_request.rs +++ /dev/null @@ -1,59 +0,0 @@ -/* - * OMF Embeddings - * - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 1.0.0 - * - * Generated by: https://openapi-generator.tech - */ - -use crate::embeddings; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] -pub struct CreateEmbeddingRequest { - #[serde(rename = "input")] - pub input: Box, - /// ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. - #[serde(rename = "model")] - pub model: String, - /// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). - #[serde(rename = "encoding_format", skip_serializing_if = "Option::is_none")] - pub encoding_format: Option, - /// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. - #[serde(rename = "dimensions", skip_serializing_if = "Option::is_none")] - pub dimensions: Option, - /// A unique identifier representing your end-user, which can help to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). - #[serde(rename = "user", skip_serializing_if = "Option::is_none")] - pub user: Option, -} - -impl CreateEmbeddingRequest { - pub fn new( - input: embeddings::CreateEmbeddingRequestInput, - model: String, - ) -> CreateEmbeddingRequest { - CreateEmbeddingRequest { - input: Box::new(input), - model, - encoding_format: None, - dimensions: None, - user: None, - } - } -} -/// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). -#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] -pub enum EncodingFormat { - #[serde(rename = "float")] - Float, - #[serde(rename = "base64")] - Base64, -} - -impl Default for EncodingFormat { - fn default() -> EncodingFormat { - Self::Float - } -} diff --git a/crates/common/src/embeddings/create_embedding_request_input.rs b/crates/common/src/embeddings/create_embedding_request_input.rs deleted file mode 100644 index 83195ced..00000000 --- a/crates/common/src/embeddings/create_embedding_request_input.rs +++ /dev/null @@ -1,28 +0,0 @@ -/* - * OMF Embeddings - * - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 1.0.0 - * - * Generated by: https://openapi-generator.tech - */ - -use serde::{Deserialize, Serialize}; - -/// CreateEmbeddingRequestInput : Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens. -/// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens. -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -#[serde(untagged)] -pub enum CreateEmbeddingRequestInput { - /// The string that will be turned into an embedding. - String(String), - /// The array of integers that will be turned into an embedding. - Array(Vec), -} - -impl Default for CreateEmbeddingRequestInput { - fn default() -> Self { - Self::String(Default::default()) - } -} diff --git a/crates/common/src/embeddings/create_embedding_response.rs b/crates/common/src/embeddings/create_embedding_response.rs deleted file mode 100644 index 278929e0..00000000 --- a/crates/common/src/embeddings/create_embedding_response.rs +++ /dev/null @@ -1,55 +0,0 @@ -/* - * OMF Embeddings - * - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 1.0.0 - * - * Generated by: https://openapi-generator.tech - */ - -use crate::embeddings; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] -pub struct CreateEmbeddingResponse { - /// The list of embeddings generated by the model. - #[serde(rename = "data")] - pub data: Vec, - /// The name of the model used to generate the embedding. - #[serde(rename = "model")] - pub model: String, - /// The object type, which is always \"list\". - #[serde(rename = "object")] - pub object: Object, - #[serde(rename = "usage")] - pub usage: Box, -} - -impl CreateEmbeddingResponse { - pub fn new( - data: Vec, - model: String, - object: Object, - usage: embeddings::CreateEmbeddingResponseUsage, - ) -> CreateEmbeddingResponse { - CreateEmbeddingResponse { - data, - model, - object, - usage: Box::new(usage), - } - } -} -/// The object type, which is always \"list\". -#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] -pub enum Object { - #[serde(rename = "list")] - List, -} - -impl Default for Object { - fn default() -> Object { - Self::List - } -} diff --git a/crates/common/src/embeddings/create_embedding_response_usage.rs b/crates/common/src/embeddings/create_embedding_response_usage.rs deleted file mode 100644 index 2a4730ca..00000000 --- a/crates/common/src/embeddings/create_embedding_response_usage.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * OMF Embeddings - * - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 1.0.0 - * - * Generated by: https://openapi-generator.tech - */ - -use serde::{Deserialize, Serialize}; - -/// CreateEmbeddingResponseUsage : The usage information for the request. -#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] -pub struct CreateEmbeddingResponseUsage { - /// The number of tokens used by the prompt. - #[serde(rename = "prompt_tokens")] - pub prompt_tokens: i32, - /// The total number of tokens used by the request. - #[serde(rename = "total_tokens")] - pub total_tokens: i32, -} - -impl CreateEmbeddingResponseUsage { - /// The usage information for the request. - pub fn new(prompt_tokens: i32, total_tokens: i32) -> CreateEmbeddingResponseUsage { - CreateEmbeddingResponseUsage { - prompt_tokens, - total_tokens, - } - } -} diff --git a/crates/common/src/embeddings/embedding.rs b/crates/common/src/embeddings/embedding.rs deleted file mode 100644 index e36db376..00000000 --- a/crates/common/src/embeddings/embedding.rs +++ /dev/null @@ -1,48 +0,0 @@ -/* - * OMF Embeddings - * - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 1.0.0 - * - * Generated by: https://openapi-generator.tech - */ - -use serde::{Deserialize, Serialize}; - -/// Embedding : Represents an embedding vector returned by embedding endpoint. -#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] -pub struct Embedding { - /// The index of the embedding in the list of embeddings. - #[serde(rename = "index")] - pub index: i32, - /// The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings). - #[serde(rename = "embedding")] - pub embedding: Vec, - /// The object type, which is always \"embedding\" - #[serde(rename = "object")] - pub object: Object, -} - -impl Embedding { - /// Represents an embedding vector returned by embedding endpoint. - pub fn new(index: i32, embedding: Vec, object: Object) -> Embedding { - Embedding { - index, - embedding, - object, - } - } -} -/// The object type, which is always \"embedding\" -#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] -pub enum Object { - #[serde(rename = "embedding")] - Embedding, -} - -impl Default for Object { - fn default() -> Object { - Self::Embedding - } -} diff --git a/crates/common/src/embeddings/mod.rs b/crates/common/src/embeddings/mod.rs deleted file mode 100644 index d7ef176b..00000000 --- a/crates/common/src/embeddings/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -pub mod create_embedding_request; -pub use self::create_embedding_request::CreateEmbeddingRequest; -pub mod create_embedding_request_input; -pub use self::create_embedding_request_input::CreateEmbeddingRequestInput; -pub mod create_embedding_response; -pub use self::create_embedding_response::CreateEmbeddingResponse; -pub mod create_embedding_response_usage; -pub use self::create_embedding_response_usage::CreateEmbeddingResponseUsage; -pub mod embedding; -pub use self::embedding::Embedding; diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index a7c881c6..32549893 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -1,7 +1,6 @@ pub mod api; pub mod configuration; pub mod consts; -pub mod embeddings; pub mod errors; pub mod http; pub mod llm_providers;