fix rust tests

2026-06-17 15:25:17 +02:00 · 2024-12-11 12:48:28 -08:00 · 2024-12-11 12:48:28 -08:00 · e5795c8932
commit e5795c8932
parent 700d1757b7
9 changed files with 2 additions and 244 deletions
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -211,7 +211,7 @@ static_resources:
                      domains:
                        - "*"
                      routes:
-                        {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination", "model_server"] %}
+                        {% for internal_clustrer in ["arch_fc", "model_server"] %}
                        - match:
                            prefix: "/"
                            headers:
@@ -448,7 +448,7 @@ static_resources:
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai
-    {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination", "model_server"] %}
+    {% for internal_clustrer in ["arch_fc", "model_server"] %}
    - name: {{ internal_clustrer }}
      connect_timeout: 5s
      type: STRICT_DNS
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@@ -1,6 +1,3 @@
-pub const DEFAULT_INTENT_MODEL: &str = "katanemo/bart-large-mnli";
-pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
-pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.25;
 pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
 pub const SYSTEM_ROLE: &str = "system";
 pub const USER_ROLE: &str = "user";
@@ -8,11 +5,6 @@ pub const TOOL_ROLE: &str = "tool";
 pub const ASSISTANT_ROLE: &str = "assistant";
 pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
 pub const MODEL_SERVER_NAME: &str = "model_server";
-pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot";
-pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc";
-pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination";
-pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings";
-pub const GUARD_INTERNAL_HOST: &str = "guard";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
@@ -24,7 +16,6 @@ pub const REQUEST_ID_HEADER: &str = "x-request-id";
 pub const TRACE_PARENT_HEADER: &str = "traceparent";
 pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal";
 pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream";
-pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener";
 pub const ARCH_MODEL_PREFIX: &str = "Arch";
 pub const HALLUCINATION_TEMPLATE: &str =
    "It seems I'm missing some information. Could you provide the following details ";
--- a/crates/common/src/embeddings/create_embedding_request.rs
+++ b/crates/common/src/embeddings/create_embedding_request.rs
@@ -1,59 +0,0 @@
-/*
- * OMF Embeddings
- *
- * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
- *
- * The version of the OpenAPI document: 1.0.0
- *
- * Generated by: https://openapi-generator.tech
- */
-
-use crate::embeddings;
-use serde::{Deserialize, Serialize};
-
-#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
-pub struct CreateEmbeddingRequest {
-    #[serde(rename = "input")]
-    pub input: Box<embeddings::CreateEmbeddingRequestInput>,
-    /// ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.
-    #[serde(rename = "model")]
-    pub model: String,
-    /// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
-    #[serde(rename = "encoding_format", skip_serializing_if = "Option::is_none")]
-    pub encoding_format: Option<EncodingFormat>,
-    /// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.
-    #[serde(rename = "dimensions", skip_serializing_if = "Option::is_none")]
-    pub dimensions: Option<i32>,
-    /// A unique identifier representing your end-user, which can help to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
-    #[serde(rename = "user", skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-}
-
-impl CreateEmbeddingRequest {
-    pub fn new(
-        input: embeddings::CreateEmbeddingRequestInput,
-        model: String,
-    ) -> CreateEmbeddingRequest {
-        CreateEmbeddingRequest {
-            input: Box::new(input),
-            model,
-            encoding_format: None,
-            dimensions: None,
-            user: None,
-        }
-    }
-}
-/// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
-pub enum EncodingFormat {
-    #[serde(rename = "float")]
-    Float,
-    #[serde(rename = "base64")]
-    Base64,
-}
-
-impl Default for EncodingFormat {
-    fn default() -> EncodingFormat {
-        Self::Float
-    }
-}
--- a/crates/common/src/embeddings/create_embedding_request_input.rs
+++ b/crates/common/src/embeddings/create_embedding_request_input.rs
@@ -1,28 +0,0 @@
-/*
- * OMF Embeddings
- *
- * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
- *
- * The version of the OpenAPI document: 1.0.0
- *
- * Generated by: https://openapi-generator.tech
- */
-
-use serde::{Deserialize, Serialize};
-
-/// CreateEmbeddingRequestInput : Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens.
-/// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens.
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-#[serde(untagged)]
-pub enum CreateEmbeddingRequestInput {
-    /// The string that will be turned into an embedding.
-    String(String),
-    /// The array of integers that will be turned into an embedding.
-    Array(Vec<i32>),
-}
-
-impl Default for CreateEmbeddingRequestInput {
-    fn default() -> Self {
-        Self::String(Default::default())
-    }
-}
--- a/crates/common/src/embeddings/create_embedding_response.rs
+++ b/crates/common/src/embeddings/create_embedding_response.rs
@@ -1,55 +0,0 @@
-/*
- * OMF Embeddings
- *
- * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
- *
- * The version of the OpenAPI document: 1.0.0
- *
- * Generated by: https://openapi-generator.tech
- */
-
-use crate::embeddings;
-use serde::{Deserialize, Serialize};
-
-#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
-pub struct CreateEmbeddingResponse {
-    /// The list of embeddings generated by the model.
-    #[serde(rename = "data")]
-    pub data: Vec<embeddings::Embedding>,
-    /// The name of the model used to generate the embedding.
-    #[serde(rename = "model")]
-    pub model: String,
-    /// The object type, which is always \"list\".
-    #[serde(rename = "object")]
-    pub object: Object,
-    #[serde(rename = "usage")]
-    pub usage: Box<embeddings::CreateEmbeddingResponseUsage>,
-}
-
-impl CreateEmbeddingResponse {
-    pub fn new(
-        data: Vec<embeddings::Embedding>,
-        model: String,
-        object: Object,
-        usage: embeddings::CreateEmbeddingResponseUsage,
-    ) -> CreateEmbeddingResponse {
-        CreateEmbeddingResponse {
-            data,
-            model,
-            object,
-            usage: Box::new(usage),
-        }
-    }
-}
-/// The object type, which is always \"list\".
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
-pub enum Object {
-    #[serde(rename = "list")]
-    List,
-}
-
-impl Default for Object {
-    fn default() -> Object {
-        Self::List
-    }
-}
--- a/crates/common/src/embeddings/create_embedding_response_usage.rs
+++ b/crates/common/src/embeddings/create_embedding_response_usage.rs
@@ -1,32 +0,0 @@
-/*
- * OMF Embeddings
- *
- * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
- *
- * The version of the OpenAPI document: 1.0.0
- *
- * Generated by: https://openapi-generator.tech
- */
-
-use serde::{Deserialize, Serialize};
-
-/// CreateEmbeddingResponseUsage : The usage information for the request.
-#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
-pub struct CreateEmbeddingResponseUsage {
-    /// The number of tokens used by the prompt.
-    #[serde(rename = "prompt_tokens")]
-    pub prompt_tokens: i32,
-    /// The total number of tokens used by the request.
-    #[serde(rename = "total_tokens")]
-    pub total_tokens: i32,
-}
-
-impl CreateEmbeddingResponseUsage {
-    /// The usage information for the request.
-    pub fn new(prompt_tokens: i32, total_tokens: i32) -> CreateEmbeddingResponseUsage {
-        CreateEmbeddingResponseUsage {
-            prompt_tokens,
-            total_tokens,
-        }
-    }
-}
--- a/crates/common/src/embeddings/embedding.rs
+++ b/crates/common/src/embeddings/embedding.rs
@@ -1,48 +0,0 @@
-/*
- * OMF Embeddings
- *
- * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
- *
- * The version of the OpenAPI document: 1.0.0
- *
- * Generated by: https://openapi-generator.tech
- */
-
-use serde::{Deserialize, Serialize};
-
-/// Embedding : Represents an embedding vector returned by embedding endpoint.
-#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
-pub struct Embedding {
-    /// The index of the embedding in the list of embeddings.
-    #[serde(rename = "index")]
-    pub index: i32,
-    /// The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings).
-    #[serde(rename = "embedding")]
-    pub embedding: Vec<f64>,
-    /// The object type, which is always \"embedding\"
-    #[serde(rename = "object")]
-    pub object: Object,
-}
-
-impl Embedding {
-    /// Represents an embedding vector returned by embedding endpoint.
-    pub fn new(index: i32, embedding: Vec<f64>, object: Object) -> Embedding {
-        Embedding {
-            index,
-            embedding,
-            object,
-        }
-    }
-}
-/// The object type, which is always \"embedding\"
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
-pub enum Object {
-    #[serde(rename = "embedding")]
-    Embedding,
-}
-
-impl Default for Object {
-    fn default() -> Object {
-        Self::Embedding
-    }
-}
--- a/crates/common/src/embeddings/mod.rs
+++ b/crates/common/src/embeddings/mod.rs
@@ -1,10 +0,0 @@
-pub mod create_embedding_request;
-pub use self::create_embedding_request::CreateEmbeddingRequest;
-pub mod create_embedding_request_input;
-pub use self::create_embedding_request_input::CreateEmbeddingRequestInput;
-pub mod create_embedding_response;
-pub use self::create_embedding_response::CreateEmbeddingResponse;
-pub mod create_embedding_response_usage;
-pub use self::create_embedding_response_usage::CreateEmbeddingResponseUsage;
-pub mod embedding;
-pub use self::embedding::Embedding;
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@@ -1,7 +1,6 @@
 pub mod api;
 pub mod configuration;
 pub mod consts;
-pub mod embeddings;
 pub mod errors;
 pub mod http;
 pub mod llm_providers;