mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix rust tests
This commit is contained in:
parent
700d1757b7
commit
e5795c8932
9 changed files with 2 additions and 244 deletions
|
|
@ -211,7 +211,7 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
{% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination", "model_server"] %}
|
||||
{% for internal_clustrer in ["arch_fc", "model_server"] %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
headers:
|
||||
|
|
@ -448,7 +448,7 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: api.mistral.ai
|
||||
{% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination", "model_server"] %}
|
||||
{% for internal_clustrer in ["arch_fc", "model_server"] %}
|
||||
- name: {{ internal_clustrer }}
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
|
|
|
|||
|
|
@ -1,6 +1,3 @@
|
|||
// Default values, a header key, and role names.
// NOTE(review): the descriptions below are inferred from the constant names and values only — confirm against callers.
/// Default intent model identifier.
pub const DEFAULT_INTENT_MODEL: &str = "katanemo/bart-large-mnli";
|
||||
/// Default prompt-target threshold (presumably a score cutoff — TODO confirm).
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
|
||||
/// Default hallucination threshold (presumably a score cutoff — TODO confirm).
pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.25;
|
||||
/// Header key `x-arch-ratelimit-selector`.
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
|
||||
/// Role name `"system"`.
pub const SYSTEM_ROLE: &str = "system";
|
||||
/// Role name `"user"`.
pub const USER_ROLE: &str = "user";
|
||||
|
|
@ -8,11 +5,6 @@ pub const TOOL_ROLE: &str = "tool";
|
|||
// Role, timeout, internal host, and header constants.
// NOTE(review): the descriptions below are inferred from the constant names and values only — confirm against callers.
/// Role name `"assistant"`.
pub const ASSISTANT_ROLE: &str = "assistant";
|
||||
/// Request timeout in milliseconds.
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
/// The name `"model_server"`.
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
/// Internal host name `"zeroshot"`.
pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot";
|
||||
/// Internal host name `"arch_fc"`.
pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc";
|
||||
/// Internal host name `"hallucination"`.
pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination";
|
||||
/// Internal host name `"embeddings"`.
pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings";
|
||||
/// Internal host name `"guard"`.
pub const GUARD_INTERNAL_HOST: &str = "guard";
|
||||
/// Header name `x-arch-llm-provider` (presumably used for routing — TODO confirm).
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
/// The key `"messages"`.
pub const MESSAGES_KEY: &str = "messages";
|
||||
/// Header name `x-arch-llm-provider-hint`.
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
||||
|
|
@ -24,7 +16,6 @@ pub const REQUEST_ID_HEADER: &str = "x-request-id";
|
|||
// Header, cluster, listener, and template constants.
// NOTE(review): the descriptions below are inferred from the constant names and values only — confirm against callers.
/// Header name `traceparent`.
pub const TRACE_PARENT_HEADER: &str = "traceparent";
|
||||
/// Cluster name `"arch_internal"`.
pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal";
|
||||
/// Header name `x-arch-upstream`.
pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream";
|
||||
/// Listener name `"arch_llm_listener"`.
pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener";
|
||||
/// The prefix `"Arch"` (presumably matched against model names — TODO confirm).
pub const ARCH_MODEL_PREFIX: &str = "Arch";
|
||||
/// Message prefix asking for missing details; the text ends with a trailing
/// space, so callers presumably append the list of details — TODO confirm.
pub const HALLUCINATION_TEMPLATE: &str =
|
||||
"It seems I'm missing some information. Could you provide the following details ";
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* OMF Embeddings
|
||||
*
|
||||
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
||||
*
|
||||
* The version of the OpenAPI document: 1.0.0
|
||||
*
|
||||
* Generated by: https://openapi-generator.tech
|
||||
*/
|
||||
|
||||
use crate::embeddings;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CreateEmbeddingRequest {
|
||||
#[serde(rename = "input")]
|
||||
pub input: Box<embeddings::CreateEmbeddingRequestInput>,
|
||||
/// ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.
|
||||
#[serde(rename = "model")]
|
||||
pub model: String,
|
||||
/// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
|
||||
#[serde(rename = "encoding_format", skip_serializing_if = "Option::is_none")]
|
||||
pub encoding_format: Option<EncodingFormat>,
|
||||
/// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.
|
||||
#[serde(rename = "dimensions", skip_serializing_if = "Option::is_none")]
|
||||
pub dimensions: Option<i32>,
|
||||
/// A unique identifier representing your end-user, which can help to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
|
||||
#[serde(rename = "user", skip_serializing_if = "Option::is_none")]
|
||||
pub user: Option<String>,
|
||||
}
|
||||
|
||||
impl CreateEmbeddingRequest {
|
||||
pub fn new(
|
||||
input: embeddings::CreateEmbeddingRequestInput,
|
||||
model: String,
|
||||
) -> CreateEmbeddingRequest {
|
||||
CreateEmbeddingRequest {
|
||||
input: Box::new(input),
|
||||
model,
|
||||
encoding_format: None,
|
||||
dimensions: None,
|
||||
user: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
/// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub enum EncodingFormat {
|
||||
#[serde(rename = "float")]
|
||||
Float,
|
||||
#[serde(rename = "base64")]
|
||||
Base64,
|
||||
}
|
||||
|
||||
impl Default for EncodingFormat {
|
||||
fn default() -> EncodingFormat {
|
||||
Self::Float
|
||||
}
|
||||
}
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
/*
|
||||
* OMF Embeddings
|
||||
*
|
||||
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
||||
*
|
||||
* The version of the OpenAPI document: 1.0.0
|
||||
*
|
||||
* Generated by: https://openapi-generator.tech
|
||||
*/
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// CreateEmbeddingRequestInput : Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens.
|
||||
/// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. for counting tokens.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum CreateEmbeddingRequestInput {
|
||||
/// The string that will be turned into an embedding.
|
||||
String(String),
|
||||
/// The array of integers that will be turned into an embedding.
|
||||
Array(Vec<i32>),
|
||||
}
|
||||
|
||||
impl Default for CreateEmbeddingRequestInput {
|
||||
fn default() -> Self {
|
||||
Self::String(Default::default())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,55 +0,0 @@
|
|||
/*
|
||||
* OMF Embeddings
|
||||
*
|
||||
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
||||
*
|
||||
* The version of the OpenAPI document: 1.0.0
|
||||
*
|
||||
* Generated by: https://openapi-generator.tech
|
||||
*/
|
||||
|
||||
use crate::embeddings;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CreateEmbeddingResponse {
|
||||
/// The list of embeddings generated by the model.
|
||||
#[serde(rename = "data")]
|
||||
pub data: Vec<embeddings::Embedding>,
|
||||
/// The name of the model used to generate the embedding.
|
||||
#[serde(rename = "model")]
|
||||
pub model: String,
|
||||
/// The object type, which is always \"list\".
|
||||
#[serde(rename = "object")]
|
||||
pub object: Object,
|
||||
#[serde(rename = "usage")]
|
||||
pub usage: Box<embeddings::CreateEmbeddingResponseUsage>,
|
||||
}
|
||||
|
||||
impl CreateEmbeddingResponse {
|
||||
pub fn new(
|
||||
data: Vec<embeddings::Embedding>,
|
||||
model: String,
|
||||
object: Object,
|
||||
usage: embeddings::CreateEmbeddingResponseUsage,
|
||||
) -> CreateEmbeddingResponse {
|
||||
CreateEmbeddingResponse {
|
||||
data,
|
||||
model,
|
||||
object,
|
||||
usage: Box::new(usage),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// The object type, which is always \"list\".
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub enum Object {
|
||||
#[serde(rename = "list")]
|
||||
List,
|
||||
}
|
||||
|
||||
impl Default for Object {
|
||||
fn default() -> Object {
|
||||
Self::List
|
||||
}
|
||||
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
/*
|
||||
* OMF Embeddings
|
||||
*
|
||||
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
||||
*
|
||||
* The version of the OpenAPI document: 1.0.0
|
||||
*
|
||||
* Generated by: https://openapi-generator.tech
|
||||
*/
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// CreateEmbeddingResponseUsage : The usage information for the request.
|
||||
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CreateEmbeddingResponseUsage {
|
||||
/// The number of tokens used by the prompt.
|
||||
#[serde(rename = "prompt_tokens")]
|
||||
pub prompt_tokens: i32,
|
||||
/// The total number of tokens used by the request.
|
||||
#[serde(rename = "total_tokens")]
|
||||
pub total_tokens: i32,
|
||||
}
|
||||
|
||||
impl CreateEmbeddingResponseUsage {
|
||||
/// The usage information for the request.
|
||||
pub fn new(prompt_tokens: i32, total_tokens: i32) -> CreateEmbeddingResponseUsage {
|
||||
CreateEmbeddingResponseUsage {
|
||||
prompt_tokens,
|
||||
total_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* OMF Embeddings
|
||||
*
|
||||
* No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
||||
*
|
||||
* The version of the OpenAPI document: 1.0.0
|
||||
*
|
||||
* Generated by: https://openapi-generator.tech
|
||||
*/
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Embedding : Represents an embedding vector returned by embedding endpoint.
|
||||
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Embedding {
|
||||
/// The index of the embedding in the list of embeddings.
|
||||
#[serde(rename = "index")]
|
||||
pub index: i32,
|
||||
/// The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings).
|
||||
#[serde(rename = "embedding")]
|
||||
pub embedding: Vec<f64>,
|
||||
/// The object type, which is always \"embedding\"
|
||||
#[serde(rename = "object")]
|
||||
pub object: Object,
|
||||
}
|
||||
|
||||
impl Embedding {
|
||||
/// Represents an embedding vector returned by embedding endpoint.
|
||||
pub fn new(index: i32, embedding: Vec<f64>, object: Object) -> Embedding {
|
||||
Embedding {
|
||||
index,
|
||||
embedding,
|
||||
object,
|
||||
}
|
||||
}
|
||||
}
|
||||
/// The object type, which is always \"embedding\"
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub enum Object {
|
||||
#[serde(rename = "embedding")]
|
||||
Embedding,
|
||||
}
|
||||
|
||||
impl Default for Object {
|
||||
fn default() -> Object {
|
||||
Self::Embedding
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
pub mod create_embedding_request;
|
||||
pub use self::create_embedding_request::CreateEmbeddingRequest;
|
||||
pub mod create_embedding_request_input;
|
||||
pub use self::create_embedding_request_input::CreateEmbeddingRequestInput;
|
||||
pub mod create_embedding_response;
|
||||
pub use self::create_embedding_response::CreateEmbeddingResponse;
|
||||
pub mod create_embedding_response_usage;
|
||||
pub use self::create_embedding_response_usage::CreateEmbeddingResponseUsage;
|
||||
pub mod embedding;
|
||||
pub use self::embedding::Embedding;
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
// Public module declarations. Only this leading run is visible here; the file may continue past it.
pub mod api;
|
||||
pub mod configuration;
|
||||
pub mod consts;
|
||||
pub mod embeddings;
|
||||
pub mod errors;
|
||||
pub mod http;
|
||||
pub mod llm_providers;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue