Merge branch 'main' into debug-streaming-v2

This commit is contained in:
Adil Hafeez 2024-10-23 00:59:17 -07:00
commit 6982d0a575
16 changed files with 1097 additions and 885 deletions

View file

@ -7,10 +7,13 @@ pub const SYSTEM_ROLE: &str = "system";
// NOTE(review): this listing looks like a diff hunk rendered without its +/- markers,
// so lines removed by the commit and lines added by it may both appear below.
// Confirm against the real file before relying on any single constant.

// Role name strings (presumably the `role` field of chat messages; verify against callers).
pub const USER_ROLE: &str = "user";
pub const TOOL_ROLE: &str = "tool";
pub const ASSISTANT_ROLE: &str = "assistant";
// Model name string.
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
// NOTE(review): same value as ARCH_FC_INTERNAL_HOST below, and spelled `ARC_` rather than
// `ARCH_`. Possibly the old name that this commit replaces; confirm.
pub const ARC_FC_CLUSTER: &str = "arch_fc";
// Timeout expressed in milliseconds (120000 ms).
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const MODEL_SERVER_NAME: &str = "model_server";
// Internal host names, one per backing service (presumably used when addressing the
// model server; verify against the call sites).
pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot";
pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc";
pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination";
pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings";
pub const GUARD_INTERNAL_HOST: &str = "guard";
// Header names; the hint header shares the routing header's name with a `-hint` suffix.
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
// Key string for messages (the container it indexes is not visible here).
pub const ARCH_MESSAGES_KEY: &str = "arch_messages";
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";

View file

@ -23,11 +23,12 @@ pub enum ServerError {
Serialization(serde_json::Error),
#[error("{0}")]
LogicError(String),
#[error("upstream error response authority={authority}, path={path}, status={status}")]
#[error("upstream application error host={host}, path={path}, status={status}, body={body}")]
Upstream {
authority: String,
host: String,
path: String,
status: String,
body: String,
},
#[error("jailbreak detected: {0}")]
Jailbreak(String),

View file

@ -1,6 +1,9 @@
use crate::{errors::ClientError, stats::{Gauge, IncrementingMetric}};
use crate::{
errors::ClientError,
stats::{Gauge, IncrementingMetric},
};
use derivative::Derivative;
use log::debug;
use log::{debug, trace};
use proxy_wasm::{traits::Context, types::Status};
use serde::Serialize;
use std::{cell::RefCell, collections::HashMap, fmt::Debug, time::Duration};
@ -45,9 +48,10 @@ pub trait Client: Context {
call_args: CallArgs,
call_context: Self::CallContext,
) -> Result<u32, ClientError> {
debug!(
trace!(
"dispatching http call with args={:?} context={:?}",
call_args, call_context
call_args,
call_context
);
match self.dispatch_http_call(

View file

@ -4,10 +4,10 @@ pub mod common_types;
// Public module declarations for this crate.
// NOTE(review): `errors` is declared twice in this listing (once between `embeddings` and
// `http`, once after `tokenizer`). In a single Rust file that is a duplicate definition and
// would not compile. This looks like a diff rendered without +/- markers where the line was
// moved; confirm which position the real file keeps.
pub mod configuration;
pub mod consts;
pub mod embeddings;
pub mod errors;
pub mod http;
pub mod llm_providers;
pub mod ratelimit;
pub mod routing;
pub mod stats;
pub mod tokenizer;
pub mod errors;