mirror of
https://github.com/katanemo/plano.git
synced 2026-06-20 15:28:07 +02:00
Merge branch 'main' into debug-streaming-v2
This commit is contained in:
commit
6982d0a575
16 changed files with 1097 additions and 885 deletions
|
|
@ -7,10 +7,13 @@ pub const SYSTEM_ROLE: &str = "system";
|
|||
pub const USER_ROLE: &str = "user";
|
||||
pub const TOOL_ROLE: &str = "tool";
|
||||
pub const ASSISTANT_ROLE: &str = "assistant";
|
||||
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
||||
pub const ARC_FC_CLUSTER: &str = "arch_fc";
|
||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot";
|
||||
pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc";
|
||||
pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination";
|
||||
pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings";
|
||||
pub const GUARD_INTERNAL_HOST: &str = "guard";
|
||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
pub const ARCH_MESSAGES_KEY: &str = "arch_messages";
|
||||
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
||||
|
|
|
|||
|
|
@ -23,11 +23,12 @@ pub enum ServerError {
|
|||
Serialization(serde_json::Error),
|
||||
#[error("{0}")]
|
||||
LogicError(String),
|
||||
#[error("upstream error response authority={authority}, path={path}, status={status}")]
|
||||
#[error("upstream application error host={host}, path={path}, status={status}, body={body}")]
|
||||
Upstream {
|
||||
authority: String,
|
||||
host: String,
|
||||
path: String,
|
||||
status: String,
|
||||
body: String,
|
||||
},
|
||||
#[error("jailbreak detected: {0}")]
|
||||
Jailbreak(String),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
use crate::{errors::ClientError, stats::{Gauge, IncrementingMetric}};
|
||||
use crate::{
|
||||
errors::ClientError,
|
||||
stats::{Gauge, IncrementingMetric},
|
||||
};
|
||||
use derivative::Derivative;
|
||||
use log::debug;
|
||||
use log::{debug, trace};
|
||||
use proxy_wasm::{traits::Context, types::Status};
|
||||
use serde::Serialize;
|
||||
use std::{cell::RefCell, collections::HashMap, fmt::Debug, time::Duration};
|
||||
|
|
@ -45,9 +48,10 @@ pub trait Client: Context {
|
|||
call_args: CallArgs,
|
||||
call_context: Self::CallContext,
|
||||
) -> Result<u32, ClientError> {
|
||||
debug!(
|
||||
trace!(
|
||||
"dispatching http call with args={:?} context={:?}",
|
||||
call_args, call_context
|
||||
call_args,
|
||||
call_context
|
||||
);
|
||||
|
||||
match self.dispatch_http_call(
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ pub mod common_types;
|
|||
pub mod configuration;
|
||||
pub mod consts;
|
||||
pub mod embeddings;
|
||||
pub mod errors;
|
||||
pub mod http;
|
||||
pub mod llm_providers;
|
||||
pub mod ratelimit;
|
||||
pub mod routing;
|
||||
pub mod stats;
|
||||
pub mod tokenizer;
|
||||
pub mod errors;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue