mirror of
https://github.com/katanemo/plano.git
synced 2026-07-02 15:51:02 +02:00
update observability and dashboards
- Spin up an individual cluster for every endpoint so Envoy can track each endpoint's latency - Update dashboards to show the individual clusters
This commit is contained in:
parent
6cd05572c4
commit
3796ee8773
5 changed files with 450 additions and 562 deletions
|
|
@@ -6,9 +6,13 @@ pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
|
|||
/// The role string `"system"`. Presumably the role tag on chat messages
/// that carry system prompts — confirm against callers.
pub const SYSTEM_ROLE: &str = "system";
|
||||
/// The role string `"user"`. Presumably the role tag on chat messages
/// written by the end user — confirm against callers.
pub const USER_ROLE: &str = "user";
|
||||
/// The model name string `"gpt-3.5-turbo"`.
/// NOTE(review): where this is used (e.g. as a default model) is not
/// visible from this file — confirm at the call sites.
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
||||
/// Cluster name `"arch_fc"`.
/// NOTE(review): the identifier is spelled `ARC_` while the related
/// constants in this file use the `ARCH_` prefix; it is kept as-is because
/// renaming a public constant would break existing callers.
pub const ARC_FC_CLUSTER: &str = "arch_fc";
|
||||
/// Request timeout in milliseconds: 2 minutes (120 000 ms).
/// Presumably applied to requests sent to the `arch_fc` cluster — confirm
/// at the call site.
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 2 * 60 * 1_000;
|
||||
/// The name string `"model_server"`.
/// NOTE(review): presumably the upstream/cluster name of the model server;
/// verify against the proxy configuration.
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
/// Internal host name `"zeroshot"`.
/// NOTE(review): presumably identifies the per-endpoint cluster for the
/// zero-shot endpoint — confirm against the proxy configuration.
pub const ZEROSHOT_INTERNAL_HOST: &str = "zeroshot";
|
||||
/// Internal host name `"arch_fc"` (the same string value as `ARC_FC_CLUSTER`).
/// NOTE(review): presumably identifies the per-endpoint cluster for the
/// function-calling endpoint — confirm against the proxy configuration.
pub const ARCH_FC_INTERNAL_HOST: &str = "arch_fc";
|
||||
/// Internal host name `"hallucination"`.
/// NOTE(review): presumably identifies the per-endpoint cluster for the
/// hallucination-check endpoint — confirm against the proxy configuration.
pub const HALLUCINATION_INTERNAL_HOST: &str = "hallucination";
|
||||
/// Internal host name `"embeddings"`.
/// NOTE(review): presumably identifies the per-endpoint cluster for the
/// embeddings endpoint — confirm against the proxy configuration.
pub const EMBEDDINGS_INTERNAL_HOST: &str = "embeddings";
|
||||
/// Internal host name `"guard"`.
/// NOTE(review): presumably identifies the per-endpoint cluster for the
/// guard endpoint — confirm against the proxy configuration.
pub const GUARD_INTERNAL_HOST: &str = "guard";
|
||||
/// Header name `"x-arch-llm-provider"`.
/// NOTE(review): presumably carries the selected LLM provider used for
/// routing — confirm where the header is read and written.
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
/// The key string `"arch_messages"`.
/// NOTE(review): the container this key indexes into (request metadata,
/// JSON body, ...) is not visible here — confirm at the call sites.
pub const ARCH_MESSAGES_KEY: &str = "arch_messages";
|
||||
/// Header name `"x-arch-llm-provider-hint"`.
/// NOTE(review): presumably lets a caller suggest an LLM provider, as
/// distinct from `ARCH_ROUTING_HEADER` — confirm where the header is consumed.
pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue