mirror of
https://github.com/katanemo/plano.git
synced 2026-05-18 13:45:15 +02:00
improve service names (#54)
- embedding-server => model_server
- public-types => public_types
- chatbot-ui => chatbot_ui
- function-calling => function_calling
This commit is contained in:
parent
215f96e273
commit
060a0d665e
35 changed files with 54 additions and 52 deletions
|
|
@ -7,3 +7,4 @@ pub const USER_ROLE: &str = "user";
|
|||
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
||||
pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b";
|
||||
/// Request timeout, expressed in milliseconds (120000 ms = 2 minutes).
///
/// NOTE(review): the name pairs this with `BOLT_FC_CLUSTER`; presumably it
/// bounds calls sent to that cluster — confirm at the call sites.
pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
/// Name of the model server service.
///
/// Passed as the first argument to `dispatch_http_call` and as the
/// `:authority` header value for the `/embeddings` and `/zeroshot` requests,
/// replacing the previously hard-coded `"embeddingserver"` string literal.
/// NOTE(review): this presumably has to match the service name declared in
/// the proxy configuration — confirm when renaming.
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::consts::DEFAULT_EMBEDDING_MODEL;
|
||||
use crate::consts::{DEFAULT_EMBEDDING_MODEL, MODEL_SERVER_NAME};
|
||||
use crate::ratelimit;
|
||||
use crate::stats::{Counter, Gauge, RecordingMetric};
|
||||
use crate::stream_context::StreamContext;
|
||||
|
|
@ -123,11 +123,11 @@ impl FilterContext {
|
|||
|
||||
let json_data = to_string(&embeddings_input).unwrap();
|
||||
let token_id = match self.dispatch_http_call(
|
||||
"embeddingserver",
|
||||
MODEL_SERVER_NAME,
|
||||
vec![
|
||||
(":method", "POST"),
|
||||
(":path", "/embeddings"),
|
||||
(":authority", "embeddingserver"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "60000"),
|
||||
],
|
||||
|
|
@ -219,7 +219,7 @@ impl RootContext for FilterContext {
|
|||
.insert(pt.name.clone(), pt.clone());
|
||||
}
|
||||
|
||||
debug!("set configuration object: {:?}", self.config);
|
||||
debug!("set configuration object");
|
||||
|
||||
if let Some(ratelimits_config) = self
|
||||
.config
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use crate::consts::{
|
||||
BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL,
|
||||
DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE,
|
||||
USER_ROLE,
|
||||
DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
|
||||
RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
|
||||
};
|
||||
use crate::filter_context::{embeddings_store, WasmMetrics};
|
||||
use crate::ratelimit;
|
||||
|
|
@ -175,11 +175,11 @@ impl StreamContext {
|
|||
};
|
||||
|
||||
let token_id = match self.dispatch_http_call(
|
||||
"embeddingserver",
|
||||
MODEL_SERVER_NAME,
|
||||
vec![
|
||||
(":method", "POST"),
|
||||
(":path", "/zeroshot"),
|
||||
(":authority", "embeddingserver"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-max-retries", "3"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "60000"),
|
||||
|
|
@ -637,11 +637,11 @@ impl HttpContext for StreamContext {
|
|||
};
|
||||
|
||||
let token_id = match self.dispatch_http_call(
|
||||
"embeddingserver",
|
||||
MODEL_SERVER_NAME,
|
||||
vec![
|
||||
(":method", "POST"),
|
||||
(":path", "/embeddings"),
|
||||
(":authority", "embeddingserver"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-max-retries", "3"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "60000"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue