mirror of
https://github.com/katanemo/plano.git
synced 2026-06-02 14:35:14 +02:00
improve service names (#54)
- embedding-server => model_server
- public-types => public_types
- chatbot-ui => chatbot_ui
- function-calling => function_calling
This commit is contained in:
parent
215f96e273
commit
060a0d665e
35 changed files with 54 additions and 52 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
|
@ -1,8 +1,7 @@
|
|||
envoyfilter/target
|
||||
envoyfilter/qdrant_data/
|
||||
public-types/target
|
||||
embedding-server/venv/
|
||||
chatbot-ui/venv/
|
||||
public_types/target
|
||||
/venv/
|
||||
__pycache__
|
||||
grafana-data
|
||||
prom_data
|
||||
|
|
@ -12,5 +11,5 @@ generated
|
|||
.DS_Store
|
||||
*.gguf
|
||||
venv
|
||||
demos/function-calling/ollama/models/
|
||||
demos/function-calling/ollama/id_ed*
|
||||
demos/function_calling/ollama/models/
|
||||
demos/function_calling/ollama/id_ed*
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@ import os
|
|||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
|
||||
KATANEMO_CONFIG_FILE = os.getenv('KATANEMO_CONFIG_FILE', 'katanemo-config.yaml')
|
||||
BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt-config.yaml')
|
||||
ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')
|
||||
|
||||
env = Environment(loader=FileSystemLoader('./'))
|
||||
template = env.get_template('envoy.template.yaml')
|
||||
|
||||
with open(KATANEMO_CONFIG_FILE, 'r') as file:
|
||||
with open(BOLT_CONFIG_FILE, 'r') as file:
|
||||
katanemo_config = file.read()
|
||||
|
||||
data = {
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
|
||||
services:
|
||||
|
||||
config-generator:
|
||||
config_generator:
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: config_generator/Dockerfile
|
||||
volumes:
|
||||
- ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
|
||||
- ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml
|
||||
- ./bolt-config.yaml:/usr/src/app/bolt-config.yaml
|
||||
- ./generated:/usr/src/app/out
|
||||
|
||||
bolt:
|
||||
|
|
@ -22,16 +22,16 @@ services:
|
|||
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
depends_on:
|
||||
config-generator:
|
||||
config_generator:
|
||||
condition: service_completed_successfully
|
||||
embeddingserver:
|
||||
model_server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
embeddingserver:
|
||||
model_server:
|
||||
build:
|
||||
context: ../../embedding-server
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18081:80"
|
||||
|
|
@ -42,7 +42,7 @@ services:
|
|||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
|
||||
functionresolver:
|
||||
function_resolver:
|
||||
build:
|
||||
context: ../../function_resolver
|
||||
dockerfile: Dockerfile
|
||||
|
|
@ -71,9 +71,9 @@ services:
|
|||
profiles:
|
||||
- manual
|
||||
|
||||
chatbot-ui:
|
||||
chatbot_ui:
|
||||
build:
|
||||
context: ../../chatbot-ui
|
||||
context: ../../chatbot_ui
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18080:8080"
|
||||
|
|
@ -92,6 +92,8 @@ services:
|
|||
volumes:
|
||||
- ./prometheus:/etc/prometheus
|
||||
- ./prom_data:/prometheus
|
||||
profiles:
|
||||
- monitoring
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana
|
||||
|
|
@ -106,3 +108,5 @@ services:
|
|||
- ./grafana:/etc/grafana/provisioning/datasources
|
||||
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards
|
||||
profiles:
|
||||
- monitoring
|
||||
4
envoyfilter/Cargo.lock
generated
4
envoyfilter/Cargo.lock
generated
|
|
@ -976,7 +976,7 @@ dependencies = [
|
|||
"open-message-format-embeddings",
|
||||
"proxy-wasm",
|
||||
"proxy-wasm-test-framework",
|
||||
"public-types",
|
||||
"public_types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
|
|
@ -1453,7 +1453,7 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "public-types"
|
||||
name = "public_types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"open-message-format-embeddings",
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ serde_yaml = "0.9.34"
|
|||
serde_json = "1.0"
|
||||
md5 = "0.7.0"
|
||||
open-message-format-embeddings = { path = "../open-message-format/clients/omf-embeddings-rust" }
|
||||
public-types = { path = "../public-types" }
|
||||
public_types = { path = "../public_types" }
|
||||
http = "1.1.0"
|
||||
governor = { version = "0.6.3", default-features = false, features = ["no_std"]}
|
||||
tiktoken-rs = "0.5.9"
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ COPY envoyfilter/src /envoyfilter/src
|
|||
COPY envoyfilter/Cargo.toml /envoyfilter/
|
||||
COPY envoyfilter/Cargo.lock /envoyfilter/
|
||||
COPY open-message-format /open-message-format
|
||||
COPY public-types /public-types
|
||||
COPY public_types /public_types
|
||||
|
||||
RUN cargo build --release --target wasm32-wasi
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ static_resources:
|
|||
- match:
|
||||
prefix: "/embeddings"
|
||||
route:
|
||||
cluster: embeddingserver
|
||||
cluster: model_server
|
||||
- match:
|
||||
prefix: "/"
|
||||
direct_response:
|
||||
|
|
@ -123,20 +123,20 @@ static_resources:
|
|||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
|
||||
- name: embeddingserver
|
||||
- name: model_server
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: embeddingserver
|
||||
cluster_name: model_server
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: embeddingserver
|
||||
address: model_server
|
||||
port_value: 80
|
||||
hostname: "embeddingserver"
|
||||
hostname: "model_server"
|
||||
- name: weatherhost
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
|
|
@ -148,9 +148,9 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: embeddingserver
|
||||
address: model_server
|
||||
port_value: 80
|
||||
hostname: "embeddingserver"
|
||||
hostname: "model_server"
|
||||
- name: nerhost
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
|
|
@ -162,9 +162,9 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: embeddingserver
|
||||
address: model_server
|
||||
port_value: 80
|
||||
hostname: "embeddingserver"
|
||||
hostname: "model_server"
|
||||
- name: mistral_7b_instruct
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
|
|
@ -190,6 +190,6 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: functionresolver
|
||||
address: function_resolver
|
||||
port_value: 80
|
||||
hostname: "bolt_fc_1b"
|
||||
|
|
|
|||
|
|
@ -7,3 +7,4 @@ pub const USER_ROLE: &str = "user";
|
|||
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
||||
pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b";
|
||||
pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::consts::DEFAULT_EMBEDDING_MODEL;
|
||||
use crate::consts::{DEFAULT_EMBEDDING_MODEL, MODEL_SERVER_NAME};
|
||||
use crate::ratelimit;
|
||||
use crate::stats::{Counter, Gauge, RecordingMetric};
|
||||
use crate::stream_context::StreamContext;
|
||||
|
|
@ -123,11 +123,11 @@ impl FilterContext {
|
|||
|
||||
let json_data = to_string(&embeddings_input).unwrap();
|
||||
let token_id = match self.dispatch_http_call(
|
||||
"embeddingserver",
|
||||
MODEL_SERVER_NAME,
|
||||
vec![
|
||||
(":method", "POST"),
|
||||
(":path", "/embeddings"),
|
||||
(":authority", "embeddingserver"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "60000"),
|
||||
],
|
||||
|
|
@ -219,7 +219,7 @@ impl RootContext for FilterContext {
|
|||
.insert(pt.name.clone(), pt.clone());
|
||||
}
|
||||
|
||||
debug!("set configuration object: {:?}", self.config);
|
||||
debug!("set configuration object");
|
||||
|
||||
if let Some(ratelimits_config) = self
|
||||
.config
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use crate::consts::{
|
||||
BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL,
|
||||
DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE,
|
||||
USER_ROLE,
|
||||
DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
|
||||
RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
|
||||
};
|
||||
use crate::filter_context::{embeddings_store, WasmMetrics};
|
||||
use crate::ratelimit;
|
||||
|
|
@ -175,11 +175,11 @@ impl StreamContext {
|
|||
};
|
||||
|
||||
let token_id = match self.dispatch_http_call(
|
||||
"embeddingserver",
|
||||
MODEL_SERVER_NAME,
|
||||
vec![
|
||||
(":method", "POST"),
|
||||
(":path", "/zeroshot"),
|
||||
(":authority", "embeddingserver"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-max-retries", "3"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "60000"),
|
||||
|
|
@ -637,11 +637,11 @@ impl HttpContext for StreamContext {
|
|||
};
|
||||
|
||||
let token_id = match self.dispatch_http_call(
|
||||
"embeddingserver",
|
||||
MODEL_SERVER_NAME,
|
||||
vec![
|
||||
(":method", "POST"),
|
||||
(":path", "/embeddings"),
|
||||
(":authority", "embeddingserver"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-max-retries", "3"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "60000"),
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
|
|||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_http_call(Some("embeddingserver"), None, None, None, None)
|
||||
.expect_http_call(Some("model_server"), None, None, None, None)
|
||||
.returning(Some(1))
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
@ -114,7 +114,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
|
|||
.returning(Some(&embeddings_response_buffer))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_http_call(Some("embeddingserver"), None, None, None, None)
|
||||
.expect_http_call(Some("model_server"), None, None, None, None)
|
||||
.returning(Some(2))
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
.execute_and_expect(ReturnType::None)
|
||||
|
|
|
|||
|
|
@ -9,24 +9,24 @@
|
|||
"path": "envoyfilter"
|
||||
},
|
||||
{
|
||||
"name": "embedding-server",
|
||||
"path": "embedding-server"
|
||||
"name": "model_server",
|
||||
"path": "model_server"
|
||||
},
|
||||
{
|
||||
"name": "function_resolver",
|
||||
"path": "function_resolver"
|
||||
},
|
||||
{
|
||||
"name": "chatbot-ui",
|
||||
"path": "chatbot-ui"
|
||||
"name": "chatbot_ui",
|
||||
"path": "chatbot_ui"
|
||||
},
|
||||
{
|
||||
"name": "open-message-format",
|
||||
"path": "open-message-format"
|
||||
},
|
||||
{
|
||||
"name": "demos/function-calling",
|
||||
"path": "./demos/function-calling",
|
||||
"name": "demos/function_calling",
|
||||
"path": "./demos/function_calling",
|
||||
},
|
||||
],
|
||||
"settings": {}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
# copied from https://github.com/bergos/embedding-server
|
||||
|
||||
FROM python:3 AS base
|
||||
|
||||
#
|
||||
|
|
@ -603,7 +603,7 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "public-types"
|
||||
name = "public_types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"open-message-format-embeddings",
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[package]
|
||||
name = "public-types"
|
||||
name = "public_types"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue