Improve service names (#54)

Rename services and directories to snake_case for consistency:

- embedding-server => model_server
- public-types => public_types
- chatbot-ui => chatbot_ui
- function-calling => function_calling
This commit is contained in:
Adil Hafeez 2024-09-17 08:47:35 -07:00 committed by GitHub
parent 215f96e273
commit 060a0d665e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 54 additions and 52 deletions

9
.gitignore vendored
View file

@ -1,8 +1,7 @@
envoyfilter/target
envoyfilter/qdrant_data/
public-types/target
embedding-server/venv/
chatbot-ui/venv/
public_types/target
/venv/
__pycache__
grafana-data
prom_data
@ -12,5 +11,5 @@ generated
.DS_Store
*.gguf
venv
demos/function-calling/ollama/models/
demos/function-calling/ollama/id_ed*
demos/function_calling/ollama/models/
demos/function_calling/ollama/id_ed*

View file

@ -2,13 +2,13 @@ import os
from jinja2 import Environment, FileSystemLoader
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
KATANEMO_CONFIG_FILE = os.getenv('KATANEMO_CONFIG_FILE', 'katanemo-config.yaml')
BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt-config.yaml')
ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')
env = Environment(loader=FileSystemLoader('./'))
template = env.get_template('envoy.template.yaml')
with open(KATANEMO_CONFIG_FILE, 'r') as file:
with open(BOLT_CONFIG_FILE, 'r') as file:
katanemo_config = file.read()
data = {

View file

@ -1,13 +1,13 @@
services:
config-generator:
config_generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml
- ./bolt-config.yaml:/usr/src/app/bolt-config.yaml
- ./generated:/usr/src/app/out
bolt:
@ -22,16 +22,16 @@ services:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
depends_on:
config-generator:
config_generator:
condition: service_completed_successfully
embeddingserver:
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
embeddingserver:
model_server:
build:
context: ../../embedding-server
context: ../../model_server
dockerfile: Dockerfile
ports:
- "18081:80"
@ -42,7 +42,7 @@ services:
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
functionresolver:
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
@ -71,9 +71,9 @@ services:
profiles:
- manual
chatbot-ui:
chatbot_ui:
build:
context: ../../chatbot-ui
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
@ -92,6 +92,8 @@ services:
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
profiles:
- monitoring
grafana:
image: grafana/grafana
@ -106,3 +108,5 @@ services:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
profiles:
- monitoring

View file

@ -976,7 +976,7 @@ dependencies = [
"open-message-format-embeddings",
"proxy-wasm",
"proxy-wasm-test-framework",
"public-types",
"public_types",
"serde",
"serde_json",
"serde_yaml",
@ -1453,7 +1453,7 @@ dependencies = [
]
[[package]]
name = "public-types"
name = "public_types"
version = "0.1.0"
dependencies = [
"open-message-format-embeddings",

View file

@ -15,7 +15,7 @@ serde_yaml = "0.9.34"
serde_json = "1.0"
md5 = "0.7.0"
open-message-format-embeddings = { path = "../open-message-format/clients/omf-embeddings-rust" }
public-types = { path = "../public-types" }
public_types = { path = "../public_types" }
http = "1.1.0"
governor = { version = "0.6.3", default-features = false, features = ["no_std"]}
tiktoken-rs = "0.5.9"

View file

@ -6,7 +6,7 @@ COPY envoyfilter/src /envoyfilter/src
COPY envoyfilter/Cargo.toml /envoyfilter/
COPY envoyfilter/Cargo.lock /envoyfilter/
COPY open-message-format /open-message-format
COPY public-types /public-types
COPY public_types /public_types
RUN cargo build --release --target wasm32-wasi

View file

@ -63,7 +63,7 @@ static_resources:
- match:
prefix: "/embeddings"
route:
cluster: embeddingserver
cluster: model_server
- match:
prefix: "/"
direct_response:
@ -123,20 +123,20 @@ static_resources:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: embeddingserver
- name: model_server
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: embeddingserver
cluster_name: model_server
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: embeddingserver
address: model_server
port_value: 80
hostname: "embeddingserver"
hostname: "model_server"
- name: weatherhost
connect_timeout: 5s
type: STRICT_DNS
@ -148,9 +148,9 @@ static_resources:
- endpoint:
address:
socket_address:
address: embeddingserver
address: model_server
port_value: 80
hostname: "embeddingserver"
hostname: "model_server"
- name: nerhost
connect_timeout: 5s
type: STRICT_DNS
@ -162,9 +162,9 @@ static_resources:
- endpoint:
address:
socket_address:
address: embeddingserver
address: model_server
port_value: 80
hostname: "embeddingserver"
hostname: "model_server"
- name: mistral_7b_instruct
connect_timeout: 5s
type: STRICT_DNS
@ -190,6 +190,6 @@ static_resources:
- endpoint:
address:
socket_address:
address: functionresolver
address: function_resolver
port_value: 80
hostname: "bolt_fc_1b"

View file

@ -7,3 +7,4 @@ pub const USER_ROLE: &str = "user";
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b";
pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const MODEL_SERVER_NAME: &str = "model_server";

View file

@ -1,4 +1,4 @@
use crate::consts::DEFAULT_EMBEDDING_MODEL;
use crate::consts::{DEFAULT_EMBEDDING_MODEL, MODEL_SERVER_NAME};
use crate::ratelimit;
use crate::stats::{Counter, Gauge, RecordingMetric};
use crate::stream_context::StreamContext;
@ -123,11 +123,11 @@ impl FilterContext {
let json_data = to_string(&embeddings_input).unwrap();
let token_id = match self.dispatch_http_call(
"embeddingserver",
MODEL_SERVER_NAME,
vec![
(":method", "POST"),
(":path", "/embeddings"),
(":authority", "embeddingserver"),
(":authority", MODEL_SERVER_NAME),
("content-type", "application/json"),
("x-envoy-upstream-rq-timeout-ms", "60000"),
],
@ -219,7 +219,7 @@ impl RootContext for FilterContext {
.insert(pt.name.clone(), pt.clone());
}
debug!("set configuration object: {:?}", self.config);
debug!("set configuration object");
if let Some(ratelimits_config) = self
.config

View file

@ -1,7 +1,7 @@
use crate::consts::{
BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL,
DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE,
USER_ROLE,
DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
};
use crate::filter_context::{embeddings_store, WasmMetrics};
use crate::ratelimit;
@ -175,11 +175,11 @@ impl StreamContext {
};
let token_id = match self.dispatch_http_call(
"embeddingserver",
MODEL_SERVER_NAME,
vec![
(":method", "POST"),
(":path", "/zeroshot"),
(":authority", "embeddingserver"),
(":authority", MODEL_SERVER_NAME),
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", "60000"),
@ -637,11 +637,11 @@ impl HttpContext for StreamContext {
};
let token_id = match self.dispatch_http_call(
"embeddingserver",
MODEL_SERVER_NAME,
vec![
(":method", "POST"),
(":path", "/embeddings"),
(":authority", "embeddingserver"),
(":authority", MODEL_SERVER_NAME),
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
("x-envoy-upstream-rq-timeout-ms", "60000"),

View file

@ -83,7 +83,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_http_call(Some("embeddingserver"), None, None, None, None)
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(1))
.expect_metric_increment("active_http_calls", 1)
.expect_log(Some(LogLevel::Debug), None)
@ -114,7 +114,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.returning(Some(&embeddings_response_buffer))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_http_call(Some("embeddingserver"), None, None, None, None)
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(2))
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::None)

View file

@ -9,24 +9,24 @@
"path": "envoyfilter"
},
{
"name": "embedding-server",
"path": "embedding-server"
"name": "model_server",
"path": "model_server"
},
{
"name": "function_resolver",
"path": "function_resolver"
},
{
"name": "chatbot-ui",
"path": "chatbot-ui"
"name": "chatbot_ui",
"path": "chatbot_ui"
},
{
"name": "open-message-format",
"path": "open-message-format"
},
{
"name": "demos/function-calling",
"path": "./demos/function-calling",
"name": "demos/function_calling",
"path": "./demos/function_calling",
},
],
"settings": {}

View file

@ -1,5 +1,3 @@
# copied from https://github.com/bergos/embedding-server
FROM python:3 AS base
#

View file

@ -603,7 +603,7 @@ dependencies = [
]
[[package]]
name = "public-types"
name = "public_types"
version = "0.1.0"
dependencies = [
"open-message-format-embeddings",

View file

@ -1,5 +1,5 @@
[package]
name = "public-types"
name = "public_types"
version = "0.1.0"
edition = "2021"