diff --git a/.gitignore b/.gitignore index 8be85e66..377b444e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ envoyfilter/target envoyfilter/qdrant_data/ -public-types/target -embedding-server/venv/ -chatbot-ui/venv/ +public_types/target +/venv/ __pycache__ grafana-data prom_data @@ -12,5 +11,5 @@ generated .DS_Store *.gguf venv -demos/function-calling/ollama/models/ -demos/function-calling/ollama/id_ed* +demos/function_calling/ollama/models/ +demos/function_calling/ollama/id_ed* diff --git a/chatbot-ui/.vscode/launch.json b/chatbot_ui/.vscode/launch.json similarity index 100% rename from chatbot-ui/.vscode/launch.json rename to chatbot_ui/.vscode/launch.json diff --git a/chatbot-ui/Dockerfile b/chatbot_ui/Dockerfile similarity index 100% rename from chatbot-ui/Dockerfile rename to chatbot_ui/Dockerfile diff --git a/chatbot-ui/app/run.py b/chatbot_ui/app/run.py similarity index 100% rename from chatbot-ui/app/run.py rename to chatbot_ui/app/run.py diff --git a/chatbot-ui/requirements.txt b/chatbot_ui/requirements.txt similarity index 100% rename from chatbot-ui/requirements.txt rename to chatbot_ui/requirements.txt diff --git a/config_generator/config_generator.py b/config_generator/config_generator.py index f08d3e42..7d599b9c 100644 --- a/config_generator/config_generator.py +++ b/config_generator/config_generator.py @@ -2,13 +2,13 @@ import os from jinja2 import Environment, FileSystemLoader ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml') -KATANEMO_CONFIG_FILE = os.getenv('KATANEMO_CONFIG_FILE', 'katanemo-config.yaml') +BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt-config.yaml') ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml') env = Environment(loader=FileSystemLoader('./')) template = env.get_template('envoy.template.yaml') -with open(KATANEMO_CONFIG_FILE, 'r') as file: +with open(BOLT_CONFIG_FILE, 'r') as file: katanemo_config = file.read() data = { diff --git a/demos/function-calling/Bolt-FC-1B-Q3_K_L.model_file b/demos/function_calling/Bolt-FC-1B-Q3_K_L.model_file similarity index 100% rename from demos/function-calling/Bolt-FC-1B-Q3_K_L.model_file rename to demos/function_calling/Bolt-FC-1B-Q3_K_L.model_file diff --git a/demos/function-calling/Bolt-FC-1B-Q4_K_M.model_file b/demos/function_calling/Bolt-FC-1B-Q4_K_M.model_file similarity index 100% rename from demos/function-calling/Bolt-FC-1B-Q4_K_M.model_file rename to demos/function_calling/Bolt-FC-1B-Q4_K_M.model_file diff --git a/demos/function-calling/README.md b/demos/function_calling/README.md similarity index 100% rename from demos/function-calling/README.md rename to demos/function_calling/README.md diff --git a/demos/function-calling/katanemo-config.yaml b/demos/function_calling/bolt-config.yaml similarity index 100% rename from demos/function-calling/katanemo-config.yaml rename to demos/function_calling/bolt-config.yaml diff --git a/demos/function-calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml similarity index 89% rename from demos/function-calling/docker-compose.yaml rename to demos/function_calling/docker-compose.yaml index 76c6e82f..2b1e22e9 100644 --- a/demos/function-calling/docker-compose.yaml +++ b/demos/function_calling/docker-compose.yaml @@ -1,13 +1,13 @@ services: - config-generator: + config_generator: build: context: ../../ dockerfile: config_generator/Dockerfile volumes: - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml - - ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml + - ./bolt-config.yaml:/usr/src/app/bolt-config.yaml - ./generated:/usr/src/app/out bolt: @@ -22,16 +22,16 @@ services: - ./generated/envoy.yaml:/etc/envoy/envoy.yaml - /etc/ssl/cert.pem:/etc/ssl/cert.pem depends_on: - config-generator: + config_generator: condition: service_completed_successfully - embeddingserver: + model_server: condition: service_healthy environment: - LOG_LEVEL=debug - embeddingserver: + model_server: build: - context: ../../embedding-server + context: ../../model_server dockerfile: Dockerfile ports: - "18081:80" @@ -42,7 +42,7 @@ services: volumes: - ~/.cache/huggingface:/root/.cache/huggingface - functionresolver: + function_resolver: build: context: ../../function_resolver dockerfile: Dockerfile @@ -71,9 +71,9 @@ services: profiles: - manual - chatbot-ui: + chatbot_ui: build: - context: ../../chatbot-ui + context: ../../chatbot_ui dockerfile: Dockerfile ports: - "18080:8080" @@ -92,6 +92,8 @@ services: volumes: - ./prometheus:/etc/prometheus - ./prom_data:/prometheus + profiles: + - monitoring grafana: image: grafana/grafana @@ -106,3 +108,5 @@ services: - ./grafana:/etc/grafana/provisioning/datasources - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml - ./grafana/dashboards:/var/lib/grafana/dashboards + profiles: + - monitoring diff --git a/demos/function-calling/grafana/dashboard.yaml b/demos/function_calling/grafana/dashboard.yaml similarity index 100% rename from demos/function-calling/grafana/dashboard.yaml rename to demos/function_calling/grafana/dashboard.yaml diff --git a/demos/function-calling/grafana/dashboards/envoy_overview.json b/demos/function_calling/grafana/dashboards/envoy_overview.json similarity index 100% rename from demos/function-calling/grafana/dashboards/envoy_overview.json rename to demos/function_calling/grafana/dashboards/envoy_overview.json diff --git a/demos/function-calling/grafana/datasource.yaml b/demos/function_calling/grafana/datasource.yaml similarity index 100% rename from demos/function-calling/grafana/datasource.yaml rename to demos/function_calling/grafana/datasource.yaml diff --git a/demos/function-calling/prometheus/prometheus.yaml b/demos/function_calling/prometheus/prometheus.yaml similarity index 100% rename from demos/function-calling/prometheus/prometheus.yaml rename to demos/function_calling/prometheus/prometheus.yaml diff --git a/envoyfilter/Cargo.lock b/envoyfilter/Cargo.lock index a9391094..32925b37 100644 --- a/envoyfilter/Cargo.lock +++ b/envoyfilter/Cargo.lock @@ -976,7 +976,7 @@ dependencies = [ "open-message-format-embeddings", "proxy-wasm", "proxy-wasm-test-framework", - "public-types", + "public_types", "serde", "serde_json", "serde_yaml", @@ -1453,7 +1453,7 @@ dependencies = [ ] [[package]] -name = "public-types" +name = "public_types" version = "0.1.0" dependencies = [ "open-message-format-embeddings", diff --git a/envoyfilter/Cargo.toml b/envoyfilter/Cargo.toml index 62786171..4d59c97c 100644 --- a/envoyfilter/Cargo.toml +++ b/envoyfilter/Cargo.toml @@ -15,7 +15,7 @@ serde_yaml = "0.9.34" serde_json = "1.0" md5 = "0.7.0" open-message-format-embeddings = { path = "../open-message-format/clients/omf-embeddings-rust" } -public-types = { path = "../public-types" } +public_types = { path = "../public_types" } http = "1.1.0" governor = { version = "0.6.3", default-features = false, features = ["no_std"]} tiktoken-rs = "0.5.9" diff --git a/envoyfilter/Dockerfile b/envoyfilter/Dockerfile index d1d14440..de01600a 100644 --- a/envoyfilter/Dockerfile +++ b/envoyfilter/Dockerfile @@ -6,7 +6,7 @@ COPY envoyfilter/src /envoyfilter/src COPY envoyfilter/Cargo.toml /envoyfilter/ COPY envoyfilter/Cargo.lock /envoyfilter/ COPY open-message-format /open-message-format -COPY public-types /public-types +COPY public_types /public_types RUN cargo build --release --target wasm32-wasi diff --git a/envoyfilter/envoy.template.yaml b/envoyfilter/envoy.template.yaml index 318fc2b7..bd79b6c0 100644 --- a/envoyfilter/envoy.template.yaml +++ b/envoyfilter/envoy.template.yaml @@ -63,7 +63,7 @@ static_resources: - match: prefix: "/embeddings" route: - cluster: embeddingserver + cluster: model_server - match: prefix: "/" direct_response: @@ -123,20 +123,20 @@ static_resources: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - - name: embeddingserver + - name: model_server connect_timeout: 5s type: STRICT_DNS lb_policy: ROUND_ROBIN load_assignment: - cluster_name: embeddingserver + cluster_name: model_server endpoints: - lb_endpoints: - endpoint: address: socket_address: - address: embeddingserver + address: model_server port_value: 80 - hostname: "embeddingserver" + hostname: "model_server" - name: weatherhost connect_timeout: 5s type: STRICT_DNS @@ -148,9 +148,9 @@ static_resources: - endpoint: address: socket_address: - address: embeddingserver + address: model_server port_value: 80 - hostname: "embeddingserver" + hostname: "model_server" - name: nerhost connect_timeout: 5s type: STRICT_DNS @@ -162,9 +162,9 @@ static_resources: - endpoint: address: socket_address: - address: embeddingserver + address: model_server port_value: 80 - hostname: "embeddingserver" + hostname: "model_server" - name: mistral_7b_instruct connect_timeout: 5s type: STRICT_DNS @@ -190,6 +190,6 @@ static_resources: - endpoint: address: socket_address: - address: functionresolver + address: function_resolver port_value: 80 hostname: "bolt_fc_1b" diff --git a/envoyfilter/src/consts.rs b/envoyfilter/src/consts.rs index 363875dd..ffb999b7 100644 --- a/envoyfilter/src/consts.rs +++ b/envoyfilter/src/consts.rs @@ -7,3 +7,4 @@ pub const USER_ROLE: &str = "user"; pub const GPT_35_TURBO: &str = "gpt-3.5-turbo"; pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b"; pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes +pub const MODEL_SERVER_NAME: &str = "model_server"; diff --git a/envoyfilter/src/filter_context.rs b/envoyfilter/src/filter_context.rs index 2b9719c8..cbe6f0e9 100644 --- a/envoyfilter/src/filter_context.rs +++ b/envoyfilter/src/filter_context.rs @@ -1,4 +1,4 @@ -use crate::consts::DEFAULT_EMBEDDING_MODEL; +use crate::consts::{DEFAULT_EMBEDDING_MODEL, MODEL_SERVER_NAME}; use crate::ratelimit; use crate::stats::{Counter, Gauge, RecordingMetric}; use crate::stream_context::StreamContext; @@ -123,11 +123,11 @@ impl FilterContext { let json_data = to_string(&embeddings_input).unwrap(); let token_id = match self.dispatch_http_call( - "embeddingserver", + MODEL_SERVER_NAME, vec![ (":method", "POST"), (":path", "/embeddings"), - (":authority", "embeddingserver"), + (":authority", MODEL_SERVER_NAME), ("content-type", "application/json"), ("x-envoy-upstream-rq-timeout-ms", "60000"), ], @@ -219,7 +219,7 @@ impl RootContext for FilterContext { .insert(pt.name.clone(), pt.clone()); } - debug!("set configuration object: {:?}", self.config); + debug!("set configuration object"); if let Some(ratelimits_config) = self .config diff --git a/envoyfilter/src/stream_context.rs b/envoyfilter/src/stream_context.rs index c74cf097..35fc81c8 100644 --- a/envoyfilter/src/stream_context.rs +++ b/envoyfilter/src/stream_context.rs @@ -1,7 +1,7 @@ use crate::consts::{ BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL, - DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, - USER_ROLE, + DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, + RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, }; use crate::filter_context::{embeddings_store, WasmMetrics}; use crate::ratelimit; @@ -175,11 +175,11 @@ impl StreamContext { }; let token_id = match self.dispatch_http_call( - "embeddingserver", + MODEL_SERVER_NAME, vec![ (":method", "POST"), (":path", "/zeroshot"), - (":authority", "embeddingserver"), + (":authority", MODEL_SERVER_NAME), ("content-type", "application/json"), ("x-envoy-max-retries", "3"), ("x-envoy-upstream-rq-timeout-ms", "60000"), @@ -637,11 +637,11 @@ impl HttpContext for StreamContext { }; let token_id = match self.dispatch_http_call( - "embeddingserver", + MODEL_SERVER_NAME, vec![ (":method", "POST"), (":path", "/embeddings"), - (":authority", "embeddingserver"), + (":authority", MODEL_SERVER_NAME), ("content-type", "application/json"), ("x-envoy-max-retries", "3"), ("x-envoy-upstream-rq-timeout-ms", "60000"), diff --git a/envoyfilter/tests/integration.rs b/envoyfilter/tests/integration.rs index 1c4f97cb..ecaba388 100644 --- a/envoyfilter/tests/integration.rs +++ b/envoyfilter/tests/integration.rs @@ -83,7 +83,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_http_call(Some("embeddingserver"), None, None, None, None) + .expect_http_call(Some("model_server"), None, None, None, None) .returning(Some(1)) .expect_metric_increment("active_http_calls", 1) .expect_log(Some(LogLevel::Debug), None) @@ -114,7 +114,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { .returning(Some(&embeddings_response_buffer)) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_http_call(Some("embeddingserver"), None, None, None, None) + .expect_http_call(Some("model_server"), None, None, None, None) .returning(Some(2)) .expect_metric_increment("active_http_calls", 1) .execute_and_expect(ReturnType::None) diff --git a/gateway.code-workspace b/gateway.code-workspace index 109f42d7..a6dac5c6 100644 --- a/gateway.code-workspace +++ b/gateway.code-workspace @@ -9,24 +9,24 @@ "path": "envoyfilter" }, { - "name": "embedding-server", - "path": "embedding-server" + "name": "model_server", + "path": "model_server" }, { "name": "function_resolver", "path": "function_resolver" }, { - "name": "chatbot-ui", - "path": "chatbot-ui" + "name": "chatbot_ui", + "path": "chatbot_ui" }, { "name": "open-message-format", "path": "open-message-format" }, { - "name": "demos/function-calling", - "path": "./demos/function-calling", + "name": "demos/function_calling", + "path": "./demos/function_calling", }, ], "settings": {} diff --git a/embedding-server/.vscode/launch.json b/model_server/.vscode/launch.json similarity index 100% rename from embedding-server/.vscode/launch.json rename to model_server/.vscode/launch.json diff --git a/embedding-server/Dockerfile b/model_server/Dockerfile similarity index 95% rename from embedding-server/Dockerfile rename to model_server/Dockerfile index caaf118e..10173ee8 100644 --- a/embedding-server/Dockerfile +++ b/model_server/Dockerfile @@ -1,5 +1,3 @@ -# copied from https://github.com/bergos/embedding-server - FROM python:3 AS base # diff --git a/embedding-server/app/install.py b/model_server/app/install.py similarity index 100% rename from embedding-server/app/install.py rename to model_server/app/install.py diff --git a/embedding-server/app/load_models.py b/model_server/app/load_models.py similarity index 100% rename from embedding-server/app/load_models.py rename to model_server/app/load_models.py diff --git a/embedding-server/app/main.py b/model_server/app/main.py similarity index 100% rename from embedding-server/app/main.py rename to model_server/app/main.py diff --git a/embedding-server/requirements.txt b/model_server/requirements.txt similarity index 100% rename from embedding-server/requirements.txt rename to model_server/requirements.txt diff --git a/public-types/Cargo.lock b/public_types/Cargo.lock similarity index 99% rename from public-types/Cargo.lock rename to public_types/Cargo.lock index 2717bf2a..71075198 100644 --- a/public-types/Cargo.lock +++ b/public_types/Cargo.lock @@ -603,7 +603,7 @@ dependencies = [ ] [[package]] -name = "public-types" +name = "public_types" version = "0.1.0" dependencies = [ "open-message-format-embeddings", diff --git a/public-types/Cargo.toml b/public_types/Cargo.toml similarity index 90% rename from public-types/Cargo.toml rename to public_types/Cargo.toml index 514bf9bb..ccbd220d 100644 --- a/public-types/Cargo.toml +++ b/public_types/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "public-types" +name = "public_types" version = "0.1.0" edition = "2021" diff --git a/public-types/src/common_types.rs b/public_types/src/common_types.rs similarity index 100% rename from public-types/src/common_types.rs rename to public_types/src/common_types.rs diff --git a/public-types/src/configuration.rs b/public_types/src/configuration.rs similarity index 100% rename from public-types/src/configuration.rs rename to public_types/src/configuration.rs diff --git a/public-types/src/lib.rs b/public_types/src/lib.rs similarity index 100% rename from public-types/src/lib.rs rename to public_types/src/lib.rs