improve service names (#54)

- embedding-server => model_server
- public-types => public_types
- chatbot-ui => chatbot_ui
- function-calling => function_calling
2026-07-17 16:31:04 +02:00 · 2024-09-17 08:47:35 -07:00 · 2024-09-17 08:47:35 -07:00 · 060a0d665e
commit 060a0d665e
parent 215f96e273
35 changed files with 54 additions and 52 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,8 +1,7 @@
 envoyfilter/target
 envoyfilter/qdrant_data/
-public-types/target
-embedding-server/venv/
-chatbot-ui/venv/
+public_types/target
+/venv/
 __pycache__
 grafana-data
 prom_data
@ -12,5 +11,5 @@ generated
 .DS_Store
 *.gguf
 venv
-demos/function-calling/ollama/models/
-demos/function-calling/ollama/id_ed*
+demos/function_calling/ollama/models/
+demos/function_calling/ollama/id_ed*
--- a/chatbot_ui/.vscode/launch.json
+++ b/chatbot_ui/.vscode/launch.json
--- a/chatbot_ui/Dockerfile
+++ b/chatbot_ui/Dockerfile
--- a/chatbot_ui/app/run.py
+++ b/chatbot_ui/app/run.py
--- a/chatbot_ui/requirements.txt
+++ b/chatbot_ui/requirements.txt
--- a/config_generator/config_generator.py
+++ b/config_generator/config_generator.py
@ -2,13 +2,13 @@ import os
 from jinja2 import Environment, FileSystemLoader

 ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
-KATANEMO_CONFIG_FILE = os.getenv('KATANEMO_CONFIG_FILE', 'katanemo-config.yaml')
+BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt-config.yaml')
 ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')

 env = Environment(loader=FileSystemLoader('./'))
 template = env.get_template('envoy.template.yaml')

-with open(KATANEMO_CONFIG_FILE, 'r') as file:
+with open(BOLT_CONFIG_FILE, 'r') as file:
    katanemo_config = file.read()

 data = {
--- a/demos/function_calling/Bolt-FC-1B-Q3_K_L.model_file
+++ b/demos/function_calling/Bolt-FC-1B-Q3_K_L.model_file
--- a/demos/function_calling/Bolt-FC-1B-Q4_K_M.model_file
+++ b/demos/function_calling/Bolt-FC-1B-Q4_K_M.model_file
--- a/demos/function_calling/README.md
+++ b/demos/function_calling/README.md
--- a/demos/function-calling/katanemo-config.yaml
+++ b/demos/function-calling/katanemo-config.yaml
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@ -1,13 +1,13 @@

 services:

-  config-generator:
+  config_generator:
    build:
      context: ../../
      dockerfile: config_generator/Dockerfile
    volumes:
      - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
-      - ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml
+      - ./bolt-config.yaml:/usr/src/app/bolt-config.yaml
      - ./generated:/usr/src/app/out

  bolt:
@ -22,16 +22,16 @@ services:
      - ./generated/envoy.yaml:/etc/envoy/envoy.yaml
      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
    depends_on:
-      config-generator:
+      config_generator:
        condition: service_completed_successfully
-      embeddingserver:
+      model_server:
        condition: service_healthy
    environment:
      - LOG_LEVEL=debug

-  embeddingserver:
+  model_server:
    build:
-      context: ../../embedding-server
+      context: ../../model_server
      dockerfile: Dockerfile
    ports:
      - "18081:80"
@ -42,7 +42,7 @@ services:
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface

-  functionresolver:
+  function_resolver:
    build:
      context: ../../function_resolver
      dockerfile: Dockerfile
@ -71,9 +71,9 @@ services:
    profiles:
      - manual

-  chatbot-ui:
+  chatbot_ui:
    build:
-      context: ../../chatbot-ui
+      context: ../../chatbot_ui
      dockerfile: Dockerfile
    ports:
      - "18080:8080"
@ -92,6 +92,8 @@ services:
    volumes:
      - ./prometheus:/etc/prometheus
      - ./prom_data:/prometheus
+    profiles:
+      - monitoring

  grafana:
    image: grafana/grafana
@ -106,3 +108,5 @@ services:
      - ./grafana:/etc/grafana/provisioning/datasources
      - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
      - ./grafana/dashboards:/var/lib/grafana/dashboards
+    profiles:
+      - monitoring
--- a/demos/function_calling/grafana/dashboard.yaml
+++ b/demos/function_calling/grafana/dashboard.yaml
--- a/demos/function_calling/grafana/dashboards/envoy_overview.json
+++ b/demos/function_calling/grafana/dashboards/envoy_overview.json
--- a/demos/function_calling/grafana/datasource.yaml
+++ b/demos/function_calling/grafana/datasource.yaml
--- a/demos/function_calling/prometheus/prometheus.yaml
+++ b/demos/function_calling/prometheus/prometheus.yaml
--- a/envoyfilter/Cargo.lock
+++ b/envoyfilter/Cargo.lock
@ -976,7 +976,7 @@ dependencies = [
 "open-message-format-embeddings",
 "proxy-wasm",
 "proxy-wasm-test-framework",
- "public-types",
+ "public_types",
 "serde",
 "serde_json",
 "serde_yaml",
@ -1453,7 +1453,7 @@ dependencies = [
 ]

 [[package]]
-name = "public-types"
+name = "public_types"
 version = "0.1.0"
 dependencies = [
 "open-message-format-embeddings",
--- a/envoyfilter/Cargo.toml
+++ b/envoyfilter/Cargo.toml
@ -15,7 +15,7 @@ serde_yaml = "0.9.34"
 serde_json = "1.0"
 md5 = "0.7.0"
 open-message-format-embeddings = { path = "../open-message-format/clients/omf-embeddings-rust" }
-public-types = { path = "../public-types" }
+public_types = { path = "../public_types" }
 http = "1.1.0"
 governor = { version = "0.6.3", default-features = false, features = ["no_std"]}
 tiktoken-rs = "0.5.9"
--- a/envoyfilter/Dockerfile
+++ b/envoyfilter/Dockerfile
@ -6,7 +6,7 @@ COPY envoyfilter/src /envoyfilter/src
 COPY envoyfilter/Cargo.toml /envoyfilter/
 COPY envoyfilter/Cargo.lock /envoyfilter/
 COPY open-message-format /open-message-format
-COPY public-types /public-types
+COPY public_types /public_types

 RUN cargo build --release --target wasm32-wasi

--- a/envoyfilter/envoy.template.yaml
+++ b/envoyfilter/envoy.template.yaml
@ -63,7 +63,7 @@ static_resources:
                      - match:
                          prefix: "/embeddings"
                        route:
-                          cluster: embeddingserver
+                          cluster: model_server
                      - match:
                          prefix: "/"
                        direct_response:
@ -123,20 +123,20 @@ static_resources:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3

-    - name: embeddingserver
+    - name: model_server
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
-        cluster_name: embeddingserver
+        cluster_name: model_server
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
-                      address: embeddingserver
+                      address: model_server
                      port_value: 80
-                  hostname: "embeddingserver"
+                  hostname: "model_server"
    - name: weatherhost
      connect_timeout: 5s
      type: STRICT_DNS
@ -148,9 +148,9 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: embeddingserver
+                      address: model_server
                      port_value: 80
-                  hostname: "embeddingserver"
+                  hostname: "model_server"
    - name: nerhost
      connect_timeout: 5s
      type: STRICT_DNS
@ -162,9 +162,9 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: embeddingserver
+                      address: model_server
                      port_value: 80
-                  hostname: "embeddingserver"
+                  hostname: "model_server"
    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
@ -190,6 +190,6 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: functionresolver
+                      address: function_resolver
                      port_value: 80
                  hostname: "bolt_fc_1b"
--- a/envoyfilter/src/consts.rs
+++ b/envoyfilter/src/consts.rs
@ -7,3 +7,4 @@ pub const USER_ROLE: &str = "user";
 pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
 pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b";
 pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
+pub const MODEL_SERVER_NAME: &str = "model_server";
--- a/envoyfilter/src/filter_context.rs
+++ b/envoyfilter/src/filter_context.rs
@ -1,4 +1,4 @@
-use crate::consts::DEFAULT_EMBEDDING_MODEL;
+use crate::consts::{DEFAULT_EMBEDDING_MODEL, MODEL_SERVER_NAME};
 use crate::ratelimit;
 use crate::stats::{Counter, Gauge, RecordingMetric};
 use crate::stream_context::StreamContext;
@ -123,11 +123,11 @@ impl FilterContext {

        let json_data = to_string(&embeddings_input).unwrap();
        let token_id = match self.dispatch_http_call(
-            "embeddingserver",
+            MODEL_SERVER_NAME,
            vec![
                (":method", "POST"),
                (":path", "/embeddings"),
-                (":authority", "embeddingserver"),
+                (":authority", MODEL_SERVER_NAME),
                ("content-type", "application/json"),
                ("x-envoy-upstream-rq-timeout-ms", "60000"),
            ],
@ -219,7 +219,7 @@ impl RootContext for FilterContext {
                    .insert(pt.name.clone(), pt.clone());
            }

-            debug!("set configuration object: {:?}", self.config);
+            debug!("set configuration object");

            if let Some(ratelimits_config) = self
                .config
--- a/envoyfilter/src/stream_context.rs
+++ b/envoyfilter/src/stream_context.rs
@ -1,7 +1,7 @@
 use crate::consts::{
    BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL,
-    DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE,
-    USER_ROLE,
+    DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
+    RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
 };
 use crate::filter_context::{embeddings_store, WasmMetrics};
 use crate::ratelimit;
@ -175,11 +175,11 @@ impl StreamContext {
        };

        let token_id = match self.dispatch_http_call(
-            "embeddingserver",
+            MODEL_SERVER_NAME,
            vec![
                (":method", "POST"),
                (":path", "/zeroshot"),
-                (":authority", "embeddingserver"),
+                (":authority", MODEL_SERVER_NAME),
                ("content-type", "application/json"),
                ("x-envoy-max-retries", "3"),
                ("x-envoy-upstream-rq-timeout-ms", "60000"),
@ -637,11 +637,11 @@ impl HttpContext for StreamContext {
        };

        let token_id = match self.dispatch_http_call(
-            "embeddingserver",
+            MODEL_SERVER_NAME,
            vec![
                (":method", "POST"),
                (":path", "/embeddings"),
-                (":authority", "embeddingserver"),
+                (":authority", MODEL_SERVER_NAME),
                ("content-type", "application/json"),
                ("x-envoy-max-retries", "3"),
                ("x-envoy-upstream-rq-timeout-ms", "60000"),
--- a/envoyfilter/tests/integration.rs
+++ b/envoyfilter/tests/integration.rs
@ -83,7 +83,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
-        .expect_http_call(Some("embeddingserver"), None, None, None, None)
+        .expect_http_call(Some("model_server"), None, None, None, None)
        .returning(Some(1))
        .expect_metric_increment("active_http_calls", 1)
        .expect_log(Some(LogLevel::Debug), None)
@ -114,7 +114,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
        .returning(Some(&embeddings_response_buffer))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_http_call(Some("embeddingserver"), None, None, None, None)
+        .expect_http_call(Some("model_server"), None, None, None, None)
        .returning(Some(2))
        .expect_metric_increment("active_http_calls", 1)
        .execute_and_expect(ReturnType::None)
--- a/gateway.code-workspace
+++ b/gateway.code-workspace
@ -9,24 +9,24 @@
      "path": "envoyfilter"
    },
    {
-      "name": "embedding-server",
-      "path": "embedding-server"
+      "name": "model_server",
+      "path": "model_server"
    },
    {
      "name": "function_resolver",
      "path": "function_resolver"
    },
    {
-      "name": "chatbot-ui",
-      "path": "chatbot-ui"
+      "name": "chatbot_ui",
+      "path": "chatbot_ui"
    },
    {
      "name": "open-message-format",
      "path": "open-message-format"
    },
    {
-      "name": "demos/function-calling",
-      "path": "./demos/function-calling",
+      "name": "demos/function_calling",
+      "path": "./demos/function_calling",
    },
  ],
  "settings": {}
--- a/embedding-server/.vscode/launch.json
+++ b/embedding-server/.vscode/launch.json
--- a/embedding-server/Dockerfile
+++ b/embedding-server/Dockerfile
@ -1,5 +1,3 @@
-# copied from https://github.com/bergos/embedding-server
-
 FROM python:3 AS base

 #
--- a/embedding-server/app/install.py
+++ b/embedding-server/app/install.py
--- a/embedding-server/app/load_models.py
+++ b/embedding-server/app/load_models.py
--- a/embedding-server/app/main.py
+++ b/embedding-server/app/main.py
--- a/embedding-server/requirements.txt
+++ b/embedding-server/requirements.txt
--- a/public_types/Cargo.lock
+++ b/public_types/Cargo.lock
@ -603,7 +603,7 @@ dependencies = [
 ]

 [[package]]
-name = "public-types"
+name = "public_types"
 version = "0.1.0"
 dependencies = [
 "open-message-format-embeddings",
--- a/public_types/Cargo.toml
+++ b/public_types/Cargo.toml
@ -1,5 +1,5 @@
 [package]
-name = "public-types"
+name = "public_types"
 version = "0.1.0"
 edition = "2021"

--- a/public_types/src/common_types.rs
+++ b/public_types/src/common_types.rs
--- a/public_types/src/configuration.rs
+++ b/public_types/src/configuration.rs
--- a/public_types/src/lib.rs
+++ b/public_types/src/lib.rs