From ea86f736054cbe54a0e84db09db8a75103ff9da7 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Fri, 27 Sep 2024 16:41:39 -0700
Subject: [PATCH] rename envoyfilter => arch (#91)

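* rename the envoyfilter/ directory to arch/ and update every path that
  referenced it (CI workflow, .gitignore, pre-commit hooks, Dockerfiles,
  build_filter.sh, docker-compose, VS Code workspace)

* rename the request headers from x-bolt-* to x-arch-* (llm-provider,
  ratelimit-selector, openai-api-key, mistral-api-key,
  deterministic-provider); clients must send the new names

* rename the bolt_fc_1b cluster to arch_fc and drop the
  /bolt_fc_1b/v1/chat/completions route from envoy.template.yaml

* rename bolt_config.yaml to arch_config.yaml (BOLT_CONFIG_FILE is now
  ARCH_CONFIG_FILE) and the docker-compose service bolt to arch

* more renames: Bolt -> Arch in identifiers, comments, log messages and
  the "Arch" model-name prefix check in stream_context.rs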
---
 .github/workflows/checks.yml                  | 14 +++---
 .gitignore                                    |  4 +-
 .pre-commit-config.yaml                       |  8 ++--
 {envoyfilter => arch}/Cargo.lock              |  0
 {envoyfilter => arch}/Cargo.toml              |  0
 {envoyfilter => arch}/Dockerfile              | 10 ++--
 {envoyfilter => arch}/README.md               |  0
 {envoyfilter => arch}/build_filter.sh         |  2 +-
 {envoyfilter => arch}/docker-compose.yaml     |  0
 {envoyfilter => arch}/download_mistral_7b.sh  |  0
 {envoyfilter => arch}/envoy.template.yaml     | 17 ++-----
 {envoyfilter => arch}/envoy.yaml              |  6 +--
 {envoyfilter => arch}/grafana/datasource.yaml |  0
 {envoyfilter => arch}/init_vector_store.sh    |  0
 {envoyfilter => arch}/katanemo-config.yaml    |  0
 .../prometheus/prometheus.yaml                |  0
 {envoyfilter => arch}/src/consts.rs           |  8 ++--
 {envoyfilter => arch}/src/filter_context.rs   |  0
 {envoyfilter => arch}/src/lib.rs              |  0
 {envoyfilter => arch}/src/llm_providers.rs    |  4 +-
 {envoyfilter => arch}/src/ratelimit.rs        |  0
 {envoyfilter => arch}/src/routing.rs          |  0
 {envoyfilter => arch}/src/stats.rs            |  0
 {envoyfilter => arch}/src/stream_context.rs   | 48 +++++++++----------
 {envoyfilter => arch}/src/tokenizer.rs        |  0
 {envoyfilter => arch}/tests/integration.rs    | 30 ++++++------
 chatbot_ui/app/run.py                         |  8 ++--
 config_generator/Dockerfile                   |  4 +-
 config_generator/config_generator.py          |  4 +-
 .../{bolt_config.yaml => arch_config.yaml}    |  0
 demos/function_calling/docker-compose.yaml    | 17 ++++---
 gateway.code-workspace                        |  4 +-
 model_server/app/main.py                      |  2 +-
 33 files changed, 91 insertions(+), 99 deletions(-)
 rename {envoyfilter => arch}/Cargo.lock (100%)
 rename {envoyfilter => arch}/Cargo.toml (100%)
 rename {envoyfilter => arch}/Dockerfile (60%)
 rename {envoyfilter => arch}/README.md (100%)
 rename {envoyfilter => arch}/build_filter.sh (50%)
 rename {envoyfilter => arch}/docker-compose.yaml (100%)
 rename {envoyfilter => arch}/download_mistral_7b.sh (100%)
 rename {envoyfilter => arch}/envoy.template.yaml (92%)
 rename {envoyfilter => arch}/envoy.yaml (98%)
 rename {envoyfilter => arch}/grafana/datasource.yaml (100%)
 rename {envoyfilter => arch}/init_vector_store.sh (100%)
 rename {envoyfilter => arch}/katanemo-config.yaml (100%)
 rename {envoyfilter => arch}/prometheus/prometheus.yaml (100%)
 rename {envoyfilter => arch}/src/consts.rs (62%)
 rename {envoyfilter => arch}/src/filter_context.rs (100%)
 rename {envoyfilter => arch}/src/lib.rs (100%)
 rename {envoyfilter => arch}/src/llm_providers.rs (91%)
 rename {envoyfilter => arch}/src/ratelimit.rs (100%)
 rename {envoyfilter => arch}/src/routing.rs (100%)
 rename {envoyfilter => arch}/src/stats.rs (100%)
 rename {envoyfilter => arch}/src/stream_context.rs (96%)
 rename {envoyfilter => arch}/src/tokenizer.rs (100%)
 rename {envoyfilter => arch}/tests/integration.rs (95%)
 rename demos/function_calling/{bolt_config.yaml => arch_config.yaml} (100%)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 8bca6786..548bb10f 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -11,8 +11,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Setup | Rust
         run: rustup toolchain install stable --profile minimal
-      - name: Run Clippy on envoyfilter
-        run: cd envoyfilter && cargo clippy --all-targets --all-features -- -Dwarnings
+      - name: Run Clippy on arch
+        run: cd arch && cargo clippy --all-targets --all-features -- -Dwarnings
       - name: Run Clippy on public_types
         run: cd public_types && cargo clippy --all-targets --all-features -- -Dwarnings
 
@@ -25,8 +25,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Setup | Rust
         run: rustup toolchain install stable --profile minimal
-      - name: Run Rustfmt on envoyfilter
-        run: cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check
+      - name: Run Rustfmt on arch
+        run: cd arch && cargo fmt -p intelligent-prompt-gateway -- --check
       - name: Run Rustfmt on public_types
         run: cd public_types && cargo fmt -p public_types -- --check
 
@@ -41,8 +41,8 @@ jobs:
       - name: Setup | Install wasm toolchain
         run: rustup target add wasm32-wasi
       - name: Build wasm module
-        run: cd envoyfilter && cargo build --release --target=wasm32-wasi
-      - name: Run Tests on envoyfilter
-        run: cd envoyfilter && cargo test
+        run: cd arch && cargo build --release --target=wasm32-wasi
+      - name: Run Tests on arch
+        run: cd arch && cargo test
       - name: Run Tests on public_types
         run: cd public_types && cargo test
diff --git a/.gitignore b/.gitignore
index 5f5a4d58..6f16c3d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-envoyfilter/target
-envoyfilter/qdrant_data/
+arch/target
+arch/qdrant_data/
 public_types/target
 /venv/
 __pycache__
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5b34693d..67f03f83 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@ repos:
     rev: v4.6.0
     hooks:
       - id: check-yaml
-        exclude: envoyfilter/envoy.template.yaml
+        exclude: arch/envoy.template.yaml
       - id: end-of-file-fixer
       - id: trailing-whitespace
   - repo: local
@@ -12,16 +12,16 @@ repos:
         name: cargo-fmt
         language: system
         types: [file, rust]
-        entry: bash -c "cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check"
+        entry: bash -c "cd arch && cargo fmt -p intelligent-prompt-gateway -- --check"
       - id: cargo-clippy
         name: cargo-clippy
         language: system
         types: [file, rust]
-        entry: bash -c "cd envoyfilter && cargo clippy -p intelligent-prompt-gateway --all"
+        entry: bash -c "cd arch && cargo clippy -p intelligent-prompt-gateway --all"
       - id: cargo-test
         name: cargo-test
         language: system
         types: [file, rust]
         # --lib is to only test the library, since when integration tests are made,
         # they will be in a seperate tests directory
-        entry: bash -c "cd envoyfilter && cargo test -p intelligent-prompt-gateway --lib"
+        entry: bash -c "cd arch && cargo test -p intelligent-prompt-gateway --lib"
diff --git a/envoyfilter/Cargo.lock b/arch/Cargo.lock
similarity index 100%
rename from envoyfilter/Cargo.lock
rename to arch/Cargo.lock
diff --git a/envoyfilter/Cargo.toml b/arch/Cargo.toml
similarity index 100%
rename from envoyfilter/Cargo.toml
rename to arch/Cargo.toml
diff --git a/envoyfilter/Dockerfile b/arch/Dockerfile
similarity index 60%
rename from envoyfilter/Dockerfile
rename to arch/Dockerfile
index 14649e9d..009c4744 100644
--- a/envoyfilter/Dockerfile
+++ b/arch/Dockerfile
@@ -1,17 +1,17 @@
 # build filter using rust toolchain
 FROM rust:1.80.0 as builder
 RUN rustup -v target add wasm32-wasi
-WORKDIR /envoyfilter
-COPY envoyfilter/src /envoyfilter/src
-COPY envoyfilter/Cargo.toml /envoyfilter/
-COPY envoyfilter/Cargo.lock /envoyfilter/
+WORKDIR /arch
+COPY arch/src /arch/src
+COPY arch/Cargo.toml /arch/
+COPY arch/Cargo.lock /arch/
 COPY public_types /public_types
 
 RUN cargo build --release --target wasm32-wasi
 
 # copy built filter into envoy image
 FROM envoyproxy/envoy:v1.30-latest
-COPY --from=builder /envoyfilter/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
+COPY --from=builder /arch/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
 # CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"]
 # CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"]
 CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"]
diff --git a/envoyfilter/README.md b/arch/README.md
similarity index 100%
rename from envoyfilter/README.md
rename to arch/README.md
diff --git a/envoyfilter/build_filter.sh b/arch/build_filter.sh
similarity index 50%
rename from envoyfilter/build_filter.sh
rename to arch/build_filter.sh
index ff42dede..36736112 100644
--- a/envoyfilter/build_filter.sh
+++ b/arch/build_filter.sh
@@ -1,3 +1,3 @@
 RUST_VERSION=1.80.0
 docker run --rm -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION rustup -v target add wasm32-wasi
-docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/envoyfilter -w /code/envoyfilter -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi
+docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/arch -w /code/arch -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi
diff --git a/envoyfilter/docker-compose.yaml b/arch/docker-compose.yaml
similarity index 100%
rename from envoyfilter/docker-compose.yaml
rename to arch/docker-compose.yaml
diff --git a/envoyfilter/download_mistral_7b.sh b/arch/download_mistral_7b.sh
similarity index 100%
rename from envoyfilter/download_mistral_7b.sh
rename to arch/download_mistral_7b.sh
diff --git a/envoyfilter/envoy.template.yaml b/arch/envoy.template.yaml
similarity index 92%
rename from envoyfilter/envoy.template.yaml
rename to arch/envoy.template.yaml
index 2f171d27..99965503 100644
--- a/envoyfilter/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -34,17 +34,10 @@ static_resources:
                           auto_host_rewrite: true
                           cluster: mistral_7b_instruct
                           timeout: 60s
-                      - match:
-                          prefix: "/bolt_fc_1b/v1/chat/completions"
-                        route:
-                          prefix_rewrite: /v1/chat/completions
-                          auto_host_rewrite: true
-                          cluster: bolt_fc_1b
-                          timeout: 120s
                       - match:
                           prefix: "/v1/chat/completions"
                           headers:
-                            - name: "x-bolt-llm-provider"
+                            - name: "x-arch-llm-provider"
                               string_match:
                                 exact: openai
                         route:
@@ -54,7 +47,7 @@ static_resources:
                       - match:
                           prefix: "/v1/chat/completions"
                           headers:
-                            - name: "x-bolt-llm-provider"
+                            - name: "x-arch-llm-provider"
                               string_match:
                                 exact: mistral
                         route:
@@ -167,12 +160,12 @@ static_resources:
                       address: mistral_7b_instruct
                       port_value: 10001
                   hostname: "mistral_7b_instruct"
-    - name: bolt_fc_1b
+    - name: arch_fc
       connect_timeout: 5s
       type: STRICT_DNS
       lb_policy: ROUND_ROBIN
       load_assignment:
-        cluster_name: bolt_fc_1b
+        cluster_name: arch_fc
         endpoints:
           - lb_endpoints:
               - endpoint:
@@ -180,7 +173,7 @@ static_resources:
                     socket_address:
                       address: function_resolver
                       port_value: 80
-                  hostname: "bolt_fc_1b"
+                  hostname: "arch_fc"
 {% for _, cluster in arch_clusters.items() %}
     - name: {{ cluster.name }}
       connect_timeout: 5s
diff --git a/envoyfilter/envoy.yaml b/arch/envoy.yaml
similarity index 98%
rename from envoyfilter/envoy.yaml
rename to arch/envoy.yaml
index f0236bf6..31e9c3fa 100644
--- a/envoyfilter/envoy.yaml
+++ b/arch/envoy.yaml
@@ -17,13 +17,13 @@ static_resources:
               scheme_header_transformation:
                 scheme_to_overwrite: https
               route_config:
-                  - name: bolt
+                  - name: arch
                     domains:
                       - "*"
                     routes:
                       - match:
                           headers:
-                            - name: "x-bolt-llm-provider"
+                            - name: "x-arch-llm-provider"
                               string_match:
                                 exact: openai
                         route:
@@ -32,7 +32,7 @@ static_resources:
                           timeout: 60s
                       - match:
                           headers:
-                            - name: "x-bolt-llm-provider"
+                            - name: "x-arch-llm-provider"
                               string_match:
                                 exact: mistral
                         route:
diff --git a/envoyfilter/grafana/datasource.yaml b/arch/grafana/datasource.yaml
similarity index 100%
rename from envoyfilter/grafana/datasource.yaml
rename to arch/grafana/datasource.yaml
diff --git a/envoyfilter/init_vector_store.sh b/arch/init_vector_store.sh
similarity index 100%
rename from envoyfilter/init_vector_store.sh
rename to arch/init_vector_store.sh
diff --git a/envoyfilter/katanemo-config.yaml b/arch/katanemo-config.yaml
similarity index 100%
rename from envoyfilter/katanemo-config.yaml
rename to arch/katanemo-config.yaml
diff --git a/envoyfilter/prometheus/prometheus.yaml b/arch/prometheus/prometheus.yaml
similarity index 100%
rename from envoyfilter/prometheus/prometheus.yaml
rename to arch/prometheus/prometheus.yaml
diff --git a/envoyfilter/src/consts.rs b/arch/src/consts.rs
similarity index 62%
rename from envoyfilter/src/consts.rs
rename to arch/src/consts.rs
index 250bc145..805d2d35 100644
--- a/envoyfilter/src/consts.rs
+++ b/arch/src/consts.rs
@@ -1,11 +1,11 @@
 pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5";
 pub const DEFAULT_INTENT_MODEL: &str = "tasksource/deberta-base-long-nli";
 pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
-pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-bolt-ratelimit-selector";
+pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
 pub const SYSTEM_ROLE: &str = "system";
 pub const USER_ROLE: &str = "user";
 pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
-pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b";
-pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
+pub const ARCH_FC_CLUSTER: &str = "arch_fc"; // Envoy cluster name (see envoy.template.yaml)
+pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
 pub const MODEL_SERVER_NAME: &str = "model_server";
-pub const BOLT_ROUTING_HEADER: &str = "x-bolt-llm-provider";
+pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
diff --git a/envoyfilter/src/filter_context.rs b/arch/src/filter_context.rs
similarity index 100%
rename from envoyfilter/src/filter_context.rs
rename to arch/src/filter_context.rs
diff --git a/envoyfilter/src/lib.rs b/arch/src/lib.rs
similarity index 100%
rename from envoyfilter/src/lib.rs
rename to arch/src/lib.rs
diff --git a/envoyfilter/src/llm_providers.rs b/arch/src/llm_providers.rs
similarity index 91%
rename from envoyfilter/src/llm_providers.rs
rename to arch/src/llm_providers.rs
index 91039ed2..c698bd1f 100644
--- a/envoyfilter/src/llm_providers.rs
+++ b/arch/src/llm_providers.rs
@@ -4,12 +4,12 @@ pub struct LlmProviders;
 impl LlmProviders {
     pub const OPENAI_PROVIDER: LlmProvider<'static> = LlmProvider {
         name: "openai",
-        api_key_header: "x-bolt-openai-api-key",
+        api_key_header: "x-arch-openai-api-key",
         model: "gpt-3.5-turbo",
     };
     pub const MISTRAL_PROVIDER: LlmProvider<'static> = LlmProvider {
         name: "mistral",
-        api_key_header: "x-bolt-mistral-api-key",
+        api_key_header: "x-arch-mistral-api-key",
         model: "mistral-large-latest",
     };
 
diff --git a/envoyfilter/src/ratelimit.rs b/arch/src/ratelimit.rs
similarity index 100%
rename from envoyfilter/src/ratelimit.rs
rename to arch/src/ratelimit.rs
diff --git a/envoyfilter/src/routing.rs b/arch/src/routing.rs
similarity index 100%
rename from envoyfilter/src/routing.rs
rename to arch/src/routing.rs
diff --git a/envoyfilter/src/stats.rs b/arch/src/stats.rs
similarity index 100%
rename from envoyfilter/src/stats.rs
rename to arch/src/stats.rs
diff --git a/envoyfilter/src/stream_context.rs b/arch/src/stream_context.rs
similarity index 96%
rename from envoyfilter/src/stream_context.rs
rename to arch/src/stream_context.rs
index 5607dc61..69c65092 100644
--- a/envoyfilter/src/stream_context.rs
+++ b/arch/src/stream_context.rs
@@ -1,5 +1,5 @@
 use crate::consts::{
-    BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, BOLT_ROUTING_HEADER, DEFAULT_EMBEDDING_MODEL,
+    ARCH_FC_CLUSTER, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_ROUTING_HEADER, DEFAULT_EMBEDDING_MODEL,
     DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
     RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
 };
@@ -93,7 +93,7 @@ impl StreamContext {
     }
 
     fn add_routing_header(&mut self) {
-        self.add_http_request_header(BOLT_ROUTING_HEADER, self.llm_provider().as_ref());
+        self.add_http_request_header(ARCH_ROUTING_HEADER, self.llm_provider().as_ref());
     }
 
     fn modify_auth_headers(&mut self) -> Result<(), String> {
@@ -305,15 +305,15 @@ impl StreamContext {
 
         let prompt_target_name = zeroshot_intent_response.predicted_class.clone();
 
-        // Check to see who responded to user message. This will help us identify if control should be passed to Bolt FC or not.
-        // If the last message was from Bolt FC, then Bolt FC is handling the conversation (possibly for parameter collection).
-        let mut bolt_assistant = false;
+        // Check to see who responded to user message. This will help us identify if control should be passed to Arch FC or not.
+        // If the last message was from Arch FC, then Arch FC is handling the conversation (possibly for parameter collection).
+        let mut arch_assistant = false;
         let messages = &callout_context.request_body.messages;
         if messages.len() >= 2 {
             let latest_assistant_message = &messages[messages.len() - 2];
             if let Some(model) = latest_assistant_message.model.as_ref() {
-                if model.starts_with("Bolt") {
-                    bolt_assistant = true;
+                if model.starts_with("Arch") {
+                    arch_assistant = true;
                 }
             }
         } else {
@@ -331,12 +331,12 @@ impl StreamContext {
 
         // check to ensure that the prompt target similarity score is above the threshold
         if prompt_target_similarity_score < prompt_target_intent_matching_threshold
-            && !bolt_assistant
+            && !arch_assistant
         {
-            // if bolt fc responded to the user message, then we don't need to check the similarity score
-            // it may be that bolt fc is handling the conversation for parameter collection
-            if bolt_assistant {
-                info!("bolt assistant is handling the conversation");
+            // if arch fc responded to the user message, then we don't need to check the similarity score
+            // it may be that arch fc is handling the conversation for parameter collection
+            if arch_assistant {
+                info!("arch assistant is handling the conversation");
             } else {
                 info!(
                     "prompt target below limit: {:.3}, threshold: {:.3}, continue conversation with user",
@@ -407,7 +407,7 @@ impl StreamContext {
 
                 let msg_body = match serde_json::to_string(&chat_completions) {
                     Ok(msg_body) => {
-                        debug!("bolt-fc request body content: {}", msg_body);
+                        debug!("arch_fc request body content: {}", msg_body);
                         msg_body
                     }
                     Err(e) => {
@@ -419,16 +419,16 @@ impl StreamContext {
                 };
 
                 let token_id = match self.dispatch_http_call(
-                    BOLT_FC_CLUSTER,
+                    ARCH_FC_CLUSTER,
                     vec![
                         (":method", "POST"),
                         (":path", "/v1/chat/completions"),
-                        (":authority", BOLT_FC_CLUSTER),
+                        (":authority", ARCH_FC_CLUSTER),
                         ("content-type", "application/json"),
                         ("x-envoy-max-retries", "3"),
                         (
                             "x-envoy-upstream-rq-timeout-ms",
-                            BOLT_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
+                            ARCH_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
                         ),
                     ],
                     Some(msg_body.as_bytes()),
@@ -445,7 +445,7 @@ impl StreamContext {
 
                 debug!(
                     "dispatched call to function {} token_id={}",
-                    BOLT_FC_CLUSTER, token_id
+                    ARCH_FC_CLUSTER, token_id
                 );
                 );
 
                 self.metrics.active_http_calls.increment(1);
@@ -464,8 +464,8 @@ impl StreamContext {
         let body_str = String::from_utf8(body).unwrap();
         debug!("function_resolver response str: {}", body_str);
 
-        let boltfc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) {
-            Ok(boltfc_response) => boltfc_response,
+        let arch_fc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) {
+            Ok(arch_fc_response) => arch_fc_response,
             Err(e) => {
                 return self.send_server_error(
                     format!(
@@ -477,11 +477,11 @@ impl StreamContext {
             }
         };
 
-        let model_resp = &boltfc_response.choices[0];
+        let model_resp = &arch_fc_response.choices[0];
 
         if model_resp.message.tool_calls.is_none() {
-            // This means that Bolt FC did not have enough information to resolve the function call
-            // Bolt FC probably responded with a message asking for more information.
+            // This means that Arch FC did not have enough information to resolve the function call
+            // Arch FC probably responded with a message asking for more information.
             // Let's send the response back to the user to initalize lightweight dialog for parameter collection
 
             //TODO: add resolver name to the response so the client can send the response back to the correct resolver
@@ -784,7 +784,7 @@ impl HttpContext for StreamContext {
     // the lifecycle of the http request and response.
     fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
         let provider_hint = self
-            .get_http_request_header("x-bolt-deterministic-provider")
+            .get_http_request_header("x-arch-deterministic-provider")
             .is_some();
         self.llm_provider = Some(routing::get_llm_provider(provider_hint));
 
@@ -945,7 +945,7 @@ impl HttpContext for StreamContext {
             }
         };
 
-        debug!("dispatched HTTP call to bolt_guard token_id={}", token_id);
+        debug!("dispatched HTTP call to arch_guard token_id={}", token_id);
 
         let call_context = CallContext {
             response_handler_type: ResponseHandlerType::ArchGuard,
diff --git a/envoyfilter/src/tokenizer.rs b/arch/src/tokenizer.rs
similarity index 100%
rename from envoyfilter/src/tokenizer.rs
rename to arch/src/tokenizer.rs
diff --git a/envoyfilter/tests/integration.rs b/arch/tests/integration.rs
similarity index 95%
rename from envoyfilter/tests/integration.rs
rename to arch/tests/integration.rs
index ce02a203..21ce8979 100644
--- a/envoyfilter/tests/integration.rs
+++ b/arch/tests/integration.rs
@@ -29,17 +29,17 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
         .call_proxy_on_request_headers(http_context, 0, false)
         .expect_get_header_map_value(
             Some(MapType::HttpRequestHeaders),
-            Some("x-bolt-deterministic-provider"),
+            Some("x-arch-deterministic-provider"),
         )
         .returning(Some("true"))
         .expect_add_header_map_value(
             Some(MapType::HttpRequestHeaders),
-            Some("x-bolt-llm-provider"),
+            Some("x-arch-llm-provider"),
             Some("openai"),
         )
         .expect_get_header_map_value(
             Some(MapType::HttpRequestHeaders),
-            Some("x-bolt-openai-api-key"),
+            Some("x-arch-openai-api-key"),
         )
         .returning(Some("api-key"))
         .expect_replace_header_map_value(
@@ -49,16 +49,16 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
         )
         .expect_remove_header_map_value(
             Some(MapType::HttpRequestHeaders),
-            Some("x-bolt-openai-api-key"),
+            Some("x-arch-openai-api-key"),
         )
         .expect_remove_header_map_value(
             Some(MapType::HttpRequestHeaders),
-            Some("x-bolt-mistral-api-key"),
+            Some("x-arch-mistral-api-key"),
         )
         .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
         .expect_get_header_map_value(
             Some(MapType::HttpRequestHeaders),
-            Some("x-bolt-ratelimit-selector"),
+            Some("x-arch-ratelimit-selector"),
         )
         .returning(Some("selector-key"))
         .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
@@ -164,7 +164,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Info), None)
-        .expect_http_call(Some("bolt_fc_1b"), None, None, None, None)
+        .expect_http_call(Some("arch_fc"), None, None, None, None)
         .returning(Some(3))
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Debug), None)
@@ -402,7 +402,7 @@ fn request_ratelimited() {
 
     normal_flow(&mut module, filter_context, http_context);
 
-    let bolt_fc_resp = ChatCompletionsResponse {
+    let arch_fc_resp = ChatCompletionsResponse {
         usage: Usage {
             completion_tokens: 0,
         },
@@ -429,12 +429,12 @@ fn request_ratelimited() {
         model: String::from("test"),
     };
 
-    let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap();
+    let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
     module
-        .call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0)
+        .call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0)
         .expect_metric_increment("active_http_calls", -1)
         .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
-        .returning(Some(&bolt_fc_resp_str))
+        .returning(Some(&arch_fc_resp_str))
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Debug), None)
@@ -517,7 +517,7 @@ fn request_not_ratelimited() {
 
     normal_flow(&mut module, filter_context, http_context);
 
-    let bolt_fc_resp = ChatCompletionsResponse {
+    let arch_fc_resp = ChatCompletionsResponse {
         usage: Usage {
             completion_tokens: 0,
         },
@@ -544,12 +544,12 @@ fn request_not_ratelimited() {
         model: String::from("test"),
     };
 
-    let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap();
+    let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
     module
-        .call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0)
+        .call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0)
         .expect_metric_increment("active_http_calls", -1)
         .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
-        .returning(Some(&bolt_fc_resp_str))
+        .returning(Some(&arch_fc_resp_str))
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Debug), None)
         .expect_log(Some(LogLevel::Debug), None)
diff --git a/chatbot_ui/app/run.py b/chatbot_ui/app/run.py
index 4d06287d..02b89d3c 100644
--- a/chatbot_ui/app/run.py
+++ b/chatbot_ui/app/run.py
@@ -22,9 +22,9 @@ def predict(message, history):
 
     # Custom headers
     custom_headers = {
-        'x-bolt-openai-api-key': f"{OPENAI_API_KEY}",
-        'x-bolt-mistral-api-key': f"{MISTRAL_API_KEY}",
-        'x-bolt-deterministic-provider': 'openai',
+        'x-arch-openai-api-key': f"{OPENAI_API_KEY}",
+        'x-arch-mistral-api-key': f"{MISTRAL_API_KEY}",
+        'x-arch-deterministic-provider': 'openai',
     }
 
     try:
@@ -53,7 +53,7 @@ def predict(message, history):
 
 with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo:
     print("Starting Demo...")
-    chatbot = gr.Chatbot(label="Bolt Chatbot", scale=1)
+    chatbot = gr.Chatbot(label="Arch Chatbot", scale=1)
     state = gr.State([])
     with gr.Row():
         txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True)
diff --git a/config_generator/Dockerfile b/config_generator/Dockerfile
index 00ff5b93..378a4176 100644
--- a/config_generator/Dockerfile
+++ b/config_generator/Dockerfile
@@ -3,7 +3,7 @@ WORKDIR /usr/src/app
 COPY config_generator/requirements.txt .
 RUN pip install -r requirements.txt
 COPY config_generator/config_generator.py .
-COPY envoyfilter/envoy.template.yaml .
-COPY envoyfilter/katanemo-config.yaml .
+COPY arch/envoy.template.yaml .
+COPY arch/katanemo-config.yaml .
 
 CMD ["python", "config_generator.py"]
diff --git a/config_generator/config_generator.py b/config_generator/config_generator.py
index 1c1d06ac..386806f1 100644
--- a/config_generator/config_generator.py
+++ b/config_generator/config_generator.py
@@ -3,13 +3,13 @@ from jinja2 import Environment, FileSystemLoader
 import yaml
 
 ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
-BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt_config.yaml')
+ARCH_CONFIG_FILE = os.getenv('ARCH_CONFIG_FILE', 'arch_config.yaml')
 ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')
 
 env = Environment(loader=FileSystemLoader('./'))
 template = env.get_template('envoy.template.yaml')
 
-with open(BOLT_CONFIG_FILE, 'r') as file:
+with open(ARCH_CONFIG_FILE, 'r') as file:
     katanemo_config = file.read()
 
 config_yaml = yaml.safe_load(katanemo_config)
diff --git a/demos/function_calling/bolt_config.yaml b/demos/function_calling/arch_config.yaml
similarity index 100%
rename from demos/function_calling/bolt_config.yaml
rename to demos/function_calling/arch_config.yaml
diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml
index 20e3002a..5fc05cd6 100644
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@@ -5,15 +5,14 @@ services:
       context: ../../
       dockerfile: config_generator/Dockerfile
     volumes:
-      - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
-      - ./bolt_config.yaml:/usr/src/app/bolt_config.yaml
+      - ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
+      - ./arch_config.yaml:/usr/src/app/arch_config.yaml
       - ./generated:/usr/src/app/out
 
-  bolt:
+  arch:
     build:
       context: ../../
-      dockerfile: envoyfilter/Dockerfile
-    hostname: bolt
+      dockerfile: arch/Dockerfile
     ports:
       - "10000:10000"
       - "19901:9901"
@@ -36,12 +35,12 @@ services:
     ports:
       - "18081:80"
     healthcheck:
-        test: ["CMD", "curl" ,"http://localhost:80/healthz"]
+        test: ["CMD", "curl", "-f", "http://localhost/healthz"]  # -f: non-zero exit on HTTP >= 400
         interval: 5s
         retries: 20
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
-      - ./bolt_config.yaml:/root/bolt_config.yaml
+      - ./arch_config.yaml:/root/arch_config.yaml
 
   function_resolver:
     build:
@@ -84,7 +83,7 @@ services:
     profiles:
       - manual
 
-  open-webui:
+  open_webui:
     image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
     container_name: open-webui
     volumes:
@@ -111,7 +110,7 @@ services:
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
       - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
-      - CHAT_COMPLETION_ENDPOINT=http://bolt:10000/v1
+      - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
 
   prometheus:
     image: prom/prometheus
diff --git a/gateway.code-workspace b/gateway.code-workspace
index 98cb3c29..a35227cf 100644
--- a/gateway.code-workspace
+++ b/gateway.code-workspace
@@ -5,8 +5,8 @@
 			"path": "."
 		},
     {
-      "name": "envoyfilter",
-      "path": "envoyfilter"
+      "name": "arch",
+      "path": "arch"
     },
     {
       "name": "model_server",
diff --git a/model_server/app/main.py b/model_server/app/main.py
index 2c83d769..66e9de43 100644
--- a/model_server/app/main.py
+++ b/model_server/app/main.py
@@ -22,7 +22,7 @@ transformers = load_transformers()
 ner_models = load_ner_models()
 zero_shot_models = load_zero_shot_models()
 
-with open("/root/bolt_config.yaml", "r") as file:
+with open("/root/arch_config.yaml", "r") as file:
     config = yaml.safe_load(file)
 with open("guard_model_config.yaml") as f:
     guard_model_config = yaml.safe_load(f)