From ea86f736054cbe54a0e84db09db8a75103ff9da7 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Fri, 27 Sep 2024 16:41:39 -0700 Subject: [PATCH] rename envoyfilter => arch (#91) * rename envoyfilter => arch * fix more files * more fixes * more renames --- .github/workflows/checks.yml | 14 +++--- .gitignore | 4 +- .pre-commit-config.yaml | 8 ++-- {envoyfilter => arch}/Cargo.lock | 0 {envoyfilter => arch}/Cargo.toml | 0 {envoyfilter => arch}/Dockerfile | 10 ++-- {envoyfilter => arch}/README.md | 0 {envoyfilter => arch}/build_filter.sh | 2 +- {envoyfilter => arch}/docker-compose.yaml | 0 {envoyfilter => arch}/download_mistral_7b.sh | 0 {envoyfilter => arch}/envoy.template.yaml | 17 ++----- {envoyfilter => arch}/envoy.yaml | 6 +-- {envoyfilter => arch}/grafana/datasource.yaml | 0 {envoyfilter => arch}/init_vector_store.sh | 0 {envoyfilter => arch}/katanemo-config.yaml | 0 .../prometheus/prometheus.yaml | 0 {envoyfilter => arch}/src/consts.rs | 8 ++-- {envoyfilter => arch}/src/filter_context.rs | 0 {envoyfilter => arch}/src/lib.rs | 0 {envoyfilter => arch}/src/llm_providers.rs | 4 +- {envoyfilter => arch}/src/ratelimit.rs | 0 {envoyfilter => arch}/src/routing.rs | 0 {envoyfilter => arch}/src/stats.rs | 0 {envoyfilter => arch}/src/stream_context.rs | 48 +++++++++---------- {envoyfilter => arch}/src/tokenizer.rs | 0 {envoyfilter => arch}/tests/integration.rs | 30 ++++++------ chatbot_ui/app/run.py | 8 ++-- config_generator/Dockerfile | 4 +- config_generator/config_generator.py | 4 +- .../{bolt_config.yaml => arch_config.yaml} | 0 demos/function_calling/docker-compose.yaml | 17 ++++--- gateway.code-workspace | 4 +- model_server/app/main.py | 2 +- 33 files changed, 91 insertions(+), 99 deletions(-) rename {envoyfilter => arch}/Cargo.lock (100%) rename {envoyfilter => arch}/Cargo.toml (100%) rename {envoyfilter => arch}/Dockerfile (60%) rename {envoyfilter => arch}/README.md (100%) rename {envoyfilter => arch}/build_filter.sh (50%) rename {envoyfilter => arch}/docker-compose.yaml (100%) rename {envoyfilter => arch}/download_mistral_7b.sh (100%) rename {envoyfilter => arch}/envoy.template.yaml (92%) rename {envoyfilter => arch}/envoy.yaml (98%) rename {envoyfilter => arch}/grafana/datasource.yaml (100%) rename {envoyfilter => arch}/init_vector_store.sh (100%) rename {envoyfilter => arch}/katanemo-config.yaml (100%) rename {envoyfilter => arch}/prometheus/prometheus.yaml (100%) rename {envoyfilter => arch}/src/consts.rs (62%) rename {envoyfilter => arch}/src/filter_context.rs (100%) rename {envoyfilter => arch}/src/lib.rs (100%) rename {envoyfilter => arch}/src/llm_providers.rs (91%) rename {envoyfilter => arch}/src/ratelimit.rs (100%) rename {envoyfilter => arch}/src/routing.rs (100%) rename {envoyfilter => arch}/src/stats.rs (100%) rename {envoyfilter => arch}/src/stream_context.rs (96%) rename {envoyfilter => arch}/src/tokenizer.rs (100%) rename {envoyfilter => arch}/tests/integration.rs (95%) rename demos/function_calling/{bolt_config.yaml => arch_config.yaml} (100%) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 8bca6786..548bb10f 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -11,8 +11,8 @@ jobs: uses: actions/checkout@v4 - name: Setup | Rust run: rustup toolchain install stable --profile minimal - - name: Run Clippy on envoyfilter - run: cd envoyfilter && cargo clippy --all-targets --all-features -- -Dwarnings + - name: Run Clippy on arch + run: cd arch && cargo clippy --all-targets --all-features -- -Dwarnings - name: Run Clippy on public_types run: cd public_types && cargo clippy --all-targets --all-features -- -Dwarnings @@ -25,8 +25,8 @@ jobs: uses: actions/checkout@v4 - name: Setup | Rust run: rustup toolchain install stable --profile minimal - - name: Run Rustfmt on envoyfilter - run: cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check + - name: Run Rustfmt on arch + run: cd arch && cargo fmt -p intelligent-prompt-gateway -- --check - name: Run Rustfmt on public_types run: cd public_types && cargo fmt -p public_types -- --check @@ -41,8 +41,8 @@ jobs: - name: Setup | Install wasm toolchain run: rustup target add wasm32-wasi - name: Build wasm module - run: cd envoyfilter && cargo build --release --target=wasm32-wasi - - name: Run Tests on envoyfilter - run: cd envoyfilter && cargo test + run: cd arch && cargo build --release --target=wasm32-wasi + - name: Run Tests on arch + run: cd arch && cargo test - name: Run Tests on public_types run: cd public_types && cargo test diff --git a/.gitignore b/.gitignore index 5f5a4d58..6f16c3d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -envoyfilter/target -envoyfilter/qdrant_data/ +arch/target +arch/qdrant_data/ public_types/target /venv/ __pycache__ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5b34693d..67f03f83 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: v4.6.0 hooks: - id: check-yaml - exclude: envoyfilter/envoy.template.yaml + exclude: arch/envoy.template.yaml - id: end-of-file-fixer - id: trailing-whitespace - repo: local @@ -12,16 +12,16 @@ repos: name: cargo-fmt language: system types: [file, rust] - entry: bash -c "cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check" + entry: bash -c "cd arch && cargo fmt -p intelligent-prompt-gateway -- --check" - id: cargo-clippy name: cargo-clippy language: system types: [file, rust] - entry: bash -c "cd envoyfilter && cargo clippy -p intelligent-prompt-gateway --all" + entry: bash -c "cd arch && cargo clippy -p intelligent-prompt-gateway --all" - id: cargo-test name: cargo-test language: system types: [file, rust] # --lib is to only test the library, since when integration tests are made, # they will be in a seperate tests directory - entry: bash -c "cd envoyfilter && cargo test -p intelligent-prompt-gateway --lib" + entry: bash -c "cd arch && cargo test -p intelligent-prompt-gateway --lib" diff --git a/envoyfilter/Cargo.lock b/arch/Cargo.lock similarity index 100% rename from envoyfilter/Cargo.lock rename to arch/Cargo.lock diff --git a/envoyfilter/Cargo.toml b/arch/Cargo.toml similarity index 100% rename from envoyfilter/Cargo.toml rename to arch/Cargo.toml diff --git a/envoyfilter/Dockerfile b/arch/Dockerfile similarity index 60% rename from envoyfilter/Dockerfile rename to arch/Dockerfile index 14649e9d..009c4744 100644 --- a/envoyfilter/Dockerfile +++ b/arch/Dockerfile @@ -1,17 +1,17 @@ # build filter using rust toolchain FROM rust:1.80.0 as builder RUN rustup -v target add wasm32-wasi -WORKDIR /envoyfilter -COPY envoyfilter/src /envoyfilter/src -COPY envoyfilter/Cargo.toml /envoyfilter/ -COPY envoyfilter/Cargo.lock /envoyfilter/ +WORKDIR /arch +COPY arch/src /arch/src +COPY arch/Cargo.toml /arch/ +COPY arch/Cargo.lock /arch/ COPY public_types /public_types RUN cargo build --release --target wasm32-wasi # copy built filter into envoy image FROM envoyproxy/envoy:v1.30-latest -COPY --from=builder /envoyfilter/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm +COPY --from=builder /arch/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm # CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"] # CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"] CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"] diff --git a/envoyfilter/README.md b/arch/README.md similarity index 100% rename from envoyfilter/README.md rename to arch/README.md diff --git a/envoyfilter/build_filter.sh b/arch/build_filter.sh similarity index 50% rename from envoyfilter/build_filter.sh rename to arch/build_filter.sh index ff42dede..36736112 100644 --- a/envoyfilter/build_filter.sh +++ b/arch/build_filter.sh @@ -1,3 +1,3 @@ RUST_VERSION=1.80.0 docker run --rm -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION rustup -v target add wasm32-wasi -docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/envoyfilter -w /code/envoyfilter -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi +docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/arch -w /code/arch -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi diff --git a/envoyfilter/docker-compose.yaml b/arch/docker-compose.yaml similarity index 100% rename from envoyfilter/docker-compose.yaml rename to arch/docker-compose.yaml diff --git a/envoyfilter/download_mistral_7b.sh b/arch/download_mistral_7b.sh similarity index 100% rename from envoyfilter/download_mistral_7b.sh rename to arch/download_mistral_7b.sh diff --git a/envoyfilter/envoy.template.yaml b/arch/envoy.template.yaml similarity index 92% rename from envoyfilter/envoy.template.yaml rename to arch/envoy.template.yaml index 2f171d27..99965503 100644 --- a/envoyfilter/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -34,17 +34,10 @@ static_resources: auto_host_rewrite: true cluster: mistral_7b_instruct timeout: 60s - - match: - prefix: "/bolt_fc_1b/v1/chat/completions" - route: - prefix_rewrite: /v1/chat/completions - auto_host_rewrite: true - cluster: bolt_fc_1b - timeout: 120s - match: prefix: "/v1/chat/completions" headers: - - name: "x-bolt-llm-provider" + - name: "x-arch-llm-provider" string_match: exact: openai route: @@ -54,7 +47,7 @@ static_resources: - match: prefix: "/v1/chat/completions" headers: - - name: "x-bolt-llm-provider" + - name: "x-arch-llm-provider" string_match: exact: mistral route: @@ -167,12 +160,12 @@ static_resources: address: mistral_7b_instruct port_value: 10001 hostname: "mistral_7b_instruct" - - name: bolt_fc_1b + - name: arch_fc connect_timeout: 5s type: STRICT_DNS lb_policy: ROUND_ROBIN load_assignment: - cluster_name: bolt_fc_1b + cluster_name: arch_fc endpoints: - lb_endpoints: - endpoint: @@ -180,7 +173,7 @@ static_resources: socket_address: address: function_resolver port_value: 80 - hostname: "bolt_fc_1b" + hostname: "arch_fc" {% for _, cluster in arch_clusters.items() %} - name: {{ cluster.name }} connect_timeout: 5s diff --git a/envoyfilter/envoy.yaml b/arch/envoy.yaml similarity index 98% rename from envoyfilter/envoy.yaml rename to arch/envoy.yaml index f0236bf6..31e9c3fa 100644 --- a/envoyfilter/envoy.yaml +++ b/arch/envoy.yaml @@ -17,13 +17,13 @@ static_resources: scheme_header_transformation: scheme_to_overwrite: https route_config: - - name: bolt + - name: arch domains: - "*" routes: - match: headers: - - name: "x-bolt-llm-provider" + - name: "x-arch-llm-provider" string_match: exact: openai route: @@ -32,7 +32,7 @@ static_resources: timeout: 60s - match: headers: - - name: "x-bolt-llm-provider" + - name: "x-arch-llm-provider" string_match: exact: mistral route: diff --git a/envoyfilter/grafana/datasource.yaml b/arch/grafana/datasource.yaml similarity index 100% rename from envoyfilter/grafana/datasource.yaml rename to arch/grafana/datasource.yaml diff --git a/envoyfilter/init_vector_store.sh b/arch/init_vector_store.sh similarity index 100% rename from envoyfilter/init_vector_store.sh rename to arch/init_vector_store.sh diff --git a/envoyfilter/katanemo-config.yaml b/arch/katanemo-config.yaml similarity index 100% rename from envoyfilter/katanemo-config.yaml rename to arch/katanemo-config.yaml diff --git a/envoyfilter/prometheus/prometheus.yaml b/arch/prometheus/prometheus.yaml similarity index 100% rename from envoyfilter/prometheus/prometheus.yaml rename to arch/prometheus/prometheus.yaml diff --git a/envoyfilter/src/consts.rs b/arch/src/consts.rs similarity index 62% rename from envoyfilter/src/consts.rs rename to arch/src/consts.rs index 250bc145..805d2d35 100644 --- a/envoyfilter/src/consts.rs +++ b/arch/src/consts.rs @@ -1,11 +1,11 @@ pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5"; pub const DEFAULT_INTENT_MODEL: &str = "tasksource/deberta-base-long-nli"; pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8; -pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-bolt-ratelimit-selector"; +pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector"; pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; pub const GPT_35_TURBO: &str = "gpt-3.5-turbo"; -pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b"; -pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes +pub const ARC_FC_CLUSTER: &str = "arch_fc"; +pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes pub const MODEL_SERVER_NAME: &str = "model_server"; -pub const BOLT_ROUTING_HEADER: &str = "x-bolt-llm-provider"; +pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; diff --git a/envoyfilter/src/filter_context.rs b/arch/src/filter_context.rs similarity index 100% rename from envoyfilter/src/filter_context.rs rename to arch/src/filter_context.rs diff --git a/envoyfilter/src/lib.rs b/arch/src/lib.rs similarity index 100% rename from envoyfilter/src/lib.rs rename to arch/src/lib.rs diff --git a/envoyfilter/src/llm_providers.rs b/arch/src/llm_providers.rs similarity index 91% rename from envoyfilter/src/llm_providers.rs rename to arch/src/llm_providers.rs index 91039ed2..c698bd1f 100644 --- a/envoyfilter/src/llm_providers.rs +++ b/arch/src/llm_providers.rs @@ -4,12 +4,12 @@ pub struct LlmProviders; impl LlmProviders { pub const OPENAI_PROVIDER: LlmProvider<'static> = LlmProvider { name: "openai", - api_key_header: "x-bolt-openai-api-key", + api_key_header: "x-arch-openai-api-key", model: "gpt-3.5-turbo", }; pub const MISTRAL_PROVIDER: LlmProvider<'static> = LlmProvider { name: "mistral", - api_key_header: "x-bolt-mistral-api-key", + api_key_header: "x-arch-mistral-api-key", model: "mistral-large-latest", }; diff --git a/envoyfilter/src/ratelimit.rs b/arch/src/ratelimit.rs similarity index 100% rename from envoyfilter/src/ratelimit.rs rename to arch/src/ratelimit.rs diff --git a/envoyfilter/src/routing.rs b/arch/src/routing.rs similarity index 100% rename from envoyfilter/src/routing.rs rename to arch/src/routing.rs diff --git a/envoyfilter/src/stats.rs b/arch/src/stats.rs similarity index 100% rename from envoyfilter/src/stats.rs rename to arch/src/stats.rs diff --git a/envoyfilter/src/stream_context.rs b/arch/src/stream_context.rs similarity index 96% rename from envoyfilter/src/stream_context.rs rename to arch/src/stream_context.rs index 5607dc61..69c65092 100644 --- a/envoyfilter/src/stream_context.rs +++ b/arch/src/stream_context.rs @@ -1,5 +1,5 @@ use crate::consts::{ - BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, BOLT_ROUTING_HEADER, DEFAULT_EMBEDDING_MODEL, + ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_ROUTING_HEADER, ARC_FC_CLUSTER, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, }; @@ -93,7 +93,7 @@ impl StreamContext { } fn add_routing_header(&mut self) { - self.add_http_request_header(BOLT_ROUTING_HEADER, self.llm_provider().as_ref()); + self.add_http_request_header(ARCH_ROUTING_HEADER, self.llm_provider().as_ref()); } fn modify_auth_headers(&mut self) -> Result<(), String> { @@ -305,15 +305,15 @@ impl StreamContext { let prompt_target_name = zeroshot_intent_response.predicted_class.clone(); - // Check to see who responded to user message. This will help us identify if control should be passed to Bolt FC or not. - // If the last message was from Bolt FC, then Bolt FC is handling the conversation (possibly for parameter collection). - let mut bolt_assistant = false; + // Check to see who responded to user message. This will help us identify if control should be passed to Arch FC or not. + // If the last message was from Arch FC, then Arch FC is handling the conversation (possibly for parameter collection). + let mut arch_assistant = false; let messages = &callout_context.request_body.messages; if messages.len() >= 2 { let latest_assistant_message = &messages[messages.len() - 2]; if let Some(model) = latest_assistant_message.model.as_ref() { - if model.starts_with("Bolt") { - bolt_assistant = true; + if model.starts_with("Arch") { + arch_assistant = true; } } } else { @@ -331,12 +331,12 @@ impl StreamContext { // check to ensure that the prompt target similarity score is above the threshold if prompt_target_similarity_score < prompt_target_intent_matching_threshold - && !bolt_assistant + && !arch_assistant { - // if bolt fc responded to the user message, then we don't need to check the similarity score - // it may be that bolt fc is handling the conversation for parameter collection - if bolt_assistant { - info!("bolt assistant is handling the conversation"); + // if arch fc responded to the user message, then we don't need to check the similarity score + // it may be that arch fc is handling the conversation for parameter collection + if arch_assistant { + info!("arch assistant is handling the conversation"); } else { info!( "prompt target below limit: {:.3}, threshold: {:.3}, continue conversation with user", @@ -407,7 +407,7 @@ impl StreamContext { let msg_body = match serde_json::to_string(&chat_completions) { Ok(msg_body) => { - debug!("bolt-fc request body content: {}", msg_body); + debug!("arch_fc request body content: {}", msg_body); msg_body } Err(e) => { @@ -419,16 +419,16 @@ impl StreamContext { }; let token_id = match self.dispatch_http_call( - BOLT_FC_CLUSTER, + ARC_FC_CLUSTER, vec![ (":method", "POST"), (":path", "/v1/chat/completions"), - (":authority", BOLT_FC_CLUSTER), + (":authority", ARC_FC_CLUSTER), ("content-type", "application/json"), ("x-envoy-max-retries", "3"), ( "x-envoy-upstream-rq-timeout-ms", - BOLT_FC_REQUEST_TIMEOUT_MS.to_string().as_str(), + ARCH_FC_REQUEST_TIMEOUT_MS.to_string().as_str(), ), ], Some(msg_body.as_bytes()), @@ -445,7 +445,7 @@ impl StreamContext { debug!( "dispatched call to function {} token_id={}", - BOLT_FC_CLUSTER, token_id + ARC_FC_CLUSTER, token_id ); self.metrics.active_http_calls.increment(1); @@ -464,8 +464,8 @@ impl StreamContext { let body_str = String::from_utf8(body).unwrap(); debug!("function_resolver response str: {}", body_str); - let boltfc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) { - Ok(boltfc_response) => boltfc_response, + let arch_fc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) { + Ok(arch_fc_response) => arch_fc_response, Err(e) => { return self.send_server_error( format!( @@ -477,11 +477,11 @@ impl StreamContext { } }; - let model_resp = &boltfc_response.choices[0]; + let model_resp = &arch_fc_response.choices[0]; if model_resp.message.tool_calls.is_none() { - // This means that Bolt FC did not have enough information to resolve the function call - // Bolt FC probably responded with a message asking for more information. + // This means that Arch FC did not have enough information to resolve the function call + // Arch FC probably responded with a message asking for more information. // Let's send the response back to the user to initalize lightweight dialog for parameter collection //TODO: add resolver name to the response so the client can send the response back to the correct resolver @@ -784,7 +784,7 @@ impl HttpContext for StreamContext { // the lifecycle of the http request and response. fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { let provider_hint = self - .get_http_request_header("x-bolt-deterministic-provider") + .get_http_request_header("x-arch-deterministic-provider") .is_some(); self.llm_provider = Some(routing::get_llm_provider(provider_hint)); @@ -945,7 +945,7 @@ impl HttpContext for StreamContext { } }; - debug!("dispatched HTTP call to bolt_guard token_id={}", token_id); + debug!("dispatched HTTP call to arch_guard token_id={}", token_id); let call_context = CallContext { response_handler_type: ResponseHandlerType::ArchGuard, diff --git a/envoyfilter/src/tokenizer.rs b/arch/src/tokenizer.rs similarity index 100% rename from envoyfilter/src/tokenizer.rs rename to arch/src/tokenizer.rs diff --git a/envoyfilter/tests/integration.rs b/arch/tests/integration.rs similarity index 95% rename from envoyfilter/tests/integration.rs rename to arch/tests/integration.rs index ce02a203..21ce8979 100644 --- a/envoyfilter/tests/integration.rs +++ b/arch/tests/integration.rs @@ -29,17 +29,17 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { .call_proxy_on_request_headers(http_context, 0, false) .expect_get_header_map_value( Some(MapType::HttpRequestHeaders), - Some("x-bolt-deterministic-provider"), + Some("x-arch-deterministic-provider"), ) .returning(Some("true")) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), - Some("x-bolt-llm-provider"), + Some("x-arch-llm-provider"), Some("openai"), ) .expect_get_header_map_value( Some(MapType::HttpRequestHeaders), - Some("x-bolt-openai-api-key"), + Some("x-arch-openai-api-key"), ) .returning(Some("api-key")) .expect_replace_header_map_value( @@ -49,16 +49,16 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { ) .expect_remove_header_map_value( Some(MapType::HttpRequestHeaders), - Some("x-bolt-openai-api-key"), + Some("x-arch-openai-api-key"), ) .expect_remove_header_map_value( Some(MapType::HttpRequestHeaders), - Some("x-bolt-mistral-api-key"), + Some("x-arch-mistral-api-key"), ) .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length")) .expect_get_header_map_value( Some(MapType::HttpRequestHeaders), - Some("x-bolt-ratelimit-selector"), + Some("x-arch-ratelimit-selector"), ) .returning(Some("selector-key")) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key")) @@ -164,7 +164,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Info), None) - .expect_http_call(Some("bolt_fc_1b"), None, None, None, None) + .expect_http_call(Some("arch_fc"), None, None, None, None) .returning(Some(3)) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) @@ -402,7 +402,7 @@ fn request_ratelimited() { normal_flow(&mut module, filter_context, http_context); - let bolt_fc_resp = ChatCompletionsResponse { + let arch_fc_resp = ChatCompletionsResponse { usage: Usage { completion_tokens: 0, }, @@ -429,12 +429,12 @@ fn request_ratelimited() { model: String::from("test"), }; - let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap(); + let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap(); module - .call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0) + .call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0) .expect_metric_increment("active_http_calls", -1) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) - .returning(Some(&bolt_fc_resp_str)) + .returning(Some(&arch_fc_resp_str)) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) @@ -517,7 +517,7 @@ fn request_not_ratelimited() { normal_flow(&mut module, filter_context, http_context); - let bolt_fc_resp = ChatCompletionsResponse { + let arch_fc_resp = ChatCompletionsResponse { usage: Usage { completion_tokens: 0, }, @@ -544,12 +544,12 @@ fn request_not_ratelimited() { model: String::from("test"), }; - let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap(); + let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap(); module - .call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0) + .call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0) .expect_metric_increment("active_http_calls", -1) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) - .returning(Some(&bolt_fc_resp_str)) + .returning(Some(&arch_fc_resp_str)) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) diff --git a/chatbot_ui/app/run.py b/chatbot_ui/app/run.py index 4d06287d..02b89d3c 100644 --- a/chatbot_ui/app/run.py +++ b/chatbot_ui/app/run.py @@ -22,9 +22,9 @@ def predict(message, history): # Custom headers custom_headers = { - 'x-bolt-openai-api-key': f"{OPENAI_API_KEY}", - 'x-bolt-mistral-api-key': f"{MISTRAL_API_KEY}", - 'x-bolt-deterministic-provider': 'openai', + 'x-arch-openai-api-key': f"{OPENAI_API_KEY}", + 'x-arch-mistral-api-key': f"{MISTRAL_API_KEY}", + 'x-arch-deterministic-provider': 'openai', } try: @@ -53,7 +53,7 @@ def predict(message, history): with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo: print("Starting Demo...") - chatbot = gr.Chatbot(label="Bolt Chatbot", scale=1) + chatbot = gr.Chatbot(label="Arch Chatbot", scale=1) state = gr.State([]) with gr.Row(): txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True) diff --git a/config_generator/Dockerfile b/config_generator/Dockerfile index 00ff5b93..378a4176 100644 --- a/config_generator/Dockerfile +++ b/config_generator/Dockerfile @@ -3,7 +3,7 @@ WORKDIR /usr/src/app COPY config_generator/requirements.txt . RUN pip install -r requirements.txt COPY config_generator/config_generator.py . -COPY envoyfilter/envoy.template.yaml . -COPY envoyfilter/katanemo-config.yaml . +COPY arch/envoy.template.yaml . +COPY arch/katanemo-config.yaml . CMD ["python", "config_generator.py"] diff --git a/config_generator/config_generator.py b/config_generator/config_generator.py index 1c1d06ac..386806f1 100644 --- a/config_generator/config_generator.py +++ b/config_generator/config_generator.py @@ -3,13 +3,13 @@ from jinja2 import Environment, FileSystemLoader import yaml ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml') -BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt_config.yaml') +ARCH_CONFIG_FILE = os.getenv('ARCH_CONFIG_FILE', 'arch_config.yaml') ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml') env = Environment(loader=FileSystemLoader('./')) template = env.get_template('envoy.template.yaml') -with open(BOLT_CONFIG_FILE, 'r') as file: +with open(ARCH_CONFIG_FILE, 'r') as file: katanemo_config = file.read() config_yaml = yaml.safe_load(katanemo_config) diff --git a/demos/function_calling/bolt_config.yaml b/demos/function_calling/arch_config.yaml similarity index 100% rename from demos/function_calling/bolt_config.yaml rename to demos/function_calling/arch_config.yaml diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml index 20e3002a..5fc05cd6 100644 --- a/demos/function_calling/docker-compose.yaml +++ b/demos/function_calling/docker-compose.yaml @@ -5,15 +5,14 @@ services: context: ../../ dockerfile: config_generator/Dockerfile volumes: - - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml - - ./bolt_config.yaml:/usr/src/app/bolt_config.yaml + - ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml + - ./arch_config.yaml:/usr/src/app/arch_config.yaml - ./generated:/usr/src/app/out - bolt: + arch: build: context: ../../ - dockerfile: envoyfilter/Dockerfile - hostname: bolt + dockerfile: arch/Dockerfile ports: - "10000:10000" - "19901:9901" @@ -36,12 +35,12 @@ services: ports: - "18081:80" healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] + test: ["CMD", "curl" ,"http://localhost/healthz"] interval: 5s retries: 20 volumes: - ~/.cache/huggingface:/root/.cache/huggingface - - ./bolt_config.yaml:/root/bolt_config.yaml + - ./arch_config.yaml:/root/arch_config.yaml function_resolver: build: @@ -84,7 +83,7 @@ services: profiles: - manual - open-webui: + open_webui: image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main} container_name: open-webui volumes: @@ -111,7 +110,7 @@ services: environment: - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} - - CHAT_COMPLETION_ENDPOINT=http://bolt:10000/v1 + - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1 prometheus: image: prom/prometheus diff --git a/gateway.code-workspace b/gateway.code-workspace index 98cb3c29..a35227cf 100644 --- a/gateway.code-workspace +++ b/gateway.code-workspace @@ -5,8 +5,8 @@ "path": "." }, { - "name": "envoyfilter", - "path": "envoyfilter" + "name": "arch", + "path": "arch" }, { "name": "model_server", diff --git a/model_server/app/main.py b/model_server/app/main.py index 2c83d769..66e9de43 100644 --- a/model_server/app/main.py +++ b/model_server/app/main.py @@ -22,7 +22,7 @@ transformers = load_transformers() ner_models = load_ner_models() zero_shot_models = load_zero_shot_models() -with open("/root/bolt_config.yaml", "r") as file: +with open("/root/arch_config.yaml", "r") as file: config = yaml.safe_load(file) with open("guard_model_config.yaml") as f: guard_model_config = yaml.safe_load(f)