rename envoyfilter => arch (#91)

* rename envoyfilter => arch

* fix more files

* more fixes

* more renames
This commit is contained in:
Adil Hafeez 2024-09-27 16:41:39 -07:00 committed by GitHub
parent 7168b14ed3
commit ea86f73605
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 91 additions and 99 deletions

View file

@ -11,8 +11,8 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Setup | Rust - name: Setup | Rust
run: rustup toolchain install stable --profile minimal run: rustup toolchain install stable --profile minimal
- name: Run Clippy on envoyfilter - name: Run Clippy on arch
run: cd envoyfilter && cargo clippy --all-targets --all-features -- -Dwarnings run: cd arch && cargo clippy --all-targets --all-features -- -Dwarnings
- name: Run Clippy on public_types - name: Run Clippy on public_types
run: cd public_types && cargo clippy --all-targets --all-features -- -Dwarnings run: cd public_types && cargo clippy --all-targets --all-features -- -Dwarnings
@ -25,8 +25,8 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Setup | Rust - name: Setup | Rust
run: rustup toolchain install stable --profile minimal run: rustup toolchain install stable --profile minimal
- name: Run Rustfmt on envoyfilter - name: Run Rustfmt on arch
run: cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check run: cd arch && cargo fmt -p intelligent-prompt-gateway -- --check
- name: Run Rustfmt on public_types - name: Run Rustfmt on public_types
run: cd public_types && cargo fmt -p public_types -- --check run: cd public_types && cargo fmt -p public_types -- --check
@ -41,8 +41,8 @@ jobs:
- name: Setup | Install wasm toolchain - name: Setup | Install wasm toolchain
run: rustup target add wasm32-wasi run: rustup target add wasm32-wasi
- name: Build wasm module - name: Build wasm module
run: cd envoyfilter && cargo build --release --target=wasm32-wasi run: cd arch && cargo build --release --target=wasm32-wasi
- name: Run Tests on envoyfilter - name: Run Tests on arch
run: cd envoyfilter && cargo test run: cd arch && cargo test
- name: Run Tests on public_types - name: Run Tests on public_types
run: cd public_types && cargo test run: cd public_types && cargo test

4
.gitignore vendored
View file

@ -1,5 +1,5 @@
envoyfilter/target arch/target
envoyfilter/qdrant_data/ arch/qdrant_data/
public_types/target public_types/target
/venv/ /venv/
__pycache__ __pycache__

View file

@ -3,7 +3,7 @@ repos:
rev: v4.6.0 rev: v4.6.0
hooks: hooks:
- id: check-yaml - id: check-yaml
exclude: envoyfilter/envoy.template.yaml exclude: arch/envoy.template.yaml
- id: end-of-file-fixer - id: end-of-file-fixer
- id: trailing-whitespace - id: trailing-whitespace
- repo: local - repo: local
@ -12,16 +12,16 @@ repos:
name: cargo-fmt name: cargo-fmt
language: system language: system
types: [file, rust] types: [file, rust]
entry: bash -c "cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check" entry: bash -c "cd arch && cargo fmt -p intelligent-prompt-gateway -- --check"
- id: cargo-clippy - id: cargo-clippy
name: cargo-clippy name: cargo-clippy
language: system language: system
types: [file, rust] types: [file, rust]
entry: bash -c "cd envoyfilter && cargo clippy -p intelligent-prompt-gateway --all" entry: bash -c "cd arch && cargo clippy -p intelligent-prompt-gateway --all"
- id: cargo-test - id: cargo-test
name: cargo-test name: cargo-test
language: system language: system
types: [file, rust] types: [file, rust]
# --lib is to only test the library, since when integration tests are made, # --lib is to only test the library, since when integration tests are made,
# they will be in a separate tests directory # they will be in a separate tests directory
entry: bash -c "cd envoyfilter && cargo test -p intelligent-prompt-gateway --lib" entry: bash -c "cd arch && cargo test -p intelligent-prompt-gateway --lib"

View file

@ -1,17 +1,17 @@
# build filter using rust toolchain # build filter using rust toolchain
FROM rust:1.80.0 as builder FROM rust:1.80.0 as builder
RUN rustup -v target add wasm32-wasi RUN rustup -v target add wasm32-wasi
WORKDIR /envoyfilter WORKDIR /arch
COPY envoyfilter/src /envoyfilter/src COPY arch/src /arch/src
COPY envoyfilter/Cargo.toml /envoyfilter/ COPY arch/Cargo.toml /arch/
COPY envoyfilter/Cargo.lock /envoyfilter/ COPY arch/Cargo.lock /arch/
COPY public_types /public_types COPY public_types /public_types
RUN cargo build --release --target wasm32-wasi RUN cargo build --release --target wasm32-wasi
# copy built filter into envoy image # copy built filter into envoy image
FROM envoyproxy/envoy:v1.30-latest FROM envoyproxy/envoy:v1.30-latest
COPY --from=builder /envoyfilter/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm COPY --from=builder /arch/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"] # CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"]
# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"] # CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"]
CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"] CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"]

View file

@ -1,3 +1,3 @@
RUST_VERSION=1.80.0 RUST_VERSION=1.80.0
docker run --rm -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION rustup -v target add wasm32-wasi docker run --rm -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION rustup -v target add wasm32-wasi
docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/envoyfilter -w /code/envoyfilter -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/arch -w /code/arch -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi

View file

@ -34,17 +34,10 @@ static_resources:
auto_host_rewrite: true auto_host_rewrite: true
cluster: mistral_7b_instruct cluster: mistral_7b_instruct
timeout: 60s timeout: 60s
- match:
prefix: "/bolt_fc_1b/v1/chat/completions"
route:
prefix_rewrite: /v1/chat/completions
auto_host_rewrite: true
cluster: bolt_fc_1b
timeout: 120s
- match: - match:
prefix: "/v1/chat/completions" prefix: "/v1/chat/completions"
headers: headers:
- name: "x-bolt-llm-provider" - name: "x-arch-llm-provider"
string_match: string_match:
exact: openai exact: openai
route: route:
@ -54,7 +47,7 @@ static_resources:
- match: - match:
prefix: "/v1/chat/completions" prefix: "/v1/chat/completions"
headers: headers:
- name: "x-bolt-llm-provider" - name: "x-arch-llm-provider"
string_match: string_match:
exact: mistral exact: mistral
route: route:
@ -167,12 +160,12 @@ static_resources:
address: mistral_7b_instruct address: mistral_7b_instruct
port_value: 10001 port_value: 10001
hostname: "mistral_7b_instruct" hostname: "mistral_7b_instruct"
- name: bolt_fc_1b - name: arch_fc
connect_timeout: 5s connect_timeout: 5s
type: STRICT_DNS type: STRICT_DNS
lb_policy: ROUND_ROBIN lb_policy: ROUND_ROBIN
load_assignment: load_assignment:
cluster_name: bolt_fc_1b cluster_name: arch_fc
endpoints: endpoints:
- lb_endpoints: - lb_endpoints:
- endpoint: - endpoint:
@ -180,7 +173,7 @@ static_resources:
socket_address: socket_address:
address: function_resolver address: function_resolver
port_value: 80 port_value: 80
hostname: "bolt_fc_1b" hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %} {% for _, cluster in arch_clusters.items() %}
- name: {{ cluster.name }} - name: {{ cluster.name }}
connect_timeout: 5s connect_timeout: 5s

View file

@ -17,13 +17,13 @@ static_resources:
scheme_header_transformation: scheme_header_transformation:
scheme_to_overwrite: https scheme_to_overwrite: https
route_config: route_config:
- name: bolt - name: arch
domains: domains:
- "*" - "*"
routes: routes:
- match: - match:
headers: headers:
- name: "x-bolt-llm-provider" - name: "x-arch-llm-provider"
string_match: string_match:
exact: openai exact: openai
route: route:
@ -32,7 +32,7 @@ static_resources:
timeout: 60s timeout: 60s
- match: - match:
headers: headers:
- name: "x-bolt-llm-provider" - name: "x-arch-llm-provider"
string_match: string_match:
exact: mistral exact: mistral
route: route:

View file

@ -1,11 +1,11 @@
pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5"; pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5";
pub const DEFAULT_INTENT_MODEL: &str = "tasksource/deberta-base-long-nli"; pub const DEFAULT_INTENT_MODEL: &str = "tasksource/deberta-base-long-nli";
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8; pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-bolt-ratelimit-selector"; pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
pub const SYSTEM_ROLE: &str = "system"; pub const SYSTEM_ROLE: &str = "system";
pub const USER_ROLE: &str = "user"; pub const USER_ROLE: &str = "user";
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo"; pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b"; pub const ARC_FC_CLUSTER: &str = "arch_fc";
pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const MODEL_SERVER_NAME: &str = "model_server"; pub const MODEL_SERVER_NAME: &str = "model_server";
pub const BOLT_ROUTING_HEADER: &str = "x-bolt-llm-provider"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";

View file

@ -4,12 +4,12 @@ pub struct LlmProviders;
impl LlmProviders { impl LlmProviders {
pub const OPENAI_PROVIDER: LlmProvider<'static> = LlmProvider { pub const OPENAI_PROVIDER: LlmProvider<'static> = LlmProvider {
name: "openai", name: "openai",
api_key_header: "x-bolt-openai-api-key", api_key_header: "x-arch-openai-api-key",
model: "gpt-3.5-turbo", model: "gpt-3.5-turbo",
}; };
pub const MISTRAL_PROVIDER: LlmProvider<'static> = LlmProvider { pub const MISTRAL_PROVIDER: LlmProvider<'static> = LlmProvider {
name: "mistral", name: "mistral",
api_key_header: "x-bolt-mistral-api-key", api_key_header: "x-arch-mistral-api-key",
model: "mistral-large-latest", model: "mistral-large-latest",
}; };

View file

@ -1,5 +1,5 @@
use crate::consts::{ use crate::consts::{
BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, BOLT_ROUTING_HEADER, DEFAULT_EMBEDDING_MODEL, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_ROUTING_HEADER, ARC_FC_CLUSTER, DEFAULT_EMBEDDING_MODEL,
DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
}; };
@ -93,7 +93,7 @@ impl StreamContext {
} }
fn add_routing_header(&mut self) { fn add_routing_header(&mut self) {
self.add_http_request_header(BOLT_ROUTING_HEADER, self.llm_provider().as_ref()); self.add_http_request_header(ARCH_ROUTING_HEADER, self.llm_provider().as_ref());
} }
fn modify_auth_headers(&mut self) -> Result<(), String> { fn modify_auth_headers(&mut self) -> Result<(), String> {
@ -305,15 +305,15 @@ impl StreamContext {
let prompt_target_name = zeroshot_intent_response.predicted_class.clone(); let prompt_target_name = zeroshot_intent_response.predicted_class.clone();
// Check to see who responded to user message. This will help us identify if control should be passed to Bolt FC or not. // Check to see who responded to user message. This will help us identify if control should be passed to Arch FC or not.
// If the last message was from Bolt FC, then Bolt FC is handling the conversation (possibly for parameter collection). // If the last message was from Arch FC, then Arch FC is handling the conversation (possibly for parameter collection).
let mut bolt_assistant = false; let mut arch_assistant = false;
let messages = &callout_context.request_body.messages; let messages = &callout_context.request_body.messages;
if messages.len() >= 2 { if messages.len() >= 2 {
let latest_assistant_message = &messages[messages.len() - 2]; let latest_assistant_message = &messages[messages.len() - 2];
if let Some(model) = latest_assistant_message.model.as_ref() { if let Some(model) = latest_assistant_message.model.as_ref() {
if model.starts_with("Bolt") { if model.starts_with("Arch") {
bolt_assistant = true; arch_assistant = true;
} }
} }
} else { } else {
@ -331,12 +331,12 @@ impl StreamContext {
// check to ensure that the prompt target similarity score is above the threshold // check to ensure that the prompt target similarity score is above the threshold
if prompt_target_similarity_score < prompt_target_intent_matching_threshold if prompt_target_similarity_score < prompt_target_intent_matching_threshold
&& !bolt_assistant && !arch_assistant
{ {
// if bolt fc responded to the user message, then we don't need to check the similarity score // if arch fc responded to the user message, then we don't need to check the similarity score
// it may be that bolt fc is handling the conversation for parameter collection // it may be that arch fc is handling the conversation for parameter collection
if bolt_assistant { if arch_assistant {
info!("bolt assistant is handling the conversation"); info!("arch assistant is handling the conversation");
} else { } else {
info!( info!(
"prompt target below limit: {:.3}, threshold: {:.3}, continue conversation with user", "prompt target below limit: {:.3}, threshold: {:.3}, continue conversation with user",
@ -407,7 +407,7 @@ impl StreamContext {
let msg_body = match serde_json::to_string(&chat_completions) { let msg_body = match serde_json::to_string(&chat_completions) {
Ok(msg_body) => { Ok(msg_body) => {
debug!("bolt-fc request body content: {}", msg_body); debug!("arch_fc request body content: {}", msg_body);
msg_body msg_body
} }
Err(e) => { Err(e) => {
@ -419,16 +419,16 @@ impl StreamContext {
}; };
let token_id = match self.dispatch_http_call( let token_id = match self.dispatch_http_call(
BOLT_FC_CLUSTER, ARC_FC_CLUSTER,
vec![ vec![
(":method", "POST"), (":method", "POST"),
(":path", "/v1/chat/completions"), (":path", "/v1/chat/completions"),
(":authority", BOLT_FC_CLUSTER), (":authority", ARC_FC_CLUSTER),
("content-type", "application/json"), ("content-type", "application/json"),
("x-envoy-max-retries", "3"), ("x-envoy-max-retries", "3"),
( (
"x-envoy-upstream-rq-timeout-ms", "x-envoy-upstream-rq-timeout-ms",
BOLT_FC_REQUEST_TIMEOUT_MS.to_string().as_str(), ARCH_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
), ),
], ],
Some(msg_body.as_bytes()), Some(msg_body.as_bytes()),
@ -445,7 +445,7 @@ impl StreamContext {
debug!( debug!(
"dispatched call to function {} token_id={}", "dispatched call to function {} token_id={}",
BOLT_FC_CLUSTER, token_id ARC_FC_CLUSTER, token_id
); );
self.metrics.active_http_calls.increment(1); self.metrics.active_http_calls.increment(1);
@ -464,8 +464,8 @@ impl StreamContext {
let body_str = String::from_utf8(body).unwrap(); let body_str = String::from_utf8(body).unwrap();
debug!("function_resolver response str: {}", body_str); debug!("function_resolver response str: {}", body_str);
let boltfc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) { let arch_fc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) {
Ok(boltfc_response) => boltfc_response, Ok(arch_fc_response) => arch_fc_response,
Err(e) => { Err(e) => {
return self.send_server_error( return self.send_server_error(
format!( format!(
@ -477,11 +477,11 @@ impl StreamContext {
} }
}; };
let model_resp = &boltfc_response.choices[0]; let model_resp = &arch_fc_response.choices[0];
if model_resp.message.tool_calls.is_none() { if model_resp.message.tool_calls.is_none() {
// This means that Bolt FC did not have enough information to resolve the function call // This means that Arch FC did not have enough information to resolve the function call
// Bolt FC probably responded with a message asking for more information. // Arch FC probably responded with a message asking for more information.
// Let's send the response back to the user to initialize lightweight dialog for parameter collection // Let's send the response back to the user to initialize lightweight dialog for parameter collection
//TODO: add resolver name to the response so the client can send the response back to the correct resolver //TODO: add resolver name to the response so the client can send the response back to the correct resolver
@ -784,7 +784,7 @@ impl HttpContext for StreamContext {
// the lifecycle of the http request and response. // the lifecycle of the http request and response.
fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
let provider_hint = self let provider_hint = self
.get_http_request_header("x-bolt-deterministic-provider") .get_http_request_header("x-arch-deterministic-provider")
.is_some(); .is_some();
self.llm_provider = Some(routing::get_llm_provider(provider_hint)); self.llm_provider = Some(routing::get_llm_provider(provider_hint));
@ -945,7 +945,7 @@ impl HttpContext for StreamContext {
} }
}; };
debug!("dispatched HTTP call to bolt_guard token_id={}", token_id); debug!("dispatched HTTP call to arch_guard token_id={}", token_id);
let call_context = CallContext { let call_context = CallContext {
response_handler_type: ResponseHandlerType::ArchGuard, response_handler_type: ResponseHandlerType::ArchGuard,

View file

@ -29,17 +29,17 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
.call_proxy_on_request_headers(http_context, 0, false) .call_proxy_on_request_headers(http_context, 0, false)
.expect_get_header_map_value( .expect_get_header_map_value(
Some(MapType::HttpRequestHeaders), Some(MapType::HttpRequestHeaders),
Some("x-bolt-deterministic-provider"), Some("x-arch-deterministic-provider"),
) )
.returning(Some("true")) .returning(Some("true"))
.expect_add_header_map_value( .expect_add_header_map_value(
Some(MapType::HttpRequestHeaders), Some(MapType::HttpRequestHeaders),
Some("x-bolt-llm-provider"), Some("x-arch-llm-provider"),
Some("openai"), Some("openai"),
) )
.expect_get_header_map_value( .expect_get_header_map_value(
Some(MapType::HttpRequestHeaders), Some(MapType::HttpRequestHeaders),
Some("x-bolt-openai-api-key"), Some("x-arch-openai-api-key"),
) )
.returning(Some("api-key")) .returning(Some("api-key"))
.expect_replace_header_map_value( .expect_replace_header_map_value(
@ -49,16 +49,16 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
) )
.expect_remove_header_map_value( .expect_remove_header_map_value(
Some(MapType::HttpRequestHeaders), Some(MapType::HttpRequestHeaders),
Some("x-bolt-openai-api-key"), Some("x-arch-openai-api-key"),
) )
.expect_remove_header_map_value( .expect_remove_header_map_value(
Some(MapType::HttpRequestHeaders), Some(MapType::HttpRequestHeaders),
Some("x-bolt-mistral-api-key"), Some("x-arch-mistral-api-key"),
) )
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length")) .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
.expect_get_header_map_value( .expect_get_header_map_value(
Some(MapType::HttpRequestHeaders), Some(MapType::HttpRequestHeaders),
Some("x-bolt-ratelimit-selector"), Some("x-arch-ratelimit-selector"),
) )
.returning(Some("selector-key")) .returning(Some("selector-key"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key")) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
@ -164,7 +164,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Info), None)
.expect_http_call(Some("bolt_fc_1b"), None, None, None, None) .expect_http_call(Some("arch_fc"), None, None, None, None)
.returning(Some(3)) .returning(Some(3))
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
@ -402,7 +402,7 @@ fn request_ratelimited() {
normal_flow(&mut module, filter_context, http_context); normal_flow(&mut module, filter_context, http_context);
let bolt_fc_resp = ChatCompletionsResponse { let arch_fc_resp = ChatCompletionsResponse {
usage: Usage { usage: Usage {
completion_tokens: 0, completion_tokens: 0,
}, },
@ -429,12 +429,12 @@ fn request_ratelimited() {
model: String::from("test"), model: String::from("test"),
}; };
let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap(); let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
module module
.call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0) .call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1) .expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&bolt_fc_resp_str)) .returning(Some(&arch_fc_resp_str))
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
@ -517,7 +517,7 @@ fn request_not_ratelimited() {
normal_flow(&mut module, filter_context, http_context); normal_flow(&mut module, filter_context, http_context);
let bolt_fc_resp = ChatCompletionsResponse { let arch_fc_resp = ChatCompletionsResponse {
usage: Usage { usage: Usage {
completion_tokens: 0, completion_tokens: 0,
}, },
@ -544,12 +544,12 @@ fn request_not_ratelimited() {
model: String::from("test"), model: String::from("test"),
}; };
let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap(); let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
module module
.call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0) .call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1) .expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&bolt_fc_resp_str)) .returning(Some(&arch_fc_resp_str))
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None)

View file

@ -22,9 +22,9 @@ def predict(message, history):
# Custom headers # Custom headers
custom_headers = { custom_headers = {
'x-bolt-openai-api-key': f"{OPENAI_API_KEY}", 'x-arch-openai-api-key': f"{OPENAI_API_KEY}",
'x-bolt-mistral-api-key': f"{MISTRAL_API_KEY}", 'x-arch-mistral-api-key': f"{MISTRAL_API_KEY}",
'x-bolt-deterministic-provider': 'openai', 'x-arch-deterministic-provider': 'openai',
} }
try: try:
@ -53,7 +53,7 @@ def predict(message, history):
with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo: with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo:
print("Starting Demo...") print("Starting Demo...")
chatbot = gr.Chatbot(label="Bolt Chatbot", scale=1) chatbot = gr.Chatbot(label="Arch Chatbot", scale=1)
state = gr.State([]) state = gr.State([])
with gr.Row(): with gr.Row():
txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True) txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True)

View file

@ -3,7 +3,7 @@ WORKDIR /usr/src/app
COPY config_generator/requirements.txt . COPY config_generator/requirements.txt .
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
COPY config_generator/config_generator.py . COPY config_generator/config_generator.py .
COPY envoyfilter/envoy.template.yaml . COPY arch/envoy.template.yaml .
COPY envoyfilter/katanemo-config.yaml . COPY arch/katanemo-config.yaml .
CMD ["python", "config_generator.py"] CMD ["python", "config_generator.py"]

View file

@ -3,13 +3,13 @@ from jinja2 import Environment, FileSystemLoader
import yaml import yaml
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml') ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt_config.yaml') ARCH_CONFIG_FILE = os.getenv('ARCH_CONFIG_FILE', 'arch_config.yaml')
ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml') ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')
env = Environment(loader=FileSystemLoader('./')) env = Environment(loader=FileSystemLoader('./'))
template = env.get_template('envoy.template.yaml') template = env.get_template('envoy.template.yaml')
with open(BOLT_CONFIG_FILE, 'r') as file: with open(ARCH_CONFIG_FILE, 'r') as file:
katanemo_config = file.read() katanemo_config = file.read()
config_yaml = yaml.safe_load(katanemo_config) config_yaml = yaml.safe_load(katanemo_config)

View file

@ -5,15 +5,14 @@ services:
context: ../../ context: ../../
dockerfile: config_generator/Dockerfile dockerfile: config_generator/Dockerfile
volumes: volumes:
- ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml - ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./bolt_config.yaml:/usr/src/app/bolt_config.yaml - ./arch_config.yaml:/usr/src/app/arch_config.yaml
- ./generated:/usr/src/app/out - ./generated:/usr/src/app/out
bolt: arch:
build: build:
context: ../../ context: ../../
dockerfile: envoyfilter/Dockerfile dockerfile: arch/Dockerfile
hostname: bolt
ports: ports:
- "10000:10000" - "10000:10000"
- "19901:9901" - "19901:9901"
@ -36,12 +35,12 @@ services:
ports: ports:
- "18081:80" - "18081:80"
healthcheck: healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"] test: ["CMD", "curl" ,"http://localhost/healthz"]
interval: 5s interval: 5s
retries: 20 retries: 20
volumes: volumes:
- ~/.cache/huggingface:/root/.cache/huggingface - ~/.cache/huggingface:/root/.cache/huggingface
- ./bolt_config.yaml:/root/bolt_config.yaml - ./arch_config.yaml:/root/arch_config.yaml
function_resolver: function_resolver:
build: build:
@ -84,7 +83,7 @@ services:
profiles: profiles:
- manual - manual
open-webui: open_webui:
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main} image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
container_name: open-webui container_name: open-webui
volumes: volumes:
@ -111,7 +110,7 @@ services:
environment: environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error} - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://bolt:10000/v1 - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
prometheus: prometheus:
image: prom/prometheus image: prom/prometheus

View file

@ -5,8 +5,8 @@
"path": "." "path": "."
}, },
{ {
"name": "envoyfilter", "name": "arch",
"path": "envoyfilter" "path": "arch"
}, },
{ {
"name": "model_server", "name": "model_server",

View file

@ -22,7 +22,7 @@ transformers = load_transformers()
ner_models = load_ner_models() ner_models = load_ner_models()
zero_shot_models = load_zero_shot_models() zero_shot_models = load_zero_shot_models()
with open("/root/bolt_config.yaml", "r") as file: with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file) config = yaml.safe_load(file)
with open("guard_model_config.yaml") as f: with open("guard_model_config.yaml") as f:
guard_model_config = yaml.safe_load(f) guard_model_config = yaml.safe_load(f)