mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
rename envoyfilter => arch (#91)
* rename envoyfilter => arch * fix more files * more fixes * more renames
This commit is contained in:
parent
7168b14ed3
commit
ea86f73605
33 changed files with 91 additions and 99 deletions
14
.github/workflows/checks.yml
vendored
14
.github/workflows/checks.yml
vendored
|
|
@ -11,8 +11,8 @@ jobs:
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: Setup | Rust
|
- name: Setup | Rust
|
||||||
run: rustup toolchain install stable --profile minimal
|
run: rustup toolchain install stable --profile minimal
|
||||||
- name: Run Clippy on envoyfilter
|
- name: Run Clippy on arch
|
||||||
run: cd envoyfilter && cargo clippy --all-targets --all-features -- -Dwarnings
|
run: cd arch && cargo clippy --all-targets --all-features -- -Dwarnings
|
||||||
- name: Run Clippy on public_types
|
- name: Run Clippy on public_types
|
||||||
run: cd public_types && cargo clippy --all-targets --all-features -- -Dwarnings
|
run: cd public_types && cargo clippy --all-targets --all-features -- -Dwarnings
|
||||||
|
|
||||||
|
|
@ -25,8 +25,8 @@ jobs:
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
- name: Setup | Rust
|
- name: Setup | Rust
|
||||||
run: rustup toolchain install stable --profile minimal
|
run: rustup toolchain install stable --profile minimal
|
||||||
- name: Run Rustfmt on envoyfilter
|
- name: Run Rustfmt on arch
|
||||||
run: cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check
|
run: cd arch && cargo fmt -p intelligent-prompt-gateway -- --check
|
||||||
- name: Run Rustfmt on public_types
|
- name: Run Rustfmt on public_types
|
||||||
run: cd public_types && cargo fmt -p public_types -- --check
|
run: cd public_types && cargo fmt -p public_types -- --check
|
||||||
|
|
||||||
|
|
@ -41,8 +41,8 @@ jobs:
|
||||||
- name: Setup | Install wasm toolchain
|
- name: Setup | Install wasm toolchain
|
||||||
run: rustup target add wasm32-wasi
|
run: rustup target add wasm32-wasi
|
||||||
- name: Build wasm module
|
- name: Build wasm module
|
||||||
run: cd envoyfilter && cargo build --release --target=wasm32-wasi
|
run: cd arch && cargo build --release --target=wasm32-wasi
|
||||||
- name: Run Tests on envoyfilter
|
- name: Run Tests on arch
|
||||||
run: cd envoyfilter && cargo test
|
run: cd arch && cargo test
|
||||||
- name: Run Tests on public_types
|
- name: Run Tests on public_types
|
||||||
run: cd public_types && cargo test
|
run: cd public_types && cargo test
|
||||||
|
|
|
||||||
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -1,5 +1,5 @@
|
||||||
envoyfilter/target
|
arch/target
|
||||||
envoyfilter/qdrant_data/
|
arch/qdrant_data/
|
||||||
public_types/target
|
public_types/target
|
||||||
/venv/
|
/venv/
|
||||||
__pycache__
|
__pycache__
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ repos:
|
||||||
rev: v4.6.0
|
rev: v4.6.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: check-yaml
|
- id: check-yaml
|
||||||
exclude: envoyfilter/envoy.template.yaml
|
exclude: arch/envoy.template.yaml
|
||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
- repo: local
|
- repo: local
|
||||||
|
|
@ -12,16 +12,16 @@ repos:
|
||||||
name: cargo-fmt
|
name: cargo-fmt
|
||||||
language: system
|
language: system
|
||||||
types: [file, rust]
|
types: [file, rust]
|
||||||
entry: bash -c "cd envoyfilter && cargo fmt -p intelligent-prompt-gateway -- --check"
|
entry: bash -c "cd arch && cargo fmt -p intelligent-prompt-gateway -- --check"
|
||||||
- id: cargo-clippy
|
- id: cargo-clippy
|
||||||
name: cargo-clippy
|
name: cargo-clippy
|
||||||
language: system
|
language: system
|
||||||
types: [file, rust]
|
types: [file, rust]
|
||||||
entry: bash -c "cd envoyfilter && cargo clippy -p intelligent-prompt-gateway --all"
|
entry: bash -c "cd arch && cargo clippy -p intelligent-prompt-gateway --all"
|
||||||
- id: cargo-test
|
- id: cargo-test
|
||||||
name: cargo-test
|
name: cargo-test
|
||||||
language: system
|
language: system
|
||||||
types: [file, rust]
|
types: [file, rust]
|
||||||
# --lib is to only test the library, since when integration tests are made,
|
# --lib is to only test the library, since when integration tests are made,
|
||||||
# they will be in a separate tests directory
|
# they will be in a separate tests directory
|
||||||
entry: bash -c "cd envoyfilter && cargo test -p intelligent-prompt-gateway --lib"
|
entry: bash -c "cd arch && cargo test -p intelligent-prompt-gateway --lib"
|
||||||
|
|
|
||||||
0
envoyfilter/Cargo.lock → arch/Cargo.lock
generated
0
envoyfilter/Cargo.lock → arch/Cargo.lock
generated
|
|
@ -1,17 +1,17 @@
|
||||||
# build filter using rust toolchain
|
# build filter using rust toolchain
|
||||||
FROM rust:1.80.0 as builder
|
FROM rust:1.80.0 as builder
|
||||||
RUN rustup -v target add wasm32-wasi
|
RUN rustup -v target add wasm32-wasi
|
||||||
WORKDIR /envoyfilter
|
WORKDIR /arch
|
||||||
COPY envoyfilter/src /envoyfilter/src
|
COPY arch/src /arch/src
|
||||||
COPY envoyfilter/Cargo.toml /envoyfilter/
|
COPY arch/Cargo.toml /arch/
|
||||||
COPY envoyfilter/Cargo.lock /envoyfilter/
|
COPY arch/Cargo.lock /arch/
|
||||||
COPY public_types /public_types
|
COPY public_types /public_types
|
||||||
|
|
||||||
RUN cargo build --release --target wasm32-wasi
|
RUN cargo build --release --target wasm32-wasi
|
||||||
|
|
||||||
# copy built filter into envoy image
|
# copy built filter into envoy image
|
||||||
FROM envoyproxy/envoy:v1.30-latest
|
FROM envoyproxy/envoy:v1.30-latest
|
||||||
COPY --from=builder /envoyfilter/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
|
COPY --from=builder /arch/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
|
||||||
# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"]
|
# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"]
|
||||||
# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"]
|
# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"]
|
||||||
CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"]
|
CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"]
|
||||||
|
|
@ -1,3 +1,3 @@
|
||||||
RUST_VERSION=1.80.0
|
RUST_VERSION=1.80.0
|
||||||
docker run --rm -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION rustup -v target add wasm32-wasi
|
docker run --rm -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION rustup -v target add wasm32-wasi
|
||||||
docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/envoyfilter -w /code/envoyfilter -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi
|
docker run --rm -v $PWD/../open-message-format:/code/open-message-format -v ~/.cargo:/root/.cargo -v $(pwd):/code/arch -w /code/arch -v rustup_cache:/usr/local/rustup/ rust:$RUST_VERSION cargo build --release --target wasm32-wasi
|
||||||
|
|
@ -34,17 +34,10 @@ static_resources:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: mistral_7b_instruct
|
cluster: mistral_7b_instruct
|
||||||
timeout: 60s
|
timeout: 60s
|
||||||
- match:
|
|
||||||
prefix: "/bolt_fc_1b/v1/chat/completions"
|
|
||||||
route:
|
|
||||||
prefix_rewrite: /v1/chat/completions
|
|
||||||
auto_host_rewrite: true
|
|
||||||
cluster: bolt_fc_1b
|
|
||||||
timeout: 120s
|
|
||||||
- match:
|
- match:
|
||||||
prefix: "/v1/chat/completions"
|
prefix: "/v1/chat/completions"
|
||||||
headers:
|
headers:
|
||||||
- name: "x-bolt-llm-provider"
|
- name: "x-arch-llm-provider"
|
||||||
string_match:
|
string_match:
|
||||||
exact: openai
|
exact: openai
|
||||||
route:
|
route:
|
||||||
|
|
@ -54,7 +47,7 @@ static_resources:
|
||||||
- match:
|
- match:
|
||||||
prefix: "/v1/chat/completions"
|
prefix: "/v1/chat/completions"
|
||||||
headers:
|
headers:
|
||||||
- name: "x-bolt-llm-provider"
|
- name: "x-arch-llm-provider"
|
||||||
string_match:
|
string_match:
|
||||||
exact: mistral
|
exact: mistral
|
||||||
route:
|
route:
|
||||||
|
|
@ -167,12 +160,12 @@ static_resources:
|
||||||
address: mistral_7b_instruct
|
address: mistral_7b_instruct
|
||||||
port_value: 10001
|
port_value: 10001
|
||||||
hostname: "mistral_7b_instruct"
|
hostname: "mistral_7b_instruct"
|
||||||
- name: bolt_fc_1b
|
- name: arch_fc
|
||||||
connect_timeout: 5s
|
connect_timeout: 5s
|
||||||
type: STRICT_DNS
|
type: STRICT_DNS
|
||||||
lb_policy: ROUND_ROBIN
|
lb_policy: ROUND_ROBIN
|
||||||
load_assignment:
|
load_assignment:
|
||||||
cluster_name: bolt_fc_1b
|
cluster_name: arch_fc
|
||||||
endpoints:
|
endpoints:
|
||||||
- lb_endpoints:
|
- lb_endpoints:
|
||||||
- endpoint:
|
- endpoint:
|
||||||
|
|
@ -180,7 +173,7 @@ static_resources:
|
||||||
socket_address:
|
socket_address:
|
||||||
address: function_resolver
|
address: function_resolver
|
||||||
port_value: 80
|
port_value: 80
|
||||||
hostname: "bolt_fc_1b"
|
hostname: "arch_fc"
|
||||||
{% for _, cluster in arch_clusters.items() %}
|
{% for _, cluster in arch_clusters.items() %}
|
||||||
- name: {{ cluster.name }}
|
- name: {{ cluster.name }}
|
||||||
connect_timeout: 5s
|
connect_timeout: 5s
|
||||||
|
|
@ -17,13 +17,13 @@ static_resources:
|
||||||
scheme_header_transformation:
|
scheme_header_transformation:
|
||||||
scheme_to_overwrite: https
|
scheme_to_overwrite: https
|
||||||
route_config:
|
route_config:
|
||||||
- name: bolt
|
- name: arch
|
||||||
domains:
|
domains:
|
||||||
- "*"
|
- "*"
|
||||||
routes:
|
routes:
|
||||||
- match:
|
- match:
|
||||||
headers:
|
headers:
|
||||||
- name: "x-bolt-llm-provider"
|
- name: "x-arch-llm-provider"
|
||||||
string_match:
|
string_match:
|
||||||
exact: openai
|
exact: openai
|
||||||
route:
|
route:
|
||||||
|
|
@ -32,7 +32,7 @@ static_resources:
|
||||||
timeout: 60s
|
timeout: 60s
|
||||||
- match:
|
- match:
|
||||||
headers:
|
headers:
|
||||||
- name: "x-bolt-llm-provider"
|
- name: "x-arch-llm-provider"
|
||||||
string_match:
|
string_match:
|
||||||
exact: mistral
|
exact: mistral
|
||||||
route:
|
route:
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5";
|
pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5";
|
||||||
pub const DEFAULT_INTENT_MODEL: &str = "tasksource/deberta-base-long-nli";
|
pub const DEFAULT_INTENT_MODEL: &str = "tasksource/deberta-base-long-nli";
|
||||||
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
|
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
|
||||||
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-bolt-ratelimit-selector";
|
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
|
||||||
pub const SYSTEM_ROLE: &str = "system";
|
pub const SYSTEM_ROLE: &str = "system";
|
||||||
pub const USER_ROLE: &str = "user";
|
pub const USER_ROLE: &str = "user";
|
||||||
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
||||||
pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b";
|
pub const ARC_FC_CLUSTER: &str = "arch_fc";
|
||||||
pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||||
pub const BOLT_ROUTING_HEADER: &str = "x-bolt-llm-provider";
|
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||||
|
|
@ -4,12 +4,12 @@ pub struct LlmProviders;
|
||||||
impl LlmProviders {
|
impl LlmProviders {
|
||||||
pub const OPENAI_PROVIDER: LlmProvider<'static> = LlmProvider {
|
pub const OPENAI_PROVIDER: LlmProvider<'static> = LlmProvider {
|
||||||
name: "openai",
|
name: "openai",
|
||||||
api_key_header: "x-bolt-openai-api-key",
|
api_key_header: "x-arch-openai-api-key",
|
||||||
model: "gpt-3.5-turbo",
|
model: "gpt-3.5-turbo",
|
||||||
};
|
};
|
||||||
pub const MISTRAL_PROVIDER: LlmProvider<'static> = LlmProvider {
|
pub const MISTRAL_PROVIDER: LlmProvider<'static> = LlmProvider {
|
||||||
name: "mistral",
|
name: "mistral",
|
||||||
api_key_header: "x-bolt-mistral-api-key",
|
api_key_header: "x-arch-mistral-api-key",
|
||||||
model: "mistral-large-latest",
|
model: "mistral-large-latest",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::consts::{
|
use crate::consts::{
|
||||||
BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, BOLT_ROUTING_HEADER, DEFAULT_EMBEDDING_MODEL,
|
ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_ROUTING_HEADER, ARC_FC_CLUSTER, DEFAULT_EMBEDDING_MODEL,
|
||||||
DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
|
DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
|
||||||
RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
|
RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
|
||||||
};
|
};
|
||||||
|
|
@ -93,7 +93,7 @@ impl StreamContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_routing_header(&mut self) {
|
fn add_routing_header(&mut self) {
|
||||||
self.add_http_request_header(BOLT_ROUTING_HEADER, self.llm_provider().as_ref());
|
self.add_http_request_header(ARCH_ROUTING_HEADER, self.llm_provider().as_ref());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn modify_auth_headers(&mut self) -> Result<(), String> {
|
fn modify_auth_headers(&mut self) -> Result<(), String> {
|
||||||
|
|
@ -305,15 +305,15 @@ impl StreamContext {
|
||||||
|
|
||||||
let prompt_target_name = zeroshot_intent_response.predicted_class.clone();
|
let prompt_target_name = zeroshot_intent_response.predicted_class.clone();
|
||||||
|
|
||||||
// Check to see who responded to user message. This will help us identify if control should be passed to Bolt FC or not.
|
// Check to see who responded to user message. This will help us identify if control should be passed to Arch FC or not.
|
||||||
// If the last message was from Bolt FC, then Bolt FC is handling the conversation (possibly for parameter collection).
|
// If the last message was from Arch FC, then Arch FC is handling the conversation (possibly for parameter collection).
|
||||||
let mut bolt_assistant = false;
|
let mut arch_assistant = false;
|
||||||
let messages = &callout_context.request_body.messages;
|
let messages = &callout_context.request_body.messages;
|
||||||
if messages.len() >= 2 {
|
if messages.len() >= 2 {
|
||||||
let latest_assistant_message = &messages[messages.len() - 2];
|
let latest_assistant_message = &messages[messages.len() - 2];
|
||||||
if let Some(model) = latest_assistant_message.model.as_ref() {
|
if let Some(model) = latest_assistant_message.model.as_ref() {
|
||||||
if model.starts_with("Bolt") {
|
if model.starts_with("Arch") {
|
||||||
bolt_assistant = true;
|
arch_assistant = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -331,12 +331,12 @@ impl StreamContext {
|
||||||
|
|
||||||
// check to ensure that the prompt target similarity score is above the threshold
|
// check to ensure that the prompt target similarity score is above the threshold
|
||||||
if prompt_target_similarity_score < prompt_target_intent_matching_threshold
|
if prompt_target_similarity_score < prompt_target_intent_matching_threshold
|
||||||
&& !bolt_assistant
|
&& !arch_assistant
|
||||||
{
|
{
|
||||||
// if bolt fc responded to the user message, then we don't need to check the similarity score
|
// if arch fc responded to the user message, then we don't need to check the similarity score
|
||||||
// it may be that bolt fc is handling the conversation for parameter collection
|
// it may be that arch fc is handling the conversation for parameter collection
|
||||||
if bolt_assistant {
|
if arch_assistant {
|
||||||
info!("bolt assistant is handling the conversation");
|
info!("arch assistant is handling the conversation");
|
||||||
} else {
|
} else {
|
||||||
info!(
|
info!(
|
||||||
"prompt target below limit: {:.3}, threshold: {:.3}, continue conversation with user",
|
"prompt target below limit: {:.3}, threshold: {:.3}, continue conversation with user",
|
||||||
|
|
@ -407,7 +407,7 @@ impl StreamContext {
|
||||||
|
|
||||||
let msg_body = match serde_json::to_string(&chat_completions) {
|
let msg_body = match serde_json::to_string(&chat_completions) {
|
||||||
Ok(msg_body) => {
|
Ok(msg_body) => {
|
||||||
debug!("bolt-fc request body content: {}", msg_body);
|
debug!("arch_fc request body content: {}", msg_body);
|
||||||
msg_body
|
msg_body
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
|
@ -419,16 +419,16 @@ impl StreamContext {
|
||||||
};
|
};
|
||||||
|
|
||||||
let token_id = match self.dispatch_http_call(
|
let token_id = match self.dispatch_http_call(
|
||||||
BOLT_FC_CLUSTER,
|
ARC_FC_CLUSTER,
|
||||||
vec![
|
vec![
|
||||||
(":method", "POST"),
|
(":method", "POST"),
|
||||||
(":path", "/v1/chat/completions"),
|
(":path", "/v1/chat/completions"),
|
||||||
(":authority", BOLT_FC_CLUSTER),
|
(":authority", ARC_FC_CLUSTER),
|
||||||
("content-type", "application/json"),
|
("content-type", "application/json"),
|
||||||
("x-envoy-max-retries", "3"),
|
("x-envoy-max-retries", "3"),
|
||||||
(
|
(
|
||||||
"x-envoy-upstream-rq-timeout-ms",
|
"x-envoy-upstream-rq-timeout-ms",
|
||||||
BOLT_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
|
ARCH_FC_REQUEST_TIMEOUT_MS.to_string().as_str(),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
Some(msg_body.as_bytes()),
|
Some(msg_body.as_bytes()),
|
||||||
|
|
@ -445,7 +445,7 @@ impl StreamContext {
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"dispatched call to function {} token_id={}",
|
"dispatched call to function {} token_id={}",
|
||||||
BOLT_FC_CLUSTER, token_id
|
ARC_FC_CLUSTER, token_id
|
||||||
);
|
);
|
||||||
|
|
||||||
self.metrics.active_http_calls.increment(1);
|
self.metrics.active_http_calls.increment(1);
|
||||||
|
|
@ -464,8 +464,8 @@ impl StreamContext {
|
||||||
let body_str = String::from_utf8(body).unwrap();
|
let body_str = String::from_utf8(body).unwrap();
|
||||||
debug!("function_resolver response str: {}", body_str);
|
debug!("function_resolver response str: {}", body_str);
|
||||||
|
|
||||||
let boltfc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) {
|
let arch_fc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) {
|
||||||
Ok(boltfc_response) => boltfc_response,
|
Ok(arch_fc_response) => arch_fc_response,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
return self.send_server_error(
|
return self.send_server_error(
|
||||||
format!(
|
format!(
|
||||||
|
|
@ -477,11 +477,11 @@ impl StreamContext {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let model_resp = &boltfc_response.choices[0];
|
let model_resp = &arch_fc_response.choices[0];
|
||||||
|
|
||||||
if model_resp.message.tool_calls.is_none() {
|
if model_resp.message.tool_calls.is_none() {
|
||||||
// This means that Bolt FC did not have enough information to resolve the function call
|
// This means that Arch FC did not have enough information to resolve the function call
|
||||||
// Bolt FC probably responded with a message asking for more information.
|
// Arch FC probably responded with a message asking for more information.
|
||||||
// Let's send the response back to the user to initialize lightweight dialog for parameter collection
|
// Let's send the response back to the user to initialize lightweight dialog for parameter collection
|
||||||
|
|
||||||
//TODO: add resolver name to the response so the client can send the response back to the correct resolver
|
//TODO: add resolver name to the response so the client can send the response back to the correct resolver
|
||||||
|
|
@ -784,7 +784,7 @@ impl HttpContext for StreamContext {
|
||||||
// the lifecycle of the http request and response.
|
// the lifecycle of the http request and response.
|
||||||
fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
|
fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
|
||||||
let provider_hint = self
|
let provider_hint = self
|
||||||
.get_http_request_header("x-bolt-deterministic-provider")
|
.get_http_request_header("x-arch-deterministic-provider")
|
||||||
.is_some();
|
.is_some();
|
||||||
self.llm_provider = Some(routing::get_llm_provider(provider_hint));
|
self.llm_provider = Some(routing::get_llm_provider(provider_hint));
|
||||||
|
|
||||||
|
|
@ -945,7 +945,7 @@ impl HttpContext for StreamContext {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
debug!("dispatched HTTP call to bolt_guard token_id={}", token_id);
|
debug!("dispatched HTTP call to arch_guard token_id={}", token_id);
|
||||||
|
|
||||||
let call_context = CallContext {
|
let call_context = CallContext {
|
||||||
response_handler_type: ResponseHandlerType::ArchGuard,
|
response_handler_type: ResponseHandlerType::ArchGuard,
|
||||||
|
|
@ -29,17 +29,17 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
||||||
.call_proxy_on_request_headers(http_context, 0, false)
|
.call_proxy_on_request_headers(http_context, 0, false)
|
||||||
.expect_get_header_map_value(
|
.expect_get_header_map_value(
|
||||||
Some(MapType::HttpRequestHeaders),
|
Some(MapType::HttpRequestHeaders),
|
||||||
Some("x-bolt-deterministic-provider"),
|
Some("x-arch-deterministic-provider"),
|
||||||
)
|
)
|
||||||
.returning(Some("true"))
|
.returning(Some("true"))
|
||||||
.expect_add_header_map_value(
|
.expect_add_header_map_value(
|
||||||
Some(MapType::HttpRequestHeaders),
|
Some(MapType::HttpRequestHeaders),
|
||||||
Some("x-bolt-llm-provider"),
|
Some("x-arch-llm-provider"),
|
||||||
Some("openai"),
|
Some("openai"),
|
||||||
)
|
)
|
||||||
.expect_get_header_map_value(
|
.expect_get_header_map_value(
|
||||||
Some(MapType::HttpRequestHeaders),
|
Some(MapType::HttpRequestHeaders),
|
||||||
Some("x-bolt-openai-api-key"),
|
Some("x-arch-openai-api-key"),
|
||||||
)
|
)
|
||||||
.returning(Some("api-key"))
|
.returning(Some("api-key"))
|
||||||
.expect_replace_header_map_value(
|
.expect_replace_header_map_value(
|
||||||
|
|
@ -49,16 +49,16 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
||||||
)
|
)
|
||||||
.expect_remove_header_map_value(
|
.expect_remove_header_map_value(
|
||||||
Some(MapType::HttpRequestHeaders),
|
Some(MapType::HttpRequestHeaders),
|
||||||
Some("x-bolt-openai-api-key"),
|
Some("x-arch-openai-api-key"),
|
||||||
)
|
)
|
||||||
.expect_remove_header_map_value(
|
.expect_remove_header_map_value(
|
||||||
Some(MapType::HttpRequestHeaders),
|
Some(MapType::HttpRequestHeaders),
|
||||||
Some("x-bolt-mistral-api-key"),
|
Some("x-arch-mistral-api-key"),
|
||||||
)
|
)
|
||||||
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
|
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
|
||||||
.expect_get_header_map_value(
|
.expect_get_header_map_value(
|
||||||
Some(MapType::HttpRequestHeaders),
|
Some(MapType::HttpRequestHeaders),
|
||||||
Some("x-bolt-ratelimit-selector"),
|
Some("x-arch-ratelimit-selector"),
|
||||||
)
|
)
|
||||||
.returning(Some("selector-key"))
|
.returning(Some("selector-key"))
|
||||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
|
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
|
||||||
|
|
@ -164,7 +164,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Info), None)
|
.expect_log(Some(LogLevel::Info), None)
|
||||||
.expect_http_call(Some("bolt_fc_1b"), None, None, None, None)
|
.expect_http_call(Some("arch_fc"), None, None, None, None)
|
||||||
.returning(Some(3))
|
.returning(Some(3))
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
|
|
@ -402,7 +402,7 @@ fn request_ratelimited() {
|
||||||
|
|
||||||
normal_flow(&mut module, filter_context, http_context);
|
normal_flow(&mut module, filter_context, http_context);
|
||||||
|
|
||||||
let bolt_fc_resp = ChatCompletionsResponse {
|
let arch_fc_resp = ChatCompletionsResponse {
|
||||||
usage: Usage {
|
usage: Usage {
|
||||||
completion_tokens: 0,
|
completion_tokens: 0,
|
||||||
},
|
},
|
||||||
|
|
@ -429,12 +429,12 @@ fn request_ratelimited() {
|
||||||
model: String::from("test"),
|
model: String::from("test"),
|
||||||
};
|
};
|
||||||
|
|
||||||
let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap();
|
let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
|
||||||
module
|
module
|
||||||
.call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0)
|
.call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0)
|
||||||
.expect_metric_increment("active_http_calls", -1)
|
.expect_metric_increment("active_http_calls", -1)
|
||||||
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
|
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
|
||||||
.returning(Some(&bolt_fc_resp_str))
|
.returning(Some(&arch_fc_resp_str))
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
|
|
@ -517,7 +517,7 @@ fn request_not_ratelimited() {
|
||||||
|
|
||||||
normal_flow(&mut module, filter_context, http_context);
|
normal_flow(&mut module, filter_context, http_context);
|
||||||
|
|
||||||
let bolt_fc_resp = ChatCompletionsResponse {
|
let arch_fc_resp = ChatCompletionsResponse {
|
||||||
usage: Usage {
|
usage: Usage {
|
||||||
completion_tokens: 0,
|
completion_tokens: 0,
|
||||||
},
|
},
|
||||||
|
|
@ -544,12 +544,12 @@ fn request_not_ratelimited() {
|
||||||
model: String::from("test"),
|
model: String::from("test"),
|
||||||
};
|
};
|
||||||
|
|
||||||
let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap();
|
let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
|
||||||
module
|
module
|
||||||
.call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0)
|
.call_proxy_on_http_call_response(http_context, 3, 0, arch_fc_resp_str.len() as i32, 0)
|
||||||
.expect_metric_increment("active_http_calls", -1)
|
.expect_metric_increment("active_http_calls", -1)
|
||||||
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
|
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
|
||||||
.returning(Some(&bolt_fc_resp_str))
|
.returning(Some(&arch_fc_resp_str))
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
|
|
@ -22,9 +22,9 @@ def predict(message, history):
|
||||||
|
|
||||||
# Custom headers
|
# Custom headers
|
||||||
custom_headers = {
|
custom_headers = {
|
||||||
'x-bolt-openai-api-key': f"{OPENAI_API_KEY}",
|
'x-arch-openai-api-key': f"{OPENAI_API_KEY}",
|
||||||
'x-bolt-mistral-api-key': f"{MISTRAL_API_KEY}",
|
'x-arch-mistral-api-key': f"{MISTRAL_API_KEY}",
|
||||||
'x-bolt-deterministic-provider': 'openai',
|
'x-arch-deterministic-provider': 'openai',
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -53,7 +53,7 @@ def predict(message, history):
|
||||||
|
|
||||||
with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo:
|
with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo:
|
||||||
print("Starting Demo...")
|
print("Starting Demo...")
|
||||||
chatbot = gr.Chatbot(label="Bolt Chatbot", scale=1)
|
chatbot = gr.Chatbot(label="Arch Chatbot", scale=1)
|
||||||
state = gr.State([])
|
state = gr.State([])
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True)
|
txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ WORKDIR /usr/src/app
|
||||||
COPY config_generator/requirements.txt .
|
COPY config_generator/requirements.txt .
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
COPY config_generator/config_generator.py .
|
COPY config_generator/config_generator.py .
|
||||||
COPY envoyfilter/envoy.template.yaml .
|
COPY arch/envoy.template.yaml .
|
||||||
COPY envoyfilter/katanemo-config.yaml .
|
COPY arch/katanemo-config.yaml .
|
||||||
|
|
||||||
CMD ["python", "config_generator.py"]
|
CMD ["python", "config_generator.py"]
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,13 @@ from jinja2 import Environment, FileSystemLoader
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
|
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
|
||||||
BOLT_CONFIG_FILE = os.getenv('BOLT_CONFIG_FILE', 'bolt_config.yaml')
|
ARCH_CONFIG_FILE = os.getenv('ARCH_CONFIG_FILE', 'arch_config.yaml')
|
||||||
ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')
|
ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')
|
||||||
|
|
||||||
env = Environment(loader=FileSystemLoader('./'))
|
env = Environment(loader=FileSystemLoader('./'))
|
||||||
template = env.get_template('envoy.template.yaml')
|
template = env.get_template('envoy.template.yaml')
|
||||||
|
|
||||||
with open(BOLT_CONFIG_FILE, 'r') as file:
|
with open(ARCH_CONFIG_FILE, 'r') as file:
|
||||||
katanemo_config = file.read()
|
katanemo_config = file.read()
|
||||||
|
|
||||||
config_yaml = yaml.safe_load(katanemo_config)
|
config_yaml = yaml.safe_load(katanemo_config)
|
||||||
|
|
|
||||||
|
|
@ -5,15 +5,14 @@ services:
|
||||||
context: ../../
|
context: ../../
|
||||||
dockerfile: config_generator/Dockerfile
|
dockerfile: config_generator/Dockerfile
|
||||||
volumes:
|
volumes:
|
||||||
- ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
|
- ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
|
||||||
- ./bolt_config.yaml:/usr/src/app/bolt_config.yaml
|
- ./arch_config.yaml:/usr/src/app/arch_config.yaml
|
||||||
- ./generated:/usr/src/app/out
|
- ./generated:/usr/src/app/out
|
||||||
|
|
||||||
bolt:
|
arch:
|
||||||
build:
|
build:
|
||||||
context: ../../
|
context: ../../
|
||||||
dockerfile: envoyfilter/Dockerfile
|
dockerfile: arch/Dockerfile
|
||||||
hostname: bolt
|
|
||||||
ports:
|
ports:
|
||||||
- "10000:10000"
|
- "10000:10000"
|
||||||
- "19901:9901"
|
- "19901:9901"
|
||||||
|
|
@ -36,12 +35,12 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- "18081:80"
|
- "18081:80"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
|
test: ["CMD", "curl" ,"http://localhost/healthz"]
|
||||||
interval: 5s
|
interval: 5s
|
||||||
retries: 20
|
retries: 20
|
||||||
volumes:
|
volumes:
|
||||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||||
- ./bolt_config.yaml:/root/bolt_config.yaml
|
- ./arch_config.yaml:/root/arch_config.yaml
|
||||||
|
|
||||||
function_resolver:
|
function_resolver:
|
||||||
build:
|
build:
|
||||||
|
|
@ -84,7 +83,7 @@ services:
|
||||||
profiles:
|
profiles:
|
||||||
- manual
|
- manual
|
||||||
|
|
||||||
open-webui:
|
open_webui:
|
||||||
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
|
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
|
||||||
container_name: open-webui
|
container_name: open-webui
|
||||||
volumes:
|
volumes:
|
||||||
|
|
@ -111,7 +110,7 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||||
- CHAT_COMPLETION_ENDPOINT=http://bolt:10000/v1
|
- CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
|
||||||
|
|
||||||
prometheus:
|
prometheus:
|
||||||
image: prom/prometheus
|
image: prom/prometheus
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,8 @@
|
||||||
"path": "."
|
"path": "."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "envoyfilter",
|
"name": "arch",
|
||||||
"path": "envoyfilter"
|
"path": "arch"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "model_server",
|
"name": "model_server",
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ transformers = load_transformers()
|
||||||
ner_models = load_ner_models()
|
ner_models = load_ner_models()
|
||||||
zero_shot_models = load_zero_shot_models()
|
zero_shot_models = load_zero_shot_models()
|
||||||
|
|
||||||
with open("/root/bolt_config.yaml", "r") as file:
|
with open("/root/arch_config.yaml", "r") as file:
|
||||||
config = yaml.safe_load(file)
|
config = yaml.safe_load(file)
|
||||||
with open("guard_model_config.yaml") as f:
|
with open("guard_model_config.yaml") as f:
|
||||||
guard_model_config = yaml.safe_load(f)
|
guard_model_config = yaml.safe_load(f)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue