rename archgw_model_server => model_server (#106)

This commit is contained in:
Adil Hafeez 2024-10-01 11:24:43 -07:00 committed by GitHub
parent 8654d3d5c5
commit 1a7c1ad0a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 16 additions and 35 deletions

View file

@ -11,10 +11,10 @@ services:
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./arch_log:/var/log/
depends_on:
archgw_model_server:
model_server:
condition: service_healthy
archgw_model_server:
model_server:
build:
context: ../model_server
dockerfile: Dockerfile

View file

@ -132,20 +132,20 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.mistral.ai
- name: archgw_model_server
- name: model_server
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: archgw_model_server
cluster_name: model_server
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: archgw_model_server
address: model_server
port_value: 80
hostname: "archgw_model_server"
hostname: "model_server"
- name: mistral_7b_instruct
connect_timeout: 5s
type: STRICT_DNS
@ -171,7 +171,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: archgw_model_server
address: model_server
port_value: 80
hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %}

View file

@ -7,6 +7,6 @@ pub const USER_ROLE: &str = "user";
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
pub const ARC_FC_CLUSTER: &str = "arch_fc";
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const MODEL_SERVER_NAME: &str = "archgw_model_server";
pub const MODEL_SERVER_NAME: &str = "model_server";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const ARCH_MESSAGES_KEY: &str = "arch_messages";

View file

@ -104,7 +104,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_http_call(Some("archgw_model_server"), None, None, None, None)
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(1))
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_increment("active_http_calls", 1)
@ -136,7 +136,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.returning(Some(&embeddings_response_buffer))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_http_call(Some("archgw_model_server"), None, None, None, None)
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(2))
.expect_metric_increment("active_http_calls", 1)
.expect_log(Some(LogLevel::Debug), None)
@ -313,7 +313,7 @@ fn successful_request_to_open_ai_chat_completions() {
.returning(Some(chat_completions_request_body))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None)
.expect_http_call(Some("archgw_model_server"), None, None, None, None)
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(4))
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::Action(Action::Pause))

View file

@ -28,7 +28,7 @@ services:
environment:
- LOG_LEVEL=debug
archgw_model_server:
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile

View file

@ -28,7 +28,7 @@ services:
environment:
- LOG_LEVEL=debug
archgw_model_server:
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile

View file

@ -1,14 +1,5 @@
services:
config_generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./arch_config.yaml:/usr/src/app/arch_config.yaml
- ./generated:/usr/src/app/out
arch:
build:
context: ../../
@ -21,14 +12,12 @@ services:
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./arch_log:/var/log/
depends_on:
config_generator:
condition: service_completed_successfully
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
archgw_model_server:
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile

View file

@ -28,7 +28,7 @@ services:
environment:
- LOG_LEVEL=debug
archgw_model_server:
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile

View file

@ -29,7 +29,7 @@ services:
environment:
- LOG_LEVEL=debug
archgw_model_server:
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile

View file

@ -16,14 +16,6 @@
"name": "chatbot_ui",
"path": "chatbot_ui"
},
{
"name": "open-message-format",
"path": "open-message-format"
},
{
"name": "config_generator",
"path": "config_generator"
},
{
"name": "demos/function_calling",
"path": "./demos/function_calling",