mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
rename archgw_model_server => model_server (#106)
This commit is contained in:
parent
8654d3d5c5
commit
1a7c1ad0a5
10 changed files with 16 additions and 35 deletions
|
|
@ -11,10 +11,10 @@ services:
|
|||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ./arch_log:/var/log/
|
||||
depends_on:
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
condition: service_healthy
|
||||
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
build:
|
||||
context: ../model_server
|
||||
dockerfile: Dockerfile
|
||||
|
|
|
|||
|
|
@ -132,20 +132,20 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: api.mistral.ai
|
||||
- name: archgw_model_server
|
||||
- name: model_server
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: archgw_model_server
|
||||
cluster_name: model_server
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: archgw_model_server
|
||||
address: model_server
|
||||
port_value: 80
|
||||
hostname: "archgw_model_server"
|
||||
hostname: "model_server"
|
||||
- name: mistral_7b_instruct
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
|
|
@ -171,7 +171,7 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: archgw_model_server
|
||||
address: model_server
|
||||
port_value: 80
|
||||
hostname: "arch_fc"
|
||||
{% for _, cluster in arch_clusters.items() %}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,6 @@ pub const USER_ROLE: &str = "user";
|
|||
pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
|
||||
pub const ARC_FC_CLUSTER: &str = "arch_fc";
|
||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const MODEL_SERVER_NAME: &str = "archgw_model_server";
|
||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
pub const ARCH_MESSAGES_KEY: &str = "arch_messages";
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
|
|||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_http_call(Some("archgw_model_server"), None, None, None, None)
|
||||
.expect_http_call(Some("model_server"), None, None, None, None)
|
||||
.returning(Some(1))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
|
|
@ -136,7 +136,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
|
|||
.returning(Some(&embeddings_response_buffer))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_http_call(Some("archgw_model_server"), None, None, None, None)
|
||||
.expect_http_call(Some("model_server"), None, None, None, None)
|
||||
.returning(Some(2))
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
@ -313,7 +313,7 @@ fn successful_request_to_open_ai_chat_completions() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Info), None)
|
||||
.expect_http_call(Some("archgw_model_server"), None, None, None, None)
|
||||
.expect_http_call(Some("model_server"), None, None, None, None)
|
||||
.returning(Some(4))
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
.execute_and_expect(ReturnType::Action(Action::Pause))
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ services:
|
|||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ services:
|
|||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
|
|
|
|||
|
|
@ -1,14 +1,5 @@
|
|||
services:
|
||||
|
||||
config_generator:
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: config_generator/Dockerfile
|
||||
volumes:
|
||||
- ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
|
||||
- ./arch_config.yaml:/usr/src/app/arch_config.yaml
|
||||
- ./generated:/usr/src/app/out
|
||||
|
||||
arch:
|
||||
build:
|
||||
context: ../../
|
||||
|
|
@ -21,14 +12,12 @@ services:
|
|||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ./arch_log:/var/log/
|
||||
depends_on:
|
||||
config_generator:
|
||||
condition: service_completed_successfully
|
||||
model_server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ services:
|
|||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ services:
|
|||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
archgw_model_server:
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
|
|
|
|||
|
|
@ -16,14 +16,6 @@
|
|||
"name": "chatbot_ui",
|
||||
"path": "chatbot_ui"
|
||||
},
|
||||
{
|
||||
"name": "open-message-format",
|
||||
"path": "open-message-format"
|
||||
},
|
||||
{
|
||||
"name": "config_generator",
|
||||
"path": "config_generator"
|
||||
},
|
||||
{
|
||||
"name": "demos/function_calling",
|
||||
"path": "./demos/function_calling",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue