mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix more
This commit is contained in:
parent
d2ad943f63
commit
9cb04756c5
13 changed files with 181 additions and 90 deletions
|
|
@ -104,7 +104,25 @@ def validate_and_render_schema():
|
|||
arch_config_string = yaml.dump(config_yaml)
|
||||
arch_llm_config_string = yaml.dump(config_yaml)
|
||||
|
||||
prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {})
|
||||
if prompt_gateway_listener.get("port") == None:
|
||||
prompt_gateway_listener["port"] = 10000 # default port for prompt gateway
|
||||
if prompt_gateway_listener.get("address") == None:
|
||||
prompt_gateway_listener["address"] = "127.0.0.1"
|
||||
if prompt_gateway_listener.get("timeout") == None:
|
||||
prompt_gateway_listener["timeout"] = "10s"
|
||||
|
||||
llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {})
|
||||
if llm_gateway_listener.get("port") == None:
|
||||
llm_gateway_listener["port"] = 12000 # default port for llm gateway
|
||||
if llm_gateway_listener.get("address") == None:
|
||||
llm_gateway_listener["address"] = "127.0.0.1"
|
||||
if llm_gateway_listener.get("timeout") == None:
|
||||
llm_gateway_listener["timeout"] = "10s"
|
||||
|
||||
data = {
|
||||
"prompt_gateway_listener": prompt_gateway_listener,
|
||||
"llm_gateway_listener": llm_gateway_listener,
|
||||
"arch_config": arch_config_string,
|
||||
"arch_llm_config": arch_llm_config_string,
|
||||
"arch_clusters": inferred_clusters,
|
||||
|
|
|
|||
|
|
@ -15,12 +15,15 @@ from cli.consts import (
|
|||
)
|
||||
from huggingface_hub import snapshot_download
|
||||
from dotenv import dotenv_values
|
||||
import yaml
|
||||
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def start_archgw_docker(client, arch_config_file, env):
|
||||
def start_archgw_docker(
|
||||
client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
|
||||
):
|
||||
logs_path = "~/archgw_logs"
|
||||
logs_path_abs = os.path.expanduser(logs_path)
|
||||
|
||||
|
|
@ -29,10 +32,10 @@ def start_archgw_docker(client, arch_config_file, env):
|
|||
image=ARCHGW_DOCKER_IMAGE,
|
||||
detach=True, # Run in detached mode
|
||||
ports={
|
||||
"10000/tcp": 10000,
|
||||
f"{prompt_gateway_port}/tcp": prompt_gateway_port,
|
||||
"10001/tcp": 10001,
|
||||
"11000/tcp": 11000,
|
||||
"12000/tcp": 12000,
|
||||
f"{llm_gateway_port}/tcp": llm_gateway_port,
|
||||
"9901/tcp": 19901,
|
||||
},
|
||||
volumes={
|
||||
|
|
@ -50,7 +53,12 @@ def start_archgw_docker(client, arch_config_file, env):
|
|||
},
|
||||
extra_hosts={"host.docker.internal": "host-gateway"},
|
||||
healthcheck={
|
||||
"test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
|
||||
"test": [
|
||||
"CMD",
|
||||
"curl",
|
||||
"-f",
|
||||
f"http://localhost:{prompt_gateway_port}/healthz",
|
||||
],
|
||||
"interval": 5000000000, # 5 seconds
|
||||
"timeout": 1000000000, # 1 second
|
||||
"retries": 3,
|
||||
|
|
@ -128,7 +136,25 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
except docker.errors.NotFound as e:
|
||||
pass
|
||||
|
||||
container = start_archgw_docker(client, arch_config_file, env)
|
||||
# parse the arch_config_file YAML file to get the prompt gateway and LLM gateway ports
|
||||
arch_config_dict = {}
|
||||
with open(arch_config_file) as f:
|
||||
arch_config_dict = yaml.safe_load(f)
|
||||
|
||||
prompt_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("prompt_gateway", {})
|
||||
.get("port", 10000)
|
||||
)
|
||||
llm_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("llm_gateway", {})
|
||||
.get("port", 12000)
|
||||
)
|
||||
|
||||
container = start_archgw_docker(
|
||||
client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue