model server build (#127)

* first commit to have model_server not be dependent on Docker * making changes to fix the docker-compose file for archgw to set DNS_V4 and minor fixes with the build * additional fixes for model server to be separated out in the build * additional fixes for model server to be separated out in the build * fix to get model_server to be built as a separate python process. TODO: fix the embeddings logs after cli completes * fixing init to pull tempfile using the tempfile python package --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
2026-06-14 15:15:15 +02:00 · 2024-10-06 18:21:43 -07:00 · 2024-10-06 18:21:43 -07:00 · b60ceb9168
commit b60ceb9168
parent 7d21359f5b
21 changed files with 3390 additions and 154 deletions
--- a/arch/docker-compose.yaml
+++ b/arch/docker-compose.yaml
@ -7,24 +7,5 @@ services:
    volumes:
      - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-    depends_on:
-      model_server:
-        condition: service_healthy
    env_file:
      - stage.env
-
-  model_server:
-    image: model_server:latest
-    ports:
-      - "18081:80"
-    healthcheck:
-        test: ["CMD", "curl" ,"http://localhost/healthz"]
-        interval: 5s
-        retries: 20
-    volumes:
-      - ~/.cache/huggingface:/root/.cache/huggingface
-    environment:
-      - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
-      - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
-      - MODE=${MODE:-cloud}
-      - FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -123,8 +123,8 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: model_server
-                      port_value: 80
+                      address: host.docker.internal
+                      port_value: 51000
                  hostname: "model_server"
    - name: mistral_7b_instruct
      connect_timeout: 5s
@ -153,8 +153,8 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: model_server
-                      port_value: 80
+                      address: host.docker.internal
+                      port_value: 51000
                  hostname: "arch_fc"
 {% for _, cluster in arch_clusters.items() %}
    - name: {{ cluster.name }}
--- a/arch/tools/cli.py
+++ b/arch/tools/cli.py
@ -5,7 +5,7 @@ import config_generator
 import pkg_resources
 import sys
 import subprocess
-from core import start_arch, stop_arch
+from core import start_arch_modelserver, stop_arch_modelserver, start_arch, stop_arch
 from utils import get_llm_provider_access_keys, load_env_file_to_dict

 logo = r"""
@ -26,7 +26,7 @@ def main(ctx):

 # Command to build archgw and model_server Docker images
 ARCHGW_DOCKERFILE = "./arch/Dockerfile"
-MODEL_SERVER_DOCKERFILE = "./model_server/Dockerfile"
+MODEL_SERVER_BUILD_FILE = "./model_server/pyproject.toml"

@click.command()
 def build():
@ -44,21 +44,22 @@ def build():
        click.echo("Error: Dockerfile not found in /arch")
        sys.exit(1)

-    # Check if /model_server/Dockerfile exists
-    if os.path.exists(MODEL_SERVER_DOCKERFILE):
-        click.echo("Building model_server image...")
+    click.echo("All images built successfully.")
+
+    """Install the model server dependencies using Poetry."""
+    # Check if pyproject.toml exists
+    if os.path.exists(MODEL_SERVER_BUILD_FILE):
+        click.echo("Installing model server dependencies with Poetry...")
        try:
-            subprocess.run(["docker", "build", "-f", MODEL_SERVER_DOCKERFILE, "-t", "model_server:latest", "./model_server"], check=True)
-            click.echo("model_server image built successfully.")
+            subprocess.run(["poetry", "install", "--no-cache"], cwd=os.path.dirname(MODEL_SERVER_BUILD_FILE), check=True)
+            click.echo("Model server dependencies installed successfully.")
        except subprocess.CalledProcessError as e:
-            click.echo(f"Error building model_server image: {e}")
+            click.echo(f"Error installing model server dependencies: {e}")
            sys.exit(1)
    else:
-        click.echo("Error: Dockerfile not found in /model_server")
+        click.echo(f"Error: pyproject.toml not found in {MODEL_SERVER_BUILD_FILE}")
        sys.exit(1)

-    click.echo("All images built successfully.")
-
@click.command()
@click.argument('file', required=False)  # Optional file argument
@click.option('-path', default='.', help='Path to the directory containing arch_config.yml')
@ -120,11 +121,14 @@ def up(file, path):
    env = os.environ.copy()
    env.update(env_stage)
    env['ARCH_CONFIG_FILE'] = arch_config_file
+
+    start_arch_modelserver()
    start_arch(arch_config_file, env)

@click.command()
 def down():
    """Stops Arch."""
+    stop_arch_modelserver()
    stop_arch()

@click.command()
--- a/arch/tools/core.py
+++ b/arch/tools/core.py
@ -5,14 +5,13 @@ import pkg_resources
 import select
 from utils import run_docker_compose_ps, print_service_status, check_services_state

-def start_arch(arch_config_file, env, log_timeout=120, check_interval=1):
+def start_arch(arch_config_file, env, log_timeout=120):
    """
    Start Docker Compose in detached mode and stream logs until services are healthy.

    Args:
        path (str): The path where the prompt_confi.yml file is located.
        log_timeout (int): Time in seconds to show logs before checking for healthy state.
-        check_interval (int): Time in seconds between health status checks.
    """

    compose_file = pkg_resources.resource_filename(__name__, 'config/docker-compose.yaml')
@ -96,3 +95,33 @@ def stop_arch():

    except subprocess.CalledProcessError as e:
        print(f"Failed to shut down services: {str(e)}")
+
+def start_arch_modelserver():
+    """
+    Start the model server as a local (non-Docker) process.
+
+    Runs ``archgw_modelserver restart`` and waits for that command to return.
+    This assumes that the archgw_modelserver package is installed locally so
+    that the ``archgw_modelserver`` command can be found.
+
+    Exits the process with status 1 if the command cannot be launched or if it
+    returns a non-zero exit code.
+    """
+    try:
+        subprocess.run(
+            ['archgw_modelserver', 'restart'],
+            check=True,
+        )
+    except FileNotFoundError:
+        # Raised instead of CalledProcessError when the executable does not exist;
+        # report it like any other start failure rather than dumping a traceback.
+        print("Failed to start model_server. The archgw_modelserver command was not found; is the package installed?")
+        sys.exit(1)
+    except subprocess.CalledProcessError:
+        # check=True turns a non-zero exit status into this exception.
+        print("Failed to start model_server. Please check archgw_modelserver logs")
+        sys.exit(1)
+    else:
+        print("Successfull run the archgw model_server")
+
+def stop_arch_modelserver():
+    """
+    Stop the locally running model server process.
+
+    Runs ``archgw_modelserver stop`` and waits for that command to return.
+    This assumes that the archgw_modelserver package is installed locally so
+    that the ``archgw_modelserver`` command can be found.
+
+    Exits the process with status 1 if the command cannot be launched or if it
+    returns a non-zero exit code.
+    """
+    try:
+        subprocess.run(
+            ['archgw_modelserver', 'stop'],
+            check=True,
+        )
+    except FileNotFoundError:
+        # Raised instead of CalledProcessError when the executable does not exist;
+        # report it like any other stop failure rather than dumping a traceback.
+        print("Failed to stop model_server. The archgw_modelserver command was not found; is the package installed?")
+        sys.exit(1)
+    except subprocess.CalledProcessError:
+        # check=True turns a non-zero exit status into this exception.
+        # The message previously said "start", copied from the start helper.
+        print("Failed to stop model_server. Please check archgw_modelserver logs")
+        sys.exit(1)
+    else:
+        print("Successfull stopped the archgw model_server")