Use large github action machine to run e2e tests (#230)

This commit is contained in:
Adil Hafeez 2024-10-30 17:54:51 -07:00 committed by GitHub
parent bb882fb59b
commit e462e393b1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 4725 additions and 441 deletions

View file

@ -9,7 +9,7 @@
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["app.main:app","--reload", "--port", "8000"]
"args": ["app.main:app","--reload", "--port", "51000"]
}
]
}

View file

@ -1,3 +1,4 @@
import importlib
import sys
import os
import time
@ -7,6 +8,15 @@ import tempfile
import subprocess
import logging
def get_version():
    """Return the installed version of the ``archgw_modelserver`` package.

    Returns:
        The version string reported by the installed distribution's metadata,
        or the literal ``"version not found"`` when no distribution named
        ``archgw_modelserver`` is installed.
    """
    # A bare ``import importlib`` does not guarantee that the ``metadata``
    # submodule is loaded; import the needed names explicitly so the lookup
    # cannot fail with AttributeError on ``importlib.metadata``.
    from importlib.metadata import PackageNotFoundError, version

    try:
        return version("archgw_modelserver")
    except PackageNotFoundError:
        return "version not found"
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
@ -14,6 +24,7 @@ logging.basicConfig(
log = logging.getLogger("model_server.cli")
log.setLevel(logging.INFO)
log.info(f"model server version: {get_version()}")
def run_server(port=51000):
@ -37,8 +48,9 @@ def run_server(port=51000):
def start_server(port=51000):
"""Start the Uvicorn server"""
log.info(
"Starting model server - loading some awesomeness, this may take some time :)"
"starting model server - loading some awesomeness, this may take some time :)"
)
process = subprocess.Popen(
[
"python",
@ -61,7 +73,7 @@ def start_server(port=51000):
log.info(f"Model server started with PID {process.pid}")
else:
# Add model_server boot-up logs
log.info("Model server - Didn't Sart In Time. Shutting Down")
log.info("model server - didn't start in time, shutting down")
process.terminate()

View file

@ -67,12 +67,16 @@ async def chat_completion(req: ChatMessage, res: Response):
f"model_server => arch_function: {client_model_name}, messages: {json.dumps(messages)}"
)
resp = const.arch_function_client.chat.completions.create(
messages=messages,
model=client_model_name,
stream=False,
extra_body=const.arch_function_generation_params,
)
try:
resp = const.arch_function_client.chat.completions.create(
messages=messages,
model=client_model_name,
stream=False,
extra_body=const.arch_function_generation_params,
)
except Exception as e:
logger.error(f"model_server <= arch_function: error: {e}")
raise
tool_calls = const.arch_function_hanlder.extract_tool_calls(
resp.choices[0].message.content

1192
model_server/poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "archgw_modelserver"
version = "0.0.4"
version = "0.0.5"
description = "A model server for serving models"
authors = ["Katanemo Labs, Inc <archgw@katanemo.com>"]
license = "Apache 2.0"