mirror of
https://github.com/katanemo/plano.git
synced 2026-06-14 15:15:15 +02:00
Use large github action machine to run e2e tests (#230)
This commit is contained in:
parent
bb882fb59b
commit
e462e393b1
30 changed files with 4725 additions and 441 deletions
2
model_server/.vscode/launch.json
vendored
2
model_server/.vscode/launch.json
vendored
|
|
@ -9,7 +9,7 @@
|
|||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"args": ["app.main:app","--reload", "--port", "8000"]
|
||||
"args": ["app.main:app","--reload", "--port", "51000"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import importlib
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
|
@ -7,6 +8,15 @@ import tempfile
|
|||
import subprocess
|
||||
import logging
|
||||
|
||||
|
||||
def get_version():
    """Return the installed version string of the model server package.

    Looks up the distribution named "archgw_modelserver" in the installed
    package metadata.

    Returns:
        The version string reported by the package metadata, or the literal
        string "version not found" when the distribution is not installed.
    """
    # A plain `import importlib` does not guarantee that the `metadata`
    # submodule is loaded; accessing `importlib.metadata` can then raise
    # AttributeError. Import the submodule explicitly so both the lookup and
    # the except clause below always resolve.
    from importlib import metadata

    try:
        return metadata.version("archgw_modelserver")
    except metadata.PackageNotFoundError:
        return "version not found"
|
||||
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
|
|
@ -14,6 +24,7 @@ logging.basicConfig(
|
|||
|
||||
log = logging.getLogger("model_server.cli")
|
||||
log.setLevel(logging.INFO)
|
||||
log.info(f"model server version: {get_version()}")
|
||||
|
||||
|
||||
def run_server(port=51000):
|
||||
|
|
@ -37,8 +48,9 @@ def run_server(port=51000):
|
|||
def start_server(port=51000):
|
||||
"""Start the Uvicorn server"""
|
||||
log.info(
|
||||
"Starting model server - loading some awesomeness, this may take some time :)"
|
||||
"starting model server - loading some awesomeness, this may take some time :)"
|
||||
)
|
||||
|
||||
process = subprocess.Popen(
|
||||
[
|
||||
"python",
|
||||
|
|
@ -61,7 +73,7 @@ def start_server(port=51000):
|
|||
log.info(f"Model server started with PID {process.pid}")
|
||||
else:
|
||||
# Add model_server boot-up logs
|
||||
log.info("Model server - Didn't Sart In Time. Shutting Down")
|
||||
log.info("model server - didn't start in time, shutting down")
|
||||
process.terminate()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -67,12 +67,16 @@ async def chat_completion(req: ChatMessage, res: Response):
|
|||
f"model_server => arch_function: {client_model_name}, messages: {json.dumps(messages)}"
|
||||
)
|
||||
|
||||
resp = const.arch_function_client.chat.completions.create(
|
||||
messages=messages,
|
||||
model=client_model_name,
|
||||
stream=False,
|
||||
extra_body=const.arch_function_generation_params,
|
||||
)
|
||||
try:
|
||||
resp = const.arch_function_client.chat.completions.create(
|
||||
messages=messages,
|
||||
model=client_model_name,
|
||||
stream=False,
|
||||
extra_body=const.arch_function_generation_params,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"model_server <= arch_function: error: {e}")
|
||||
raise
|
||||
|
||||
tool_calls = const.arch_function_hanlder.extract_tool_calls(
|
||||
resp.choices[0].message.content
|
||||
|
|
|
|||
1192
model_server/poetry.lock
generated
1192
model_server/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "archgw_modelserver"
|
||||
version = "0.0.4"
|
||||
version = "0.0.5"
|
||||
description = "A model server for serving models"
|
||||
authors = ["Katanemo Labs, Inc <archgw@katanemo.com>"]
|
||||
license = "Apache 2.0"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue