Refactor model server hardware config + add unit tests to load/request to the server (#189)

* remove mode/hardware

* add test and pre commit hook

* add pytest dependencies

* fix format

* fix lint

* fix precommit

* fix pre commit

* fix pre commit

* fix precommit

* fix precommit

* fix precommit

* fix precommit

* fix precommit

* fix precommit

* fix precommit

* fix precommit

* fix precommit

* fix precommit
This commit is contained in:
CTran 2024-10-16 16:58:10 -07:00 committed by GitHub
parent 3bd2ffe9fb
commit 8e54ac20d8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 480 additions and 43 deletions

View file

@ -22,9 +22,11 @@ def get_device():
available_device = {
"cpu": True,
"cuda": torch.cuda.is_available(),
"mps": torch.backends.mps.is_available()
if hasattr(torch.backends, "mps")
else False,
"mps": (
torch.backends.mps.is_available()
if hasattr(torch.backends, "mps")
else False
),
}
if available_device["cuda"]:
@ -37,24 +39,6 @@ def get_device():
return device
def get_serving_mode():
    """Return the serving mode configured through the ``MODE`` env variable.

    The variable defaults to ``"cloud"`` when it is not set.

    Raises:
        ValueError: If the value is not one of ``"cloud"``, ``"local-gpu"``
            or ``"local-cpu"``.
    """
    serving_mode = os.getenv("MODE", "cloud")
    if serving_mode in ("cloud", "local-gpu", "local-cpu"):
        return serving_mode
    raise ValueError(f"Invalid serving mode: {serving_mode}")
def get_hardware(mode):
    """Return the hardware label (``"gpu"`` or ``"cpu"``) for a serving mode.

    Args:
        mode: Serving mode string. ``"local-cpu"`` always yields ``"cpu"``;
            any other value yields ``"gpu"`` only when
            ``torch.cuda.is_available()`` reports CUDA, else ``"cpu"``.

    Returns:
        Either ``"gpu"`` or ``"cpu"``.
    """
    # CUDA is only probed when the mode does not force CPU execution.
    if mode != "local-cpu" and torch.cuda.is_available():
        return "gpu"
    return "cpu"
def get_client(endpoint):
    """Build an ``OpenAI`` client that talks to ``endpoint``.

    Args:
        endpoint: Value passed to the client as ``base_url``.

    Returns:
        The constructed ``OpenAI`` client. The API key is the fixed
        placeholder string ``"EMPTY"``.
    """
    return OpenAI(base_url=endpoint, api_key="EMPTY")