mirror of
https://github.com/katanemo/plano.git
synced 2026-04-27 17:56:28 +02:00
* first commit to have model_server not be dependent on Docker * making changes to fix the docker-compose file for archgw to set DNS_V4 and minor fixes with the build * additional fixes for model server to be separated out in the build * additional fixes for model server to be separated out in the build * fix to get model_server to be built as a separate python process. TODO: fix the embeddings logs after cli completes * fixing init to pull tempfile using the tempfile python package --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
65 lines
1.9 KiB
Python
65 lines
1.9 KiB
Python
import os
|
|
import sentence_transformers
|
|
from transformers import AutoTokenizer, pipeline
|
|
import sqlite3
|
|
import torch
|
|
|
|
def get_device():
|
|
if torch.cuda.is_available():
|
|
device = "cuda"
|
|
elif torch.backends.mps.is_available():
|
|
device = "mps"
|
|
else:
|
|
device = "cpu"
|
|
|
|
return device
|
|
|
|
def load_transformers(models=os.getenv("MODELS", "BAAI/bge-large-en-v1.5")):
    """Load sentence-transformer embedding models onto the best device.

    Args:
        models: Comma-separated model identifiers. Defaults to the MODELS
            environment variable (read once at import time) or
            "BAAI/bge-large-en-v1.5".

    Returns:
        dict mapping each model name to its loaded
        ``sentence_transformers.SentenceTransformer`` instance.
    """
    transformers = {}
    device = get_device()
    # Tolerate whitespace around commas and skip empty entries so values
    # like "a, b," don't try to load " b" or "".
    for name in models.split(","):
        name = name.strip()
        if not name:
            continue
        transformers[name] = sentence_transformers.SentenceTransformer(
            name, device=device
        )
    return transformers
|
|
|
|
def load_guard_model(
    model_name,
    hardware_config="cpu",
):
    """Load a guard (sequence-classification) model and its tokenizer.

    Args:
        model_name: Hugging Face model identifier.
        hardware_config: "cpu" (OpenVINO-optimized build) or "gpu"
            (CUDA when available, otherwise falls back to CPU).

    Returns:
        dict with keys "tokenizer", "model_name", "model", "device",
        and "hardware_config".

    Raises:
        ValueError: if ``hardware_config`` is not "cpu" or "gpu".
    """
    # Validate up front so a bad config fails fast, before the tokenizer
    # download — previously an unknown value left `device` unbound and
    # crashed later with a confusing NameError.
    if hardware_config not in ("cpu", "gpu"):
        raise ValueError(
            f"Unsupported hardware_config {hardware_config!r}; expected 'cpu' or 'gpu'"
        )

    guard_model = {}
    guard_model["tokenizer"] = AutoTokenizer.from_pretrained(
        model_name, trust_remote_code=True
    )
    guard_model["model_name"] = model_name

    if hardware_config == "cpu":
        # OpenVINO build for efficient CPU inference; imported lazily so the
        # dependency is only required when actually running on CPU.
        from optimum.intel import OVModelForSequenceClassification

        device = "cpu"
        guard_model["model"] = OVModelForSequenceClassification.from_pretrained(
            model_name, device_map=device, low_cpu_mem_usage=True
        )
    else:  # hardware_config == "gpu"
        from transformers import AutoModelForSequenceClassification

        # torch is imported at module level; fall back to CPU when CUDA
        # is unavailable.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        guard_model["model"] = AutoModelForSequenceClassification.from_pretrained(
            model_name, device_map=device, low_cpu_mem_usage=True
        )

    guard_model["device"] = device
    guard_model["hardware_config"] = hardware_config
    return guard_model
|
|
|
|
|
|
def load_zero_shot_models(
    models=os.getenv("ZERO_SHOT_MODELS", "tasksource/deberta-base-long-nli")
):
    """Load zero-shot-classification pipelines onto the best device.

    Args:
        models: Comma-separated model identifiers. Defaults to the
            ZERO_SHOT_MODELS environment variable (read once at import
            time) or "tasksource/deberta-base-long-nli".

    Returns:
        dict mapping each model name to its ``transformers`` zero-shot
        classification pipeline.
    """
    zero_shot_models = {}
    device = get_device()
    # Tolerate whitespace around commas and skip empty entries so values
    # like "a, b," don't try to load " b" or "".
    for name in models.split(","):
        name = name.strip()
        if not name:
            continue
        zero_shot_models[name] = pipeline(
            "zero-shot-classification", model=name, device=device
        )
    return zero_shot_models
|
|
|
|
if __name__ == "__main__":
    # Quick manual check: report which torch device would be selected.
    print(get_device())