"""Utilities for loading the embedding, guard, and zero-shot models."""

import os
import sqlite3

import sentence_transformers
import torch
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenceClassification  # type: ignore
from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer, pipeline


def get_device() -> str:
    """Return the best available torch device: CUDA, then Apple MPS, else CPU."""
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
    print(f"Device available: {device}")
    return device


def load_transformers(model_name=os.getenv("MODELS", "katanemo/bge-large-en-v1.5")):
    """Load the embedding model and its tokenizer into a dict keyed by role."""
    print("Loading Embedding Model")
    transformers = {}
    device = get_device()
    transformers["tokenizer"] = AutoTokenizer.from_pretrained(model_name)

    if device != "cuda":
        # Off GPU, prefer the ONNX Runtime export of the model.
        transformers["model"] = ORTModelForFeatureExtraction.from_pretrained(
            model_name, file_name="onnx/model.onnx"
        )
    else:
        transformers["model"] = AutoModel.from_pretrained(model_name, device_map=device)

    transformers["model_name"] = model_name

    return transformers
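

# A minimal usage sketch, not part of the original module: turning text into a
# sentence embedding with the dict returned by load_transformers(). CLS pooling
# followed by L2 normalization is the documented convention for BGE-style
# models; that katanemo/bge-large-en-v1.5 follows it is an assumption here.
def embed_sentence_example(text: str) -> torch.Tensor:
    loaded = load_transformers()
    inputs = loaded["tokenizer"](text, truncation=True, return_tensors="pt").to(
        loaded["model"].device
    )
    with torch.no_grad():
        outputs = loaded["model"](**inputs)
    cls = outputs.last_hidden_state[:, 0]  # CLS-token pooling
    return torch.nn.functional.normalize(cls, p=2, dim=1)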


def load_guard_model(
    model_name,
    hardware_config="cpu",
):
    """Load the guard classifier on the backend matching hardware_config."""
    print("Loading Guard Model")
    guard_model = {}
    guard_model["tokenizer"] = AutoTokenizer.from_pretrained(
        model_name, trust_remote_code=True
    )
    guard_model["model_name"] = model_name

    if hardware_config == "cpu":
        # Lazy import so the optimum-intel (OpenVINO) extra is only required
        # on the CPU path.
        from optimum.intel import OVModelForSequenceClassification

        device = "cpu"
        guard_model["model"] = OVModelForSequenceClassification.from_pretrained(
            model_name, device_map=device, low_cpu_mem_usage=True
        )
    elif hardware_config == "gpu":
        device = "cuda" if torch.cuda.is_available() else "cpu"
        guard_model["model"] = AutoModelForSequenceClassification.from_pretrained(
            model_name, device_map=device, low_cpu_mem_usage=True
        )
    else:
        # Fail fast instead of hitting an UnboundLocalError on `device` below.
        raise ValueError(f"Unsupported hardware_config: {hardware_config!r}")

    guard_model["device"] = device
    guard_model["hardware_config"] = hardware_config

    return guard_model
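

# A hedged sketch, not the original API: scoring a prompt with the guard model
# loaded above. The label meanings depend entirely on the checkpoint, so the
# returned probabilities are left uninterpreted here.
def guard_score_example(prompt: str, guard_model_name: str) -> list:
    guard = load_guard_model(guard_model_name, hardware_config="cpu")
    inputs = guard["tokenizer"](prompt, truncation=True, return_tensors="pt")
    logits = guard["model"](**inputs).logits
    return torch.softmax(logits, dim=-1).tolist()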


def load_zero_shot_models(
    model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli")
):
    """Load the zero-shot classifier and wrap it in a transformers pipeline."""
    zero_shot_model = {}
    device = get_device()

    if device != "cuda":
        # Off GPU, prefer the ONNX Runtime export of the model.
        zero_shot_model["model"] = ORTModelForSequenceClassification.from_pretrained(
            model_name, file_name="onnx/model.onnx"
        )
    else:
        # The zero-shot pipeline needs a sequence-classification head, so load
        # with AutoModelForSequenceClassification, not the bare AutoModel.
        zero_shot_model["model"] = AutoModelForSequenceClassification.from_pretrained(
            model_name
        )

    zero_shot_model["tokenizer"] = AutoTokenizer.from_pretrained(model_name)

    # create pipeline
    zero_shot_model["pipeline"] = pipeline(
        "zero-shot-classification",
        model=zero_shot_model["model"],
        tokenizer=zero_shot_model["tokenizer"],
        device=device,
    )

    zero_shot_model["model_name"] = model_name

    return zero_shot_model
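

# A usage sketch; the candidate labels are illustrative and not from the
# original module.
def zero_shot_example(text: str) -> dict:
    zs = load_zero_shot_models()
    return zs["pipeline"](text, candidate_labels=["weather", "billing", "other"])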


if __name__ == "__main__":
    print(get_device())
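    # Hedged smoke test: uncomment to exercise the loaders end to end. This
    # downloads the default checkpoints, so it assumes network access.
    # print(zero_shot_example("What's the weather like in Seattle?"))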