mirror of
https://github.com/katanemo/plano.git
synced 2026-05-09 15:52:44 +02:00
Fix gpu dependency and only leverage onnx when GPU is available (#157)
* replacing appending instead of write
* fix eetq dependency
* gpu guard required eetq
* fix bug when gpu is available
* fix for gpu device
* reverse
* fix
* replace gpu -> cuda
This commit is contained in:
parent
5c4a6bc8ff
commit
8b5db45507
3 changed files with 18 additions and 14 deletions
|
|
@ -1,5 +1,5 @@
|
||||||
pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5-onnx";
|
pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5";
|
||||||
pub const DEFAULT_INTENT_MODEL: &str = "katanemo/deberta-base-nli-onnx";
|
pub const DEFAULT_INTENT_MODEL: &str = "katanemo/deberta-base-nli";
|
||||||
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
|
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
|
||||||
pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.1;
|
pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.1;
|
||||||
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
|
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,3 @@
|
||||||
jailbreak:
|
jailbreak:
|
||||||
cpu: "katanemo/Arch-Guard-cpu"
|
cpu: "katanemo/Arch-Guard-cpu"
|
||||||
gpu: "katanemo/Arch-Guard-gpu"
|
gpu: "katanemo/Arch-Guard"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import sentence_transformers
|
import sentence_transformers
|
||||||
from transformers import AutoTokenizer, pipeline
|
from transformers import AutoTokenizer, AutoModel, pipeline
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import torch
|
import torch
|
||||||
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenceClassification # type: ignore
|
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenceClassification # type: ignore
|
||||||
|
|
@ -18,16 +18,17 @@ def get_device():
|
||||||
return device
|
return device
|
||||||
|
|
||||||
|
|
||||||
def load_transformers(
|
def load_transformers(model_name=os.getenv("MODELS", "katanemo/bge-large-en-v1.5")):
|
||||||
model_name=os.getenv("MODELS", "katanemo/bge-large-en-v1.5-onnx")
|
|
||||||
):
|
|
||||||
print("Loading Embedding Model")
|
print("Loading Embedding Model")
|
||||||
transformers = {}
|
transformers = {}
|
||||||
device = get_device()
|
device = get_device()
|
||||||
transformers["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
|
transformers["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
|
||||||
transformers["model"] = ORTModelForFeatureExtraction.from_pretrained(
|
if device != "cuda":
|
||||||
model_name, device_map=device
|
transformers["model"] = ORTModelForFeatureExtraction.from_pretrained(
|
||||||
)
|
model_name, file_name="onnx/model.onnx"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
transformers["model"] = AutoModel.from_pretrained(model_name, device_map=device)
|
||||||
transformers["model_name"] = model_name
|
transformers["model_name"] = model_name
|
||||||
|
|
||||||
return transformers
|
return transformers
|
||||||
|
|
@ -64,13 +65,16 @@ def load_guard_model(
|
||||||
|
|
||||||
|
|
||||||
def load_zero_shot_models(
|
def load_zero_shot_models(
|
||||||
model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli-onnx")
|
model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli")
|
||||||
):
|
):
|
||||||
zero_shot_model = {}
|
zero_shot_model = {}
|
||||||
device = get_device()
|
device = get_device()
|
||||||
zero_shot_model["model"] = ORTModelForSequenceClassification.from_pretrained(
|
if device != "cuda":
|
||||||
model_name
|
zero_shot_model["model"] = ORTModelForSequenceClassification.from_pretrained(
|
||||||
)
|
model_name, file_name="onnx/model.onnx"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
zero_shot_model["model"] = AutoModel.from_pretrained(model_name)
|
||||||
zero_shot_model["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
|
zero_shot_model["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
|
||||||
|
|
||||||
# create pipeline
|
# create pipeline
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue