# plano/model_server/app/load_models.py
import os
import sentence_transformers
from transformers import AutoTokenizer, AutoModel, pipeline
import sqlite3
import torch
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenceClassification # type: ignore
def get_device():
    """Return the best available torch device string.

    Preference order: "cuda" (NVIDIA GPU), then "mps" (Apple Silicon),
    then "cpu" as the fallback.

    Returns:
        str: one of "cuda", "mps", or "cpu".
    """
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    # BUGFIX: corrected typo "Avialble" -> "Available" in the log message.
    print(f"Devices Available: {device}")
    return device
def load_transformers(model_name=os.getenv("MODELS", "katanemo/bge-large-en-v1.5")):
    """Load the embedding model and its tokenizer.

    On CUDA hosts the plain transformers model is used; everywhere else
    the ONNX-exported model is loaded through ONNX Runtime.

    Args:
        model_name: HF hub id or local path (default from the MODELS env var).

    Returns:
        dict with "tokenizer", "model", and "model_name" keys.
    """
    print("Loading Embedding Model")
    device = get_device()
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    if device == "cuda":
        model = AutoModel.from_pretrained(model_name, device_map=device)
    else:
        # Non-CUDA hosts use the ONNX export for faster CPU/MPS inference.
        model = ORTModelForFeatureExtraction.from_pretrained(
            model_name, file_name="onnx/model.onnx"
        )

    return {
        "tokenizer": tokenizer,
        "model": model,
        "model_name": model_name,
    }
def load_guard_model(
    model_name,
    hardware_config="cpu",
):
    """Load a guard (prompt-safety) sequence-classification model.

    Args:
        model_name: HF hub id or local path of the guard model.
        hardware_config: "cpu" for the OpenVINO backend, "gpu" for CUDA
            (falls back to CPU when CUDA is unavailable).

    Returns:
        dict with "tokenizer", "model", "model_name", "device", and
        "hardware_config" keys.

    Raises:
        ValueError: if hardware_config is not "cpu" or "gpu".
    """
    print("Loading Guard Model")
    guard_model = {}
    guard_model["tokenizer"] = AutoTokenizer.from_pretrained(
        model_name, trust_remote_code=True
    )
    guard_model["model_name"] = model_name
    if hardware_config == "cpu":
        # OpenVINO-optimized build for CPU inference; imported lazily so
        # the optional optimum-intel dependency is only needed on CPU.
        from optimum.intel import OVModelForSequenceClassification

        device = "cpu"
        guard_model["model"] = OVModelForSequenceClassification.from_pretrained(
            model_name, device_map=device, low_cpu_mem_usage=True
        )
    elif hardware_config == "gpu":
        from transformers import AutoModelForSequenceClassification

        # torch is already imported at module level; no local re-import needed.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        guard_model["model"] = AutoModelForSequenceClassification.from_pretrained(
            model_name, device_map=device, low_cpu_mem_usage=True
        )
    else:
        # BUGFIX: previously an unknown config left `device` unbound and
        # crashed below with a NameError; fail fast with a clear message.
        raise ValueError(
            f"Unsupported hardware_config: {hardware_config!r} (expected 'cpu' or 'gpu')"
        )
    guard_model["device"] = device
    guard_model["hardware_config"] = hardware_config
    return guard_model
def load_zero_shot_models(
    model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli")
):
    """Load a zero-shot classification model, tokenizer, and pipeline.

    Non-CUDA hosts use the ONNX-exported model; CUDA hosts use the
    transformers model with a sequence-classification head.

    Args:
        model_name: HF hub id or local path (default from the
            ZERO_SHOT_MODELS env var).

    Returns:
        dict with "model", "tokenizer", "pipeline", and "model_name" keys.
    """
    zero_shot_model = {}
    device = get_device()
    if device != "cuda":
        zero_shot_model["model"] = ORTModelForSequenceClassification.from_pretrained(
            model_name, file_name="onnx/model.onnx"
        )
    else:
        # BUGFIX: the zero-shot-classification pipeline requires a model with
        # a sequence-classification head; bare AutoModel has none and the
        # pipeline construction/inference would fail. Local import mirrors
        # the style used in load_guard_model's gpu branch.
        from transformers import AutoModelForSequenceClassification

        zero_shot_model["model"] = AutoModelForSequenceClassification.from_pretrained(
            model_name
        )
    zero_shot_model["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
    # create pipeline
    zero_shot_model["pipeline"] = pipeline(
        "zero-shot-classification",
        model=zero_shot_model["model"],
        tokenizer=zero_shot_model["tokenizer"],
        device=device,
    )
    zero_shot_model["model_name"] = model_name
    return zero_shot_model
if __name__ == "__main__":
    # Smoke check when run as a script: report the device this host selects.
    print(get_device())