Fix GPU dependency and only leverage ONNX when GPU is available (#157)

* replacing appending instead of write

* fix eetq dependency

* gpu guard required eetq

* fix bug when gpu is available

* fix for gpu device

* reverse

* fix

* replace gpu -> cuda
This commit is contained in:
Co Tran 2024-10-09 11:42:05 -07:00 committed by GitHub
parent 5c4a6bc8ff
commit 8b5db45507
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 18 additions and 14 deletions

View file

@@ -1,5 +1,5 @@
// NOTE(review): this is a diff hunk rendered without its +/- markers, so the
// first two constants appear twice. The "-onnx" values directly below are
// presumably the pre-change (removed) side — confirm against the raw diff.
pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5-onnx";
pub const DEFAULT_INTENT_MODEL: &str = "katanemo/deberta-base-nli-onnx";
// Presumably the post-change (added) side: the same two constants with the
// "-onnx" suffix dropped from the model identifiers.
pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5";
pub const DEFAULT_INTENT_MODEL: &str = "katanemo/deberta-base-nli";
// Presumably unchanged context lines of the hunk (the header counts five lines
// on each side, which leaves three lines common to both).
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.1;
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";