mirror of
https://github.com/katanemo/plano.git
synced 2026-05-24 14:05:14 +02:00
[Kan-103] add support toxic/jailbreak model (#49)
* add toxic/jailbreak model * fix path loading model * fix syntax * fix bug, lint, format * fix bug * formatting * add parallel + chunking * fix bug * working version * fix onnx name error * device * fix jailbreak config * fix syntax error * format * add requirement + cli download for dockerfile * add task * add skeleton change for envoy filter for prompt guard * fix hardware config * fix bug * add config changes * add gitignore * merge main * integrate arch-guard with filter * add hardware config * nothing * add hardware config feature * fix requirement * fix chat ui * fix onnx * fix lint * remove non intel cpu * remove onnx * working version * modify docker * fix guard time * add nvidia support * remove nvidia * add gpu * add gpu * add gpu support * add gpu support for compose * add gpu support for compose * add gpu support for compose * add gpu support for compose * add gpu support for compose * fix docker file * fix int test * correct gpu docker * upgrade to python 3.10 * fix logits to be gpu compatible * default to cpu dockerfile * resolve comments * fix lint + unused parameters * fix * remove eetq install for cpu * remove deploy gpu --------- Co-authored-by: Adil Hafeez <adil@katanemo.com>
This commit is contained in:
parent
80c554ce1a
commit
79b1c5415f
18 changed files with 1622 additions and 191 deletions
70
model_server/Dockerfile.gpu
Normal file
70
model_server/Dockerfile.gpu
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# Use NVIDIA CUDA base image to enable GPU support.
# The stage is named "base" so that later stages can build on top of it.
# `AS` is uppercase to match the casing of `FROM` (and of the other stage in this file).
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS base
|
||||
# Keep apt/debconf from prompting for input while packages are installed
ENV DEBIAN_FRONTEND="noninteractive"
|
||||
|
||||
# Python 3.10 toolchain: interpreter, pip, development headers, and the
# `python` -> `python3` alias package. The apt package index is deleted in the
# same layer so it does not persist in the image.
RUN apt-get update \
    && apt-get install -y \
        python3.10 \
        python3-pip \
        python3-dev \
        python-is-python3 \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
|
||||
#
|
||||
# builder
|
||||
#
|
||||
# Start the "builder" stage on top of the "base" stage declared earlier in this file
FROM base AS builder
|
||||
|
||||
# Relative paths in the instructions that follow resolve against /src
WORKDIR /src
|
||||
|
||||
# Upgrade pip.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip
|
||||
|
||||
# Install git for cloning repositories.
# - ca-certificates is listed explicitly so HTTPS clones keep working now that
#   recommended packages are no longer pulled in implicitly.
# - The apt package index is removed in this same layer; `apt-get clean` alone
#   leaves /var/lib/apt/lists behind.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Bring in only the dependency manifest first, so the dependency-install layer
# stays cached until requirements.txt itself changes.
# The destination is relative to the current WORKDIR (/src).
COPY requirements.txt ./
|
||||
|
||||
# Install Python dependencies.
# --force-reinstall reinstalls every listed package even if a version is already present.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir --force-reinstall -r requirements.txt
|
||||
|
||||
# Install the CUDA toolkit package (cuda-toolkit-12-2); the EETQ step below
# checks for the `nvcc` compiler before building.
# NOTE(review): the base image is tagged CUDA 12.1.0 while this installs the
# 12.2 toolkit — confirm the version mismatch is intentional.
# The apt package index is removed in the same layer to keep it out of the image.
RUN apt-get update \
    && apt-get install -y cuda-toolkit-12-2 \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Build and install EETQ from source when the CUDA compiler (nvcc) is on PATH.
# Only the presence of `nvcc` is tested here; no GPU device is probed at build time.
# - pip runs with --no-cache-dir so its cache is not stored in the layer.
# - The cloned source tree (including submodules) is deleted in this same layer
#   once the package has been installed, so it does not inflate the image.
# - Any failing step in the `then` branch fails the RUN instruction.
RUN if command -v nvcc >/dev/null 2>&1; then \
        echo "CUDA and NVIDIA GPU detected, installing EETQ..." && \
        git clone https://github.com/NetEase-FuXi/EETQ.git && \
        cd EETQ && \
        git submodule update --init --recursive && \
        pip install --no-cache-dir . && \
        cd .. && \
        rm -rf EETQ; \
    else \
        echo "CUDA or NVIDIA GPU not detected, skipping EETQ installation."; \
    fi
|
||||
|
||||
# Copy the whole build context into the current WORKDIR (/src)
COPY . .
|
||||
|
||||
#
|
||||
# output
|
||||
#
|
||||
|
||||
|
||||
# Specify list of models that will go into the image as a comma separated list.
# DEBIAN_FRONTEND is not repeated here: it is already set in the "base" stage,
# which this stage inherits from.
ENV MODELS="BAAI/bge-large-en-v1.5" \
    NER_MODELS="urchade/gliner_large-v2.1"
|
||||
|
||||
# Place the `app` directory from the build context at /app and make it the
# working directory for the remaining instructions and for the container.
COPY app/ /app/
WORKDIR /app
|
||||
|
||||
# Install required tools.
# --no-install-recommends limits the install to curl and its hard dependencies;
# the apt package index is removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Uncomment if you want to install the model during the image build
|
||||
# RUN python install.py && \
|
||||
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
|
||||
|
||||
# Document the port the server listens on. EXPOSE is metadata only and does
# not publish the port by itself.
EXPOSE 80

# Set the default command to run the application: uvicorn serves `main:app`
# on all interfaces, port 80. Exec (JSON) form is used so no shell wraps the process.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue