plano/model_server/Dockerfile.gpu
Co Tran 79b1c5415f
[Kan-103] add support toxic/jailbreak model (#49)
* add toxic/jailbreak model

* fix path loading model

* fix syntax

* fix bug,lint, format

* fix bug

* formatting

* add parallel + chunking

* fix bug

* working version

* fix onnx name error

* device

* fix jailbreak config

* fix syntax error

* format

* add requirement + cli download for dockerfile

* add task

* add skeleton change for envoy filter for prompt guard

* fix hardware config

* fix bug

* add config changes

* add gitignore

* merge main

* integrate arch-guard with filter

* add hardware config

* nothing

* add hardware config feature

* fix requirement

* fix chat ui

* fix onnx

* fix lint

* remove non intel cpu

* remove onnx

* working version

* modify docker

* fix guard time

* add nvidia support

* remove nvidia

* add gpu

* add gpu

* add gpu support

* add gpu support for compose

* add gpu support for compose

* add gpu support for compose

* add gpu support for compose

* add gpu support for compose

* fix docker file

* fix int test

* correct gpu docker

* upgrade to Python 3.10

* fix logits to be gpu compatible

* default to cpu dockerfile

* resolve comments

* fix lint + unused parameters

* fix

* remove eetq install for cpu

* remove deploy gpu

---------

Co-authored-by: Adil Hafeez <adil@katanemo.com>
2024-09-23 12:07:31 -07:00

70 lines
1.8 KiB
Text

# Use the NVIDIA CUDA runtime base image (CUDA 12.1, cuDNN 8, Ubuntu 22.04)
# to enable GPU support in derived stages.
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS base

# Suppress interactive prompts from apt/debconf during the build.
# NOTE(review): set via ENV so it propagates to all derived stages; it will
# also persist into the runtime environment (harmless for this image).
ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3.10 and pip; skip recommended packages and clean the apt
# lists in the same layer so nothing transient is baked into the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python-is-python3 \
        python3-dev \
        python3-pip \
        python3.10 && \
    rm -rf /var/lib/apt/lists/*
#
# builder
#
FROM base AS builder

WORKDIR /src

# Upgrade pip first; keep the pip download cache out of the layer.
RUN pip install --no-cache-dir --upgrade pip

# git is needed below to clone the EETQ repository.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so this layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt /src/

# Install Python dependencies.
# --force-reinstall is kept from the original (overrides any packages the
# base image may already ship); --no-cache-dir avoids layer bloat.
RUN pip install --no-cache-dir --force-reinstall -r requirements.txt

# Install the CUDA toolkit matching the base image so nvcc is available for
# building EETQ. BUG FIX: the original installed cuda-toolkit-12-2 on a
# CUDA 12.1.0 base image — a runtime/toolkit version mismatch. Also clean
# the apt lists in the same layer (the original left them in the image).
RUN apt-get update && \
    apt-get install -y --no-install-recommends cuda-toolkit-12-1 && \
    rm -rf /var/lib/apt/lists/*

# Build and install EETQ (weight-only quantization) only when nvcc is
# present; otherwise skip so the same Dockerfile still builds on CPU hosts.
RUN if command -v nvcc >/dev/null 2>&1; then \
        echo "CUDA and NVIDIA GPU detected, installing EETQ..." && \
        git clone https://github.com/NetEase-FuXi/EETQ.git && \
        cd EETQ && \
        git submodule update --init --recursive && \
        pip install --no-cache-dir .; \
    else \
        echo "CUDA or NVIDIA GPU not detected, skipping EETQ installation."; \
    fi

# Copy the remaining sources last so code edits don't invalidate the
# dependency layers above.
COPY . /src
#
# output
#
# NOTE(review): there is no `FROM ... AS output` here, so everything below
# still executes inside the `builder` stage and the final image carries the
# build tooling. The pip-installed packages are needed at runtime, so a true
# slim output stage would have to copy site-packages across — left as-is.

# Models that go into the image, as comma-separated Hugging Face model IDs.
ENV MODELS="BAAI/bge-large-en-v1.5"
ENV NER_MODELS="urchade/gliner_large-v2.1"
ENV DEBIAN_FRONTEND=noninteractive

# Application code (source path is resolved relative to the build context).
COPY /app /app
WORKDIR /app

# curl is required in the running container (e.g. for health probes);
# drop recommended packages and the apt lists within the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Uncomment if you want to install the model during the image build
# RUN python install.py && \
#     find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +

# Document the service port (EXPOSE does not publish it by itself).
EXPOSE 80

# Run the app with uvicorn in exec form so uvicorn is PID 1 and receives
# SIGTERM directly from `docker stop`.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]