setup.sh: optionally reuse an existing llama.cpp build via symlink

Notes for whoever applies this patch:
  * Indentation was reconstructed with 4-space steps; if strict matching
    fails, apply with `git apply --ignore-whitespace`.
  * The post-image blob hash on the setup.sh `index` line comes from the
    earlier revision of this patch and is informational only.
  * Compared with the earlier revision, setup.sh now: keeps the
    "skip clone if ./llama.cpp exists" guard, aborts if the clone or
    `cd llama.cpp` fails, rejects an invalid build choice instead of
    falling through to the symlink branch, and refuses to symlink over a
    real ./llama.cpp directory.

diff --git a/README.md b/README.md
index 137c1ba..f51cfd3 100644
--- a/README.md
+++ b/README.md
@@ -38,10 +38,12 @@ run-pipeline.sh → Run finetune → merge/convert → run in sequenc
 `setup.sh` will:
 
 1. Create a Python virtual environment and install Python dependencies
-2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp)
-3. Build llama.cpp with your selected GPU backend
+2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp) or symlink an existing build
+3. Build llama.cpp with your selected GPU backend (skip if using existing)
 4. Install llama-cpp-python bindings with matching backend flags
 
+**Using an existing llama.cpp build:** Choose option 2 and provide the absolute path to your existing build. Setup will create a symlink at `./llama.cpp`.
+
 ### Backend Selection
 
 | Choice | Backend | Requirements |
diff --git a/setup.sh b/setup.sh
index b4222db..a2b3007 100755
--- a/setup.sh
+++ b/setup.sh
@@ -11,7 +11,7 @@ python -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt
 
-# Prompt for backend
+# Select GPU backend (used for both the llama.cpp build and llama-cpp-python)
 echo ""
 echo "Select llama.cpp backend:"
 echo " 1) CUDA (NVIDIA GPU)"
@@ -21,59 +21,92 @@ echo " 4) CPU only"
 echo ""
 read -p "Enter choice (1-4): " BACKEND
 
-# Clone llama.cpp
+# Ask if fresh build or existing
 echo ""
-echo "Cloning llama.cpp..."
-if [ ! -d "llama.cpp" ]; then
-    git clone https://github.com/ggml-org/llama.cpp.git
-else
-    echo "llama.cpp already exists, skipping clone."
-fi
-
-# Build llama.cpp with correct flags
-echo ""
-echo "Building llama.cpp..."
-cd llama.cpp
-
-BUILD_FAILED=0
-
-case $BACKEND in
-    1)
-        echo "Building with CUDA support..."
-        cmake -B build -DGGML_CUDA=ON || BUILD_FAILED=1
-        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
-        ;;
-    2)
-        echo "Building with ROCm support..."
-        HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-            cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release || BUILD_FAILED=1
-        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
-        ;;
-    3)
-        echo "Building with Vulkan support..."
-        cmake -B build -DGGML_VULKAN=1 || BUILD_FAILED=1
-        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
-        ;;
-    4)
-        echo "Building CPU-only..."
-        cmake -B build || BUILD_FAILED=1
-        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
-        ;;
-    *)
-        echo "Invalid choice. Building CPU-only."
-        cmake -B build || BUILD_FAILED=1
-        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
-        ;;
-esac
-
-cd ..
+echo "Would you like to:"
+echo " 1) Clone and build a fresh copy of llama.cpp"
+echo " 2) Use an existing llama.cpp build (symlink)"
+echo ""
+read -r -p "Enter choice (1-2): " BUILD_CHOICE
+
+if [ "$BUILD_CHOICE" = "1" ]; then
+    echo ""
+    echo "Cloning llama.cpp..."
+    # Reuse an existing checkout so that re-running setup does not fail on git clone.
+    if [ ! -d "llama.cpp" ]; then
+        git clone https://github.com/ggml-org/llama.cpp.git || exit 1
+    else
+        echo "llama.cpp already exists, skipping clone."
+    fi
+
+    # Build llama.cpp with correct flags
+    echo ""
+    echo "Building llama.cpp..."
+    # Stop if the checkout is missing; otherwise cmake and the later `cd ..` would run from the wrong directory.
+    cd llama.cpp || exit 1
+
+    BUILD_FAILED=0
+
+    case $BACKEND in
+        1)
+            echo "Building with CUDA support..."
+            cmake -B build -DGGML_CUDA=ON || BUILD_FAILED=1
+            [ "$BUILD_FAILED" -eq 0 ] && cmake --build build --config Release -j"$(nproc)" || BUILD_FAILED=1
+            ;;
+        2)
+            echo "Building with ROCm support..."
+            HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+                cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release || BUILD_FAILED=1
+            [ "$BUILD_FAILED" -eq 0 ] && cmake --build build --config Release -j"$(nproc)" || BUILD_FAILED=1
+            ;;
+        3)
+            echo "Building with Vulkan support..."
+            cmake -B build -DGGML_VULKAN=1 || BUILD_FAILED=1
+            [ "$BUILD_FAILED" -eq 0 ] && cmake --build build --config Release -j"$(nproc)" || BUILD_FAILED=1
+            ;;
+        4)
+            echo "Building CPU-only..."
+            cmake -B build || BUILD_FAILED=1
+            [ "$BUILD_FAILED" -eq 0 ] && cmake --build build --config Release -j"$(nproc)" || BUILD_FAILED=1
+            ;;
+        *)
+            echo "Invalid choice. Building CPU-only."
+            cmake -B build || BUILD_FAILED=1
+            [ "$BUILD_FAILED" -eq 0 ] && cmake --build build --config Release -j"$(nproc)" || BUILD_FAILED=1
+            ;;
+    esac
+
+    cd ..
+elif [ "$BUILD_CHOICE" = "2" ]; then
+    read -r -p "Enter absolute path to existing llama.cpp build: " LLAMA_CPP_PATH
+
+    # Validate path
+    if [ ! -d "$LLAMA_CPP_PATH" ]; then
+        echo "Error: Directory $LLAMA_CPP_PATH does not exist."
+        exit 1
+    fi
+
+    # Resolve to absolute path
+    LLAMA_CPP_PATH=$(realpath "$LLAMA_CPP_PATH")
+
+    # ln -sfn would create the link inside a real ./llama.cpp directory, so refuse instead.
+    if [ -e llama.cpp ] && [ ! -L llama.cpp ]; then
+        echo "Error: ./llama.cpp already exists and is not a symlink."
+        exit 1
+    fi
+
+    echo ""
+    echo "Creating symlink: ./llama.cpp -> $LLAMA_CPP_PATH"
+    ln -sfn "$LLAMA_CPP_PATH" llama.cpp
+else
+    echo "Error: Invalid choice '$BUILD_CHOICE' (expected 1 or 2)."
+    exit 1
+fi
 
 # Install llama-cpp-python in main venv
 echo ""
 echo "Installing llama-cpp-python..."
 
-source venv/bin/activate
-
 case $BACKEND in
     1) CMAKE_ARGS="-DGGML_CUDA=on" ;;
     2) CMAKE_ARGS="-DGGML_HIP=on" ;;
@@ -81,6 +114,7 @@ case $BACKEND in
     *) CMAKE_ARGS="" ;;
 esac
 
+source venv/bin/activate
 eval "CMAKE_ARGS=\"$CMAKE_ARGS\" pip install llama-cpp-python"
 
 # Create convertgguf_venv for llama.cpp Python tools
@@ -94,7 +128,8 @@ echo ""
 echo "Setup complete! Configure the scripts and run:"
 echo " bash run-pipeline.sh"
 
-if [ $BUILD_FAILED -ne 0 ]; then
+# BUILD_FAILED is unset when an existing build was symlinked; treat that as success.
+if [ "${BUILD_FAILED:-0}" -ne 0 ]; then
     echo ""
     echo "Build failed. See the build guide for help:"
     echo " https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md"