Add option to use existing llama.cpp build

2026-06-02 16:39:02 +02:00 · 2026-06-02 16:39:02 +02:00 · 1f4e9e119c
commit 1f4e9e119c
parent 80a91e278e
2 changed files with 69 additions and 48 deletions
--- a/README.md
+++ b/README.md
@ -38,10 +38,12 @@ run-pipeline.sh            → Run finetune → merge/convert → run in sequenc
 `setup.sh` will:
 1. Create a Python virtual environment and install Python dependencies
-2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp)
+2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp) or symlink an existing build
-3. Build llama.cpp with your selected GPU backend
+3. Build llama.cpp with your selected GPU backend (skipped when using an existing build)
 4. Install llama-cpp-python bindings with matching backend flags
 **Using an existing llama.cpp build:** Choose option 2 and provide the absolute path to your existing build. Setup will create a symlink at `./llama.cpp`.
 ### Backend Selection
 | Choice | Backend | Requirements |
--- a/setup.sh
+++ b/setup.sh
@ -11,7 +11,7 @@ python -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt
-# Prompt for backend
+# Select backend for llama-cpp-python binding
 echo ""
 echo "Select llama.cpp backend:"
 echo "  1) CUDA (NVIDIA GPU)"
@ -21,59 +21,77 @@ echo "  4) CPU only"
 echo ""
 read -p "Enter choice (1-4): " BACKEND
-# Clone llama.cpp
+# Ask if fresh build or existing
 echo ""
-echo "Cloning llama.cpp..."
+echo "Would you like to:"
-if [ ! -d "llama.cpp" ]; then
+echo "  1) Clone and build a fresh copy of llama.cpp"
 echo "  2) Use an existing llama.cpp build (symlink)"
 echo ""
 read -p "Enter choice (1-2): " BUILD_CHOICE
 if [ "$BUILD_CHOICE" = "1" ]; then
    # Fresh-build path: fetch the llama.cpp sources, then configure and compile
    # them for the backend chosen in BACKEND.
    # Result: BUILD_FAILED is 0 on success and 1 if the clone, the configure
    # step, or the build step failed. It is read again at the end of setup.
    BUILD_FAILED=0
    if [ ! -d "llama.cpp" ]; then
        echo ""
        echo "Cloning llama.cpp..."
        # A failed clone must not fall through to the build below.
        git clone https://github.com/ggml-org/llama.cpp.git || BUILD_FAILED=1
    else
        # git refuses to clone into an existing non-empty directory, so reuse
        # the checkout that is already there (e.g. when setup is re-run).
        echo ""
        echo "llama.cpp already exists, skipping clone."
    fi
    # Build llama.cpp with correct flags
    if [ "$BUILD_FAILED" -eq 0 ]; then
        echo ""
        echo "Building llama.cpp..."
        # Run in a subshell: the directory change cannot leak out, so the rest
        # of setup always continues from the project root even when cd or
        # cmake fails.
        (
            cd llama.cpp || exit 1
            # Backend-specific configure step.
            case "$BACKEND" in
                1)
                    echo "Building with CUDA support..."
                    cmake -B build -DGGML_CUDA=ON || exit 1
                    ;;
                2)
                    echo "Building with ROCm support..."
                    # Compiler and HIP locations are taken from hipconfig.
                    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
                        cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release || exit 1
                    ;;
                3)
                    echo "Building with Vulkan support..."
                    cmake -B build -DGGML_VULKAN=1 || exit 1
                    ;;
                4)
                    echo "Building CPU-only..."
                    cmake -B build || exit 1
                    ;;
                *)
                    echo "Invalid choice. Building CPU-only."
                    cmake -B build || exit 1
                    ;;
            esac
            # The compile step is the same for every backend.
            cmake --build build --config Release -j"$(nproc)"
        ) || BUILD_FAILED=1
    fi
 else
-    echo "llama.cpp already exists, skipping clone."
+    read -p "Enter absolute path to existing llama.cpp build: " LLAMA_CPP_PATH
    # Validate path
    if [ ! -d "$LLAMA_CPP_PATH" ]; then
        echo "Error: Directory $LLAMA_CPP_PATH does not exist."
        exit 1
    fi
    # Resolve to absolute path
    LLAMA_CPP_PATH=$(realpath "$LLAMA_CPP_PATH") || exit 1
    # ln -sfn only replaces an existing *symlink*. If ./llama.cpp is a real
    # directory (for example from an earlier clone), ln would silently create
    # the link inside that directory instead, so refuse to continue.
    if [ -e llama.cpp ] && [ ! -L llama.cpp ]; then
        echo "Error: ./llama.cpp already exists and is not a symlink. Remove or rename it first."
        exit 1
    fi
    echo ""
    echo "Creating symlink: ./llama.cpp -> $LLAMA_CPP_PATH"
    ln -sfn "$LLAMA_CPP_PATH" llama.cpp || exit 1
 fi
 # Build llama.cpp with correct flags
 # Configures and compiles llama.cpp inside ./llama.cpp for the backend held in
 # BACKEND. BUILD_FAILED is reset to 0 here and set to 1 if either the cmake
 # configure step or the cmake build step fails.
 # NOTE(review): this section repeats the build performed inside the
 # BUILD_CHOICE=1 branch and, as shown, also runs when an existing build was
 # symlinked. It may be pre-change code that the commit removed; confirm before
 # relying on it.
 echo ""
 echo "Building llama.cpp..."
 # NOTE(review): the result of cd is not checked. If it fails, cmake runs in
 # the current directory and the closing `cd ..` moves one level above it.
 cd llama.cpp
 BUILD_FAILED=0
 case $BACKEND in
    1)
        echo "Building with CUDA support..."
        cmake -B build -DGGML_CUDA=ON || BUILD_FAILED=1
        # Build only when configure succeeded. The trailing || also fires when
        # the test is false, which just sets the flag to 1 again.
        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
        ;;
    2)
        echo "Building with ROCm support..."
        # HIPCXX and HIP_PATH are filled from hipconfig output for this one
        # cmake invocation; the GPU target is fixed to gfx1030.
        HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
            cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release || BUILD_FAILED=1
        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
        ;;
    3)
        echo "Building with Vulkan support..."
        cmake -B build -DGGML_VULKAN=1 || BUILD_FAILED=1
        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
        ;;
    4)
        echo "Building CPU-only..."
        cmake -B build || BUILD_FAILED=1
        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
        ;;
    *)
        # Any other input falls back to the same commands as the CPU-only arm.
        echo "Invalid choice. Building CPU-only."
        cmake -B build || BUILD_FAILED=1
        [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1
        ;;
 esac
 # Return to the directory the script was in before the build.
 cd ..
 # Install llama-cpp-python in main venv
 echo ""
 echo "Installing llama-cpp-python..."
 source venv/bin/activate
 case $BACKEND in
    1) CMAKE_ARGS="-DGGML_CUDA=on" ;;
    2) CMAKE_ARGS="-DGGML_HIP=on" ;;
@ -81,6 +99,7 @@ case $BACKEND in
    *) CMAKE_ARGS="" ;;
 esac
 source venv/bin/activate
 # Hand CMAKE_ARGS to pip through a per-command environment assignment.
 # No eval is needed, and avoiding it keeps the value from being re-parsed
 # as shell code.
 CMAKE_ARGS="$CMAKE_ARGS" pip install llama-cpp-python
 # Create convertgguf_venv for llama.cpp Python tools
@ -94,7 +113,7 @@ echo ""
 echo "Setup complete! Configure the scripts and run:"
 echo "  bash run-pipeline.sh"
-if [ $BUILD_FAILED -ne 0 ]; then
+if [ -n "$BUILD_FAILED" ] && [ $BUILD_FAILED -ne 0 ]; then
    echo ""
    echo "Build failed. See the build guide for help:"
    echo "  https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md"