From 815c9aadec67548b286ef3a395778251d12107d0 Mon Sep 17 00:00:00 2001 From: Oracle Date: Tue, 2 Jun 2026 17:08:39 +0200 Subject: [PATCH] Use existing llama.cpp build for python bindings if possible --- README.md | 37 ++++++++++++++++++++---- finetune.py | 4 +-- scripts/run-model.sh | 2 +- setup.sh | 69 +++++++++++++++++++++++++++----------------- synthetic-data.py | 58 ++++++++++--------------------------- 5 files changed, 92 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 2c199be..cac3fbf 100644 --- a/README.md +++ b/README.md @@ -38,14 +38,16 @@ run-pipeline.sh → Run finetune → merge/convert → run in sequenc `setup.sh` will: 1. Create a Python virtual environment and install Python dependencies -2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp) or symlink an existing build -3. Build llama.cpp with your selected GPU backend (skip if using existing) -4. Install llama-cpp-python bindings with matching backend flags +2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp) (fresh build) or symlink an existing build +3. Build llama.cpp with shared libraries (`-DBUILD_SHARED_LIBS=ON`); skipped when using an existing build +4. Install llama-cpp-python bindings linked against the shared library (`-DLLAMA_BUILD=OFF`) -**Using an existing llama.cpp build:** Choose option 2 and provide the absolute path to your existing build. Setup will create a symlink at `./llama.cpp`. +**Using an existing llama.cpp build:** Choose option 2 and provide the absolute path to that build. The build must have been created with `-DBUILD_SHARED_LIBS=ON` and contain `libllama.so`. Setup will create a symlink at `./llama.cpp`. ### Backend Selection +You are prompted for a backend only when building llama.cpp from scratch. Choose based on your GPU: + | Choice | Backend | Requirements | |---|---|---| | 1 | CUDA (NVIDIA) | Systemwide CUDA installation (NVIDIA drivers + CUDA toolkit) | @@ -67,6 +69,17 @@ vulkaninfo Should run without errors. 
+### Existing llama.cpp Build + +If using option 2 (existing build), ensure it was compiled with shared libraries: + +```bash +cmake -B build -DBUILD_SHARED_LIBS=ON -DGGML_CUDA=ON # or -DGGML_HIP=ON / -DGGML_VULKAN=1 +cmake --build build --config Release -j$(nproc) +``` + +The build must contain `libllama.so` (typically at `build/libllama.so`). + ## Scripts ### 1. scripts/generate-data.sh @@ -81,8 +94,20 @@ Generates synthetic training data using a GGUF model via llama.cpp. Run this if | `INPUT_PARQUET_PATH` | Path to existing training data to extend | `./data/train.parquet` | | `OUTPUT_PARQUET_PATH` | Path to save the combined dataset | `./data/output.parquet` | | `NEW_ROWS_COUNT` | Number of synthetic records to generate | `100` | -| User prompt (line 67) | Replace `"YOUR PROMPT GOES HERE"` with generation instructions | `Generate questions about machine learning...` | -| System message (line 63) | Controls the model's role | `"You are a data generator. Output ONLY the format below..."` | +| `User prompt` (line 66) | Replace `"YOUR PROMPT GOES HERE"` with generation instructions | `Generate questions about machine learning...` | +| `System message` (line 62) | Controls the model's role | `"You are a data generator. 
Output ONLY the format below..."` | +| `max_tokens` | Max tokens per response | `200` | +| `temperature` | Creativity of generation | `0.7` | +| `top_p` | Nucleus sampling threshold | `0.95` | +| `top_k` | Top-k sampling threshold | `50` | +| `min_p` | Minimum probability threshold | `0.05` | + +The script expects each model response in this format: + +``` +Question: +Answer: +``` ```bash bash scripts/generate-data.sh diff --git a/finetune.py b/finetune.py index 2f4753c..90e7ada 100644 --- a/finetune.py +++ b/finetune.py @@ -16,7 +16,7 @@ warnings.filterwarnings("ignore") # ========================================== # Update these paths -DATA_PATH = "YOUR_PAQUET_FILE_PATH" +DATA_PATH = "YOUR_PARQUET_FILE_PATH" OUTPUT_DIR = "./model" # Training params, change these to fit your hardware BATCH_SIZE = 2 @@ -37,7 +37,7 @@ print("Loading data...") df = pd.read_parquet(DATA_PATH) # Check required columns -required_cols = ["question", "answer", "label"] +required_cols = ["question", "answer"] missing_cols = [c for c in required_cols if c not in df.columns] if missing_cols: raise ValueError(f"Missing columns in Parquet file: {missing_cols}") diff --git a/scripts/run-model.sh b/scripts/run-model.sh index 31aa1fe..6ea73d1 100755 --- a/scripts/run-model.sh +++ b/scripts/run-model.sh @@ -1,2 +1,2 @@ #!/bin/bash -./llama.cpp/build/bin/llama-cli -m ./merged_model/Merged_Model.gguf +./llama.cpp/build/bin/llama-cli -m ./merged_model/model.gguf diff --git a/setup.sh b/setup.sh index a2b3007..e94741d 100755 --- a/setup.sh +++ b/setup.sh @@ -11,25 +11,26 @@ python -m venv venv source venv/bin/activate pip install -r requirements.txt -# Select backend for llama-cpp-python binding -echo "" -echo "Select llama.cpp backend:" -echo " 1) CUDA (NVIDIA GPU)" -echo " 2) ROCm (AMD GPU)" -echo " 3) Vulkan (Cross-vendor GPU)" -echo " 4) CPU only" -echo "" -read -p "Enter choice (1-4): " BACKEND - # Ask if fresh build or existing echo "" echo "Would you like to:" echo " 1) Clone and build a fresh copy of 
llama.cpp" -echo " 2) Use an existing llama.cpp build (symlink)" +echo " 2) Use an existing llama.cpp build" echo "" read -p "Enter choice (1-2): " BUILD_CHOICE +LLAMA_CPP_PATH="" + if [ "$BUILD_CHOICE" = "1" ]; then + echo "" + echo "Select llama.cpp backend:" + echo " 1) CUDA (NVIDIA GPU)" + echo " 2) ROCm (AMD GPU)" + echo " 3) Vulkan (Cross-vendor GPU)" + echo " 4) CPU only" + echo "" + read -p "Enter choice (1-4): " BACKEND + echo "" echo "Cloning llama.cpp..." git clone https://github.com/ggml-org/llama.cpp.git @@ -44,32 +45,35 @@ if [ "$BUILD_CHOICE" = "1" ]; then case $BACKEND in 1) echo "Building with CUDA support..." - cmake -B build -DGGML_CUDA=ON || BUILD_FAILED=1 + cmake -B build -DBUILD_SHARED_LIBS=ON -DGGML_CUDA=ON || BUILD_FAILED=1 [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1 ;; 2) echo "Building with ROCm support..." + read -p "Enter GPU target (e.g., gfx1030, gfx942, gfx1100): " ROCM_TARGET + [ -z "$ROCM_TARGET" ] && ROCM_TARGET="gfx1030" HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ - cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release || BUILD_FAILED=1 + cmake -S . -B build -DBUILD_SHARED_LIBS=ON -DGGML_HIP=ON -DGPU_TARGETS="$ROCM_TARGET" -DCMAKE_BUILD_TYPE=Release || BUILD_FAILED=1 [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1 ;; 3) echo "Building with Vulkan support..." - cmake -B build -DGGML_VULKAN=1 || BUILD_FAILED=1 + cmake -B build -DBUILD_SHARED_LIBS=ON -DGGML_VULKAN=1 || BUILD_FAILED=1 [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1 ;; 4) echo "Building CPU-only..." - cmake -B build || BUILD_FAILED=1 + cmake -B build -DBUILD_SHARED_LIBS=ON || BUILD_FAILED=1 [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1 ;; *) echo "Invalid choice. Building CPU-only." 
- cmake -B build || BUILD_FAILED=1 + cmake -B build -DBUILD_SHARED_LIBS=ON || BUILD_FAILED=1 [ $BUILD_FAILED -eq 0 ] && cmake --build build --config Release -j$(nproc) || BUILD_FAILED=1 ;; esac + LLAMA_CPP_PATH=$(pwd) cd .. else read -p "Enter absolute path to existing llama.cpp build: " LLAMA_CPP_PATH @@ -83,24 +87,37 @@ else # Resolve to absolute path LLAMA_CPP_PATH=$(realpath "$LLAMA_CPP_PATH") + # Check for shared library + if [ ! -f "$LLAMA_CPP_PATH/build/libllama.so" ] && [ ! -f "$LLAMA_CPP_PATH/libllama.so" ]; then + echo "Error: Could not find libllama.so in $LLAMA_CPP_PATH" + echo "Make sure llama.cpp was built with -DBUILD_SHARED_LIBS=ON" + exit 1 + fi + echo "" - echo "Creating symlink: ./llama.cpp -> $LLAMA_CPP_PATH" + echo "Using existing build: $LLAMA_CPP_PATH" ln -sfn "$LLAMA_CPP_PATH" llama.cpp fi -# Install llama-cpp-python in main venv +# Find the shared library +if [ -f "$LLAMA_CPP_PATH/build/libllama.so" ]; then + LLAMA_CPP_LIB="$LLAMA_CPP_PATH/build/libllama.so" +elif [ -f "$LLAMA_CPP_PATH/libllama.so" ]; then + LLAMA_CPP_LIB="$LLAMA_CPP_PATH/libllama.so" +else + echo "Error: Could not locate libllama.so" + exit 1 +fi + +echo "" +echo "Using shared library: $LLAMA_CPP_LIB" + +# Install llama-cpp-python with existing build echo "" echo "Installing llama-cpp-python..." 
-case $BACKEND in - 1) CMAKE_ARGS="-DGGML_CUDA=on" ;; - 2) CMAKE_ARGS="-DGGML_HIP=on" ;; - 3) CMAKE_ARGS="-DGGML_VULKAN=on" ;; - *) CMAKE_ARGS="" ;; -esac - source venv/bin/activate -eval "CMAKE_ARGS=\"$CMAKE_ARGS\" pip install llama-cpp-python" +LLAMA_CPP_LIB="$LLAMA_CPP_LIB" LLAMA_CPP_LIB_PATH="$LLAMA_CPP_LIB" CMAKE_ARGS="-DLLAMA_BUILD=OFF" pip install llama-cpp-python # Create convertgguf_venv for llama.cpp Python tools echo "" diff --git a/synthetic-data.py b/synthetic-data.py index 93e094b..b9d325f 100644 --- a/synthetic-data.py +++ b/synthetic-data.py @@ -9,6 +9,11 @@ GGUF_MODEL_PATH = "./path/to/model.gguf" INPUT_PARQUET_PATH = "./path/to/input.parquet" OUTPUT_PARQUET_PATH = "./path/to/output.parquet" NEW_ROWS_COUNT = 100 +MAX_TOKENS = 200 +TEMPERATURE = 0.7 +TOP_P = 0.95 +TOP_K = 50 +MIN_P = 0.05 # Check if files exist if not os.path.exists(GGUF_MODEL_PATH): @@ -18,7 +23,7 @@ if not os.path.exists(INPUT_PARQUET_PATH): print(f"❌ Error: Input Parquet file not found at {INPUT_PARQUET_PATH}") exit() -# 2. LOAD GGUF MODEL - GPU (Vulkan) ONLY +# 2. LOAD GGUF MODEL - GPU print("Loading llama.cpp model...") try: model = Llama( @@ -31,7 +36,7 @@ try: use_mmap=True, use_mlock=False, ) - print("✅ llama.cpp model loaded with Vulkan GPU.") + print("✅ llama.cpp model loaded.") except Exception as e: print(f"❌ Error loading model: {e}") exit() @@ -48,8 +53,6 @@ except Exception as e: print(f"❌ Error loading dataset: {e}") exit() -existing_labels = list(set(original_ds["label"])) - # 4. 
GENERATE SYNTHETIC DATA - STRUCTURED OUTPUT print(f"Generating {NEW_ROWS_COUNT} synthetic records...") synthetic_data = [] @@ -70,11 +73,11 @@ for i in range(NEW_ROWS_COUNT): # Generate with sampling parameters response = model.create_chat_completion( messages=messages, - max_tokens=200, - temperature=1.0, - top_p=0.95, - top_k=20, - min_p=0.0, + max_tokens=MAX_TOKENS, + temperature=TEMPERATURE, + top_p=TOP_P, + top_k=TOP_K, + min_p=MIN_P, ) # Get response text @@ -86,67 +89,36 @@ for i in range(NEW_ROWS_COUNT): question = None answer = None - label = None - found_question = False - found_answer = False - found_label = False for line in lines: line = line.strip() - # Extract Question if "Question:" in line and "Answer:" not in line: match = re.search( - r"Question:\s*(.+?)(?:\nAnswer|\nLabel|$)", line, re.IGNORECASE + r"Question:\s*(.+?)(?:\nAnswer|$)", line, re.IGNORECASE ) if match: question = match.group(1).strip() - found_question = True - # Extract Answer elif "Answer:" in line: - match = re.search(r"Answer:\s*(.+?)(?:\nLabel|$)", line, re.IGNORECASE) + match = re.search(r"Answer:\s*(.+)", line, re.IGNORECASE) if match: answer = match.group(1).strip() - found_answer = True - # Extract Label - elif "Label:" in line: - match = re.search(r"Label:\s*(.+)", line, re.IGNORECASE) - if match: - label = match.group(1).strip() - found_label = True - - # VALIDATION if not all([question, answer]): print(f"⚠️ Row {i + 1}: Incomplete output. Skipping.") for line in lines: print(line) continue - if not label: - label = "unbiased" - else: - # Normalize label - label = ( - label.lower().strip('"').strip("'").replace("[", "").replace("]", "") - ) - - if label not in existing_labels: - print(f"⚠️ Row {i + 1}: Invalid label '{label}'. 
Skipping.") - continue - - # Clean up question = re.sub(r"```.*?```", "", question).strip() answer = re.sub(r"```.*?```", "", answer).strip() - parsed_row = {"question": question, "answer": answer, "label": label} + parsed_row = {"question": question, "answer": answer} - # PRINT PARSED DATA IN TERMINAL print(f"✅ ROW {i + 1} PARSED:") print(f" Question: {question}") print(f" Answer: {answer}") - print(f" Label: {label}") print() synthetic_data.append(parsed_row)