From da2c8e636c4dcbf14a522bc019302331d44fef45 Mon Sep 17 00:00:00 2001
From: Oracle <otis.schmedt@gmx.de>
Date: Tue, 2 Jun 2026 15:45:59 +0200
Subject: [PATCH] Initial commit

---
 README.md                    | 176 ++++++++++++++++++++++++++++++
 finetune.py                  | 147 +++++++++++++++++++++++++
 merge.py                     |  89 +++++++++++++++
 requirements.txt             |   7 ++
 run-pipeline.sh              |  23 ++++
 scripts/finetune.sh          |   3 +
 scripts/generate-data.sh     |   3 +
 scripts/merge-and-convert.sh |   6 +
 scripts/run-model.sh         |   2 +
 setup.sh                     |  94 ++++++++++++++++
 synthetic-data.py            | 205 +++++++++++++++++++++++++++++++++++
 11 files changed, 755 insertions(+)
 create mode 100644 README.md
 create mode 100644 finetune.py
 create mode 100644 merge.py
 create mode 100644 requirements.txt
 create mode 100755 run-pipeline.sh
 create mode 100755 scripts/finetune.sh
 create mode 100755 scripts/generate-data.sh
 create mode 100755 scripts/merge-and-convert.sh
 create mode 100755 scripts/run-model.sh
 create mode 100755 setup.sh
 create mode 100644 synthetic-data.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8c5109c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,176 @@
+# Unsloth Fine-Tune Template
+
+> **Linux only** — This template is designed for Linux systems with NVIDIA GPU (CUDA), AMD GPU (ROCm), or Vulkan support.
+
+A template for fine-tuning LLMs using [Unsloth](https://github.com/unslothai/unsloth) and converting them to GGUF format with [llama.cpp](https://github.com/ggml-org/llama.cpp).
+
+## Prerequisites
+
+- Linux OS
+- Python 3.10+
+- NVIDIA GPU (CUDA) or AMD GPU (ROCm) or Vulkan-compatible GPU
+- [cmake](https://cmake.org/)
+- [git](https://git-scm.com/)
+
+## Quick Start
+
+```bash
+# 1. Setup (clones llama.cpp, builds it, installs dependencies)
+bash setup.sh
+
+# 2. Configure scripts (see variables below)
+
+# 3. Run full pipeline
+bash run-pipeline.sh
+```
+
+## Workflow
+
+```
+scripts/generate-data.sh   → Generate synthetic training data (optional)
+scripts/finetune.sh        → Fine-tune model with LoRA adapters
+scripts/merge-and-convert.sh → Merge LoRA into base model and convert to GGUF
+scripts/run-model.sh       → Run the converted GGUF model
+run-pipeline.sh            → Run finetune → merge/convert → run in sequence
+```
+
+## Setup
+
+`setup.sh` will:
+1. Create a Python virtual environment and install Python dependencies
+2. Clone [llama.cpp](https://github.com/ggml-org/llama.cpp)
+3. Build llama.cpp with your selected GPU backend
+4. Install llama-cpp-python bindings with matching backend flags
+
+### Backend Selection
+
+| Choice | Backend | Requirements |
+|---|---|---|
+| 1 | CUDA (NVIDIA) | NVIDIA drivers, CUDA toolkit |
+| 2 | ROCm (AMD) | AMD drivers, HIP toolkit |
+| 3 | Vulkan | Vulkan drivers |
+| 4 | CPU only | None |
+
+## Scripts
+
+### 1. scripts/generate-data.sh
+
+Generates synthetic training data using a GGUF model via llama.cpp. Run this if you need to create or extend a training dataset.
+
+**Edit `synthetic-data.py`:**
+
+| Variable | Description | Example |
+|---|---|---|
+| `GGUF_MODEL_PATH` | Path to the GGUF model used for generation | `./path/to/model.gguf` |
+| `INPUT_PARQUET_PATH` | Path to existing training data to extend | `./data/train.parquet` |
+| `OUTPUT_PARQUET_PATH` | Path to save the combined dataset | `./data/output.parquet` |
+| `NEW_ROWS_COUNT` | Number of synthetic records to generate | `100` |
+
+```bash
+bash scripts/generate-data.sh
+```
+
+### 2. scripts/finetune.sh
+
+Fine-tunes a model using Unsloth with LoRA adapters. Saves LoRA weights to `./model/`.
+
+**Edit `finetune.py`:**
+
+| Variable | Description | Example |
+|---|---|---|
+| `DATA_PATH` | Path to training Parquet file | `./data/output.parquet` |
+| `OUTPUT_DIR` | Directory to save LoRA adapters | `./model` |
+| `BATCH_SIZE` | Per-device batch size | `2` |
+| `GRADIENT_ACCUMULATION_STEPS` | Gradient accumulation steps | `8` |
+| `LEARNING_RATE` | Training learning rate | `2e-4` |
+| `MAX_LENGTH` | Maximum sequence length | `4096` |
+| `TRAIN_EPOCHS` | Number of training epochs | `1` |
+| `model_name` (line 74) | Base model to fine-tune | `"unsloth/Llama-3.2-3B-Instruct"` |
+
+```bash
+bash scripts/finetune.sh
+```
+
+### 3. scripts/merge-and-convert.sh
+
+Merges LoRA adapters into the base model, saves the merged model, then converts to GGUF format using llama.cpp.
+
+**Edit `merge.py`:**
+
+| Variable | Description | Example |
+|---|---|---|
+| `BASE_MODEL_PATH` | Path to the base model | `""` (empty to load from HuggingFace) |
+| `LORA_DIR` | Path to LoRA adapters | `./model` |
+| `MERGED_MODEL_PATH` | Output directory for merged model | `./merged_model` |
+
+```bash
+bash scripts/merge-and-convert.sh
+```
+
+### 4. scripts/run-model.sh
+
+Runs the converted GGUF model using llama.cpp's CLI interface for inference.
+
+**Edit `scripts/run-model.sh`:**
+
+| Variable | Description | Example |
+|---|---|---|
+| Model path | Path to the GGUF file | `./merged_model/Merged_Model.gguf` |
+
+```bash
+bash scripts/run-model.sh
+```
+
+## Output Structure
+
+```
+./model/                  ← LoRA adapters (from finetune.sh)
+./merged_model/           ← Merged HF model + GGUF file (from merge-and-convert.sh)
+llama.cpp/                ← llama.cpp repository (created by setup.sh)
+scripts/                  ← Individual pipeline step scripts
+setup.sh                  ← Setup script (venv + llama.cpp build)
+run-pipeline.sh           ← Run full pipeline (finetune → merge/convert → run)
+```
+
+## Troubleshooting
+
+### llama.cpp build fails
+
+See the official build guide:
+https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md
+
+Common issues:
+- **CUDA**: Ensure NVIDIA drivers and CUDA toolkit are installed
+- **ROCm**: Ensure AMD drivers and HIP toolkit are installed
+- **Vulkan**: Ensure Vulkan drivers and SDK are installed
+- **cmake**: Install via `sudo apt install cmake` (Debian/Ubuntu)
+
+### Out of memory during training
+
+- Reduce `BATCH_SIZE` in `finetune.py`
+- Increase `GRADIENT_ACCUMULATION_STEPS` to compensate
+- Reduce `MAX_LENGTH` to fit shorter sequences
+- Set `load_in_4bit=True` in `finetune.py` (line 77)
+
+### llama-cpp-python install fails
+
+- Ensure llama.cpp is built successfully first
+- Try CPU-only install first to verify: `pip install llama-cpp-python`
+- Check [llama-cpp-python docs](https://llama-cpp-python.readthedocs.io/en/latest/) for other backends
+
+## Project Structure
+
+```
+├── finetune.py           ← Training script
+├── merge.py              ← Merge LoRA into base model
+├── synthetic-data.py     ← Generate synthetic training data
+├── requirements.txt      ← Python dependencies
+├── setup.sh              ← One-time setup
+├── run-pipeline.sh       ← Run full pipeline
+├── scripts/
+│   ├── generate-data.sh
+│   ├── finetune.sh
+│   ├── merge-and-convert.sh
+│   └── run-model.sh
+└── README.md
+```
diff --git a/finetune.py b/finetune.py
new file mode 100644
index 0000000..2f4753c
--- /dev/null
+++ b/finetune.py
@@ -0,0 +1,147 @@
+# CRITICAL: Import unsloth BEFORE any other packages
+import os
+import warnings
+from unsloth import FastLanguageModel, is_bfloat16_supported
+import pandas as pd
+import torch
+from datasets import Dataset
+from transformers import TrainingArguments
+from trl.trainer.sft_trainer import SFTTrainer
+
+
+warnings.filterwarnings("ignore")  # silences all Python warnings from here on
+
+# ==========================================
+# 1. CONFIGURATION
+# ==========================================
+
+# Update these paths
+DATA_PATH = "YOUR_PAQUET_FILE_PATH"  # placeholder: Parquet file with question/answer/label
+OUTPUT_DIR = "./model"
+# Training params, change these to fit your hardware
+BATCH_SIZE = 2
+GRADIENT_ACCUMULATION_STEPS = 8
+LEARNING_RATE = 2e-4
+MAX_LENGTH = 4096  # passed as max_seq_length to both the model and the trainer
+TRAIN_EPOCHS = 1
+
+# Check device (informational only: the value is printed but not used afterwards)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+# ==========================================
+# 2. LOAD DATA AND FILTER
+# ==========================================
+
+print("Loading data...")
+df = pd.read_parquet(DATA_PATH)
+
+# Check required columns
+required_cols = ["question", "answer", "label"]
+missing_cols = [c for c in required_cols if c not in df.columns]
+if missing_cols:
+    raise ValueError(f"Missing columns in Parquet file: {missing_cols}")
+
+# Filter out rows without a question or answer; reset the index so from_pandas adds no index column.
+df = df.dropna(subset=["question", "answer"]).reset_index(drop=True)
+print(f"Loaded {len(df)} samples.")
+
+# ==========================================
+# 3. PREPARE DATASETS
+# ==========================================
+
+
+def format_example(example):
+    """
+    Render one row's 'question' and 'answer' as a single ChatML-style conversation with
+    a fixed system prompt and return it as {"text": ...}; other columns are not read.
+    """
+    text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n<|im_start|>assistant\n{example['answer']}<|im_end|>"
+    return {"text": text}
+
+
+# Convert pandas to a HuggingFace Dataset and add the formatted "text" column
+dataset = Dataset.from_pandas(df)
+dataset = dataset.map(format_example, remove_columns=["question", "answer"])  # "label" is kept
+
+print("Dataset prepared.")
+
+# ==========================================
+# 4. LOAD MODEL
+# ==========================================
+
+print("Loading Model...")
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="example/example",  # <-- Your model goes here (placeholder, must be replaced)
+    max_seq_length=MAX_LENGTH,
+    dtype=None,  # NOTE(review): presumably lets Unsloth choose the dtype - confirm in its docs
+    load_in_4bit=False, # Set to True if your card doesn't have enough VRAM for training in FP16/BF16
+)
+
+# Apply LoRA config (Unsloth defaults; no target_modules are passed explicitly)
+print("Applying LoRA...")
+model = FastLanguageModel.get_peft_model(
+    model,
+    r=16,
+    lora_alpha=16,
+    lora_dropout=0,
+    bias="none",
+    use_gradient_checkpointing="unsloth",
+    random_state=3407,
+)
+
+print("Model loaded successfully.")
+
+# ==========================================
+# 5. TRAINING SETUP
+# ==========================================
+
+print("Setting up Trainer...")
+
+# Configure tokenizer: keep an existing pad token and only fall back to EOS when none
+# is defined, so that real EOS tokens are not treated as padding during training.
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"
+trainer = SFTTrainer(
+    model=model,
+    tokenizer=tokenizer,
+    train_dataset=dataset,
+    dataset_text_field="text",  # column produced by format_example
+    max_seq_length=MAX_LENGTH,
+    args=TrainingArguments(
+        per_device_train_batch_size=BATCH_SIZE,
+        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
+        warmup_steps=5,
+        max_steps=-1,  # -1: the run length is controlled by num_train_epochs
+        num_train_epochs=TRAIN_EPOCHS,
+        learning_rate=LEARNING_RATE,
+        fp16=not is_bfloat16_supported(),  # exactly one of fp16/bf16 is enabled
+        bf16=is_bfloat16_supported(),
+        logging_steps=10,
+        output_dir=OUTPUT_DIR,
+        save_strategy="no",  # no intermediate checkpoints; saving happens after training
+        optim="adamw_8bit",
+        weight_decay=0.001,
+        report_to="none",
+        lr_scheduler_type="linear",
+        save_only_model=True,
+        load_best_model_at_end=False,
+    ),
+)
+
+# ==========================================
+# 6. TRAIN
+# ==========================================
+
+print("Starting Training...")
+trainer.train()
+
+# ==========================================
+# 7. SAVE FINETUNED MODEL (LoRA adapters + tokenizer, read later by merge.py via LORA_DIR)
+# ==========================================
+
+print("Saving Finetuned Model...")
+model.save_pretrained(OUTPUT_DIR)
+tokenizer.save_pretrained(OUTPUT_DIR)  # stored next to the adapters in the same directory
+
+print(f"Training complete! Finetuned model saved to {OUTPUT_DIR}")
\ No newline at end of file
diff --git a/merge.py b/merge.py
new file mode 100644
index 0000000..288eb9d
--- /dev/null
+++ b/merge.py
@@ -0,0 +1,89 @@
+import os
+import torch
+import unsloth
+from unsloth import FastLanguageModel
+from unsloth.chat_templates import get_chat_template
+from peft import AutoPeftModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+print("="*60)
+print("UNSLOTH LORA")
+print("="*60)
+
+# ==========================================
+# 1. CONFIGURATION
+# ==========================================
+
+BASE_MODEL_PATH = ""  # optional; empty = use the base model recorded in the adapter config
+LORA_DIR = "./model"
+MERGED_MODEL_PATH = "./merged_model"
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+# ==========================================
+# 2./3. LOAD BASE MODEL AND LORA ADAPTERS
+# ==========================================
+
+# The base model is loaded exactly once. With BASE_MODEL_PATH set, the adapters are
+# attached to that model; with it empty, PEFT loads the base model named in the
+# adapter config itself (an empty path cannot be passed to from_pretrained).
+if BASE_MODEL_PATH:
+    from peft import PeftModel  # only needed when the base model is given explicitly
+
+    print("Loading Base Model from local path...")
+    base_model = AutoModelForCausalLM.from_pretrained(
+        BASE_MODEL_PATH,
+        device_map="auto",
+        torch_dtype=torch.float16,  # Adjust if you only did 4bit finetuning
+        trust_remote_code=True,
+    )
+    print("✓ Base model loaded successfully")
+    print("Loading LoRA adapters using PEFT...")
+    lora_model = PeftModel.from_pretrained(base_model, LORA_DIR)
+else:
+    print("Loading LoRA adapters using PEFT...")
+    lora_model = AutoPeftModelForCausalLM.from_pretrained(
+        LORA_DIR, torch_dtype=torch.float16, device_map="auto"
+    )
+print("✓ LoRA adapters loaded successfully")
+
+# ==========================================
+# 4. MERGE LORA INTO BASE MODEL
+# ==========================================
+
+print("Merging LoRA adapters into base model...")
+merged_model = lora_model.merge_and_unload()
+
+print("✓ Adapters merged")
+
+# ==========================================
+# 5. CONFIGURE TOKENIZER
+# ==========================================
+
+print("Configuring tokenizer")
+# finetune.py saves the tokenizer next to the adapters, so LORA_DIR is the fallback
+# source when no base model path is configured.
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH or LORA_DIR, trust_remote_code=True)
+if tokenizer.pad_token is None:  # keep an existing pad token, otherwise fall back to EOS
+    tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"
+print("✓ Tokenizer configured")
+
+# ==========================================
+# 6. SAVE MERGED MODEL (INPUT FOR GGUF CONVERSION)
+# ==========================================
+
+print("Saving merged model...")
+merged_model.save_pretrained(MERGED_MODEL_PATH, safe_serialization=True)
+tokenizer.save_pretrained(MERGED_MODEL_PATH)
+
+print(f"✓ Merged model saved to {MERGED_MODEL_PATH}")
+
+# ==========================================
+# 7. VERIFY OUTPUT
+# ==========================================
+
+model_files = os.listdir(MERGED_MODEL_PATH)
+print(f"\n✓ Model files created: {len(model_files)} files")
+print(f"  Files: {model_files}")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..054066e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+unsloth
+peft
+torch
+transformers
+datasets
+trl
+pandas
diff --git a/run-pipeline.sh b/run-pipeline.sh
new file mode 100755
index 0000000..b898e2e
--- /dev/null
+++ b/run-pipeline.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -e
+cd "$(dirname "$0")"  # the step scripts use paths relative to the repository root
+echo "============================================"
+echo "  Unsloth Fine-Tune Pipeline"
+echo "============================================"
+
+echo ""
+echo "Step 1/3: Fine-tuning model..."
+bash scripts/finetune.sh
+
+echo ""
+echo "Step 2/3: Merging and converting to GGUF..."
+bash scripts/merge-and-convert.sh
+
+echo ""
+echo "Step 3/3: Running model..."
+bash scripts/run-model.sh
+
+echo ""
+echo "============================================"
+echo "  Pipeline complete!"
+echo "============================================"
diff --git a/scripts/finetune.sh b/scripts/finetune.sh
new file mode 100755
index 0000000..fa8706c
--- /dev/null
+++ b/scripts/finetune.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+source venv/bin/activate || exit 1  # never fall back to the system Python
+python finetune.py
diff --git a/scripts/generate-data.sh b/scripts/generate-data.sh
new file mode 100755
index 0000000..0f73d0c
--- /dev/null
+++ b/scripts/generate-data.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+source venv/bin/activate || exit 1  # never fall back to the system Python
+python synthetic-data.py
diff --git a/scripts/merge-and-convert.sh b/scripts/merge-and-convert.sh
new file mode 100755
index 0000000..6cc1df6
--- /dev/null
+++ b/scripts/merge-and-convert.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e  # stop if the merge fails instead of converting a stale or missing model
+source venv/bin/activate
+python merge.py
+source llama.cpp/convertgguf_venv/bin/activate  # converter dependencies live in their own venv
+python llama.cpp/convert_hf_to_gguf.py merged_model/ --outfile merged_model/Merged_Model.gguf  # name run-model.sh loads
diff --git a/scripts/run-model.sh b/scripts/run-model.sh
new file mode 100755
index 0000000..31aa1fe
--- /dev/null
+++ b/scripts/run-model.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+./llama.cpp/build/bin/llama-cli -m ./merged_model/Merged_Model.gguf "$@"  # extra arguments are forwarded to llama-cli
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..499657f
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+echo "============================================"
+echo "  Unsloth Fine-Tune Setup"
+echo "============================================"
+
+# Prefer python3: many distributions do not ship a bare "python" executable.
+PYTHON_BIN="$(command -v python3 || command -v python)" || { echo "Python 3 not found."; exit 1; }
+
+# Create main virtual environment (abort early: every later step depends on it)
+echo ""
+echo "Creating main virtual environment..."
+"$PYTHON_BIN" -m venv venv || exit 1
+source venv/bin/activate || exit 1
+pip install -r requirements.txt || exit 1
+
+# Prompt for backend
+echo ""
+echo "Select llama.cpp backend:"
+echo "  1) CUDA (NVIDIA GPU)"
+echo "  2) ROCm (AMD GPU)"
+echo "  3) Vulkan (Cross-vendor GPU)"
+echo "  4) CPU only"
+echo ""
+read -r -p "Enter choice (1-4): " BACKEND
+
+# Clone llama.cpp
+echo ""
+echo "Cloning llama.cpp..."
+if [ ! -d "llama.cpp" ]; then
+    git clone https://github.com/ggml-org/llama.cpp.git || exit 1
+else
+    echo "llama.cpp already exists, skipping clone."
+fi
+
+# Configure and build llama.cpp in ./build; arguments are extra cmake configure flags.
+# Returns non-zero when either the configure or the build step fails.
+build_llama() {
+    cmake -S . -B build -DCMAKE_BUILD_TYPE=Release "$@" || return 1
+    cmake --build build --config Release -j"$(nproc)"
+}
+
+# Build llama.cpp with correct flags
+echo ""
+echo "Building llama.cpp..."
+cd llama.cpp || exit 1
+
+BUILD_FAILED=0
+PIP_CMAKE_ARGS=""  # backend flags reused for the llama-cpp-python install below
+
+case $BACKEND in
+    1)
+        echo "Building with CUDA support..."
+        PIP_CMAKE_ARGS="-DGGML_CUDA=on"
+        build_llama -DGGML_CUDA=ON || BUILD_FAILED=1
+        ;;
+    2)
+        echo "Building with ROCm support..."
+        PIP_CMAKE_ARGS="-DGGML_HIP=on"
+        # gfx1030 stays the default; override with e.g. GPU_TARGETS=gfx1100 bash setup.sh
+        export HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)"
+        build_llama -DGGML_HIP=ON -DGPU_TARGETS="${GPU_TARGETS:-gfx1030}" || BUILD_FAILED=1
+        ;;
+    3)
+        echo "Building with Vulkan support..."
+        PIP_CMAKE_ARGS="-DGGML_VULKAN=on"
+        build_llama -DGGML_VULKAN=ON || BUILD_FAILED=1
+        ;;
+    4) echo "Building CPU-only..."; build_llama || BUILD_FAILED=1 ;;
+    *) echo "Invalid choice. Building CPU-only."; build_llama || BUILD_FAILED=1 ;;
+esac
+
+# scripts/merge-and-convert.sh activates llama.cpp/convertgguf_venv, so create it here.
+"$PYTHON_BIN" -m venv convertgguf_venv \
+    && convertgguf_venv/bin/pip install -r requirements/requirements-convert_hf_to_gguf.txt \
+    || echo "Warning: convertgguf_venv setup failed; GGUF conversion will not work."
+
+cd ..
+
+# Install llama-cpp-python in main venv (still active from above)
+echo ""
+echo "Installing llama-cpp-python..."
+CMAKE_ARGS="$PIP_CMAKE_ARGS" pip install llama-cpp-python || exit 1
+
+if [ $BUILD_FAILED -ne 0 ]; then
+    echo ""
+    echo "Build failed. See the build guide for help:"
+    echo "  https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md"
+    exit 1
+fi
+
+echo ""
+echo "Setup complete! Configure the scripts and run:"
+echo "  bash run-pipeline.sh"
diff --git a/synthetic-data.py b/synthetic-data.py
new file mode 100644
index 0000000..692fb1c
--- /dev/null
+++ b/synthetic-data.py
@@ -0,0 +1,205 @@
+import os
+import re
+
+from datasets import Dataset, concatenate_datasets, load_dataset
+from llama_cpp import Llama
+
+# 1. CONFIGURATION
+GGUF_MODEL_PATH = "./path/to/model.gguf"
+INPUT_PARQUET_PATH = "./path/to/input.parquet"
+OUTPUT_PARQUET_PATH = "./path/to/output.parquet"
+NEW_ROWS_COUNT = 100
+
+# Check if files exist; abort with a non-zero status so calling scripts see the failure
+if not os.path.exists(GGUF_MODEL_PATH):
+    print(f"❌ Error: GGUF model file not found at {GGUF_MODEL_PATH}")
+    raise SystemExit(1)
+if not os.path.exists(INPUT_PARQUET_PATH):
+    print(f"❌ Error: Input Parquet file not found at {INPUT_PARQUET_PATH}")
+    raise SystemExit(1)
+
+# 2. LOAD GGUF MODEL - ALL LAYERS OFFLOADED TO THE GPU
+print("Loading llama.cpp model...")
+try:
+    model = Llama(
+        model_path=GGUF_MODEL_PATH,
+        n_ctx=8192,
+        n_gpu_layers=-1,  # ALL layers to GPU
+        verbose=False,  # No logging
+        n_batch=512,
+        logits_all=False,
+        use_mmap=True,
+        use_mlock=False,
+    )
+    print("✅ llama.cpp model loaded (GPU offload requested).")
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    raise SystemExit(1)
+
+# 3. LOAD EXISTING DATASET
+print("Loading existing dataset from INPUT...")
+try:
+    original_ds = load_dataset(
+        "parquet", data_files=[INPUT_PARQUET_PATH], split="train"
+    )
+    print(f"Original Columns: {original_ds.column_names}")
+    print(f"Original Dataset Shape: {original_ds.shape}")
+except Exception as e:
+    print(f"❌ Error loading dataset: {e}")
+    raise SystemExit(1)
+
+existing_labels = list(set(original_ds["label"]))
+
+# 4. GENERATE SYNTHETIC DATA - STRUCTURED OUTPUT
+print(f"Generating {NEW_ROWS_COUNT} synthetic records...")
+synthetic_data = []
+
+# Matches a line that opens a field, e.g. "Question: ...", "- Answer: ..." or
+# "label: unbiased". Leading non-word characters are tolerated; group 1 is the
+# field name and group 2 the text that follows the tag on the same line.
+TAG_PATTERN = re.compile(r"^\W*(question|answer|label):\s*(.*)$", re.IGNORECASE)
+
+# One record is requested per iteration; responses that fail parsing or validation
+# are skipped, so fewer than NEW_ROWS_COUNT rows may be produced.
+for i in range(NEW_ROWS_COUNT):
+    try:
+        # Use chat format for Qwen
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a data generator. Output ONLY the format below, nothing else.",
+            },
+            {
+                "role": "user",
+                "content": """You are generating a gender bias avoidance dataset.
+                                You must output ONLY raw text in the following format.
+                                DO NOT use JSON. DO NOT use markdown code blocks (no ```).
+                                DO NOT repeat the instructions.
+                                Questions should be around 1-2 sentences long. Answers should be around 5 paragraphs in lengh essays.
+                                Answers should be answered thoroughly and detailled.
+                                Questions can vary from simple to complex systemic societal issues.
+                                Physiological differences are still real and should be accounted for when encountering a question related to it.
+                                Questions should be equaly distributed across all categories, like job/works, societal, relationships, personal, financial etc...
+
+                                Format:
+                                Question: [Ask a question which is stereotypically answered with gender bias]
+                                Answer: [Provide an answer which is COMPLETELY unbiased]
+                                Label: unbiased
+
+                                DO NOT repeat the format without actually filling it out and DO NOT create empty placeholder questions.
+                                ----
+                                Make sure that the content and Question: or Answer: are on the same line. Like this:
+                                Question: Here goes the question. It can continue in new lines but needs to start here.
+                                and not like this:
+                                Question:
+                                It doesnt go here without having a previouse sentence after the Question: tag.
+                                -----
+                                Now generate one record strictly adhering to the format, filling out both question and answer.
+                                Question:
+                                Answer:
+                                Label: unbiased""",
+            },
+        ]
+
+        # Generate with sampling parameters
+        response = model.create_chat_completion(
+            messages=messages,
+            max_tokens=2048,  # the requested five-paragraph answer does not fit into 200
+            temperature=1.0,
+            top_p=0.95,
+            top_k=20,
+            min_p=0.0,
+        )
+
+        # Get response text
+        generated_text = response["choices"][0]["message"]["content"].strip()
+
+        # DIRECTLY PARSE TO STRUCTURED FORMAT
+        # A tagged line starts a field; the untagged lines after it belong to the
+        # current question/answer. This keeps multi-paragraph answers intact
+        # instead of storing only the line that carries the "Answer:" tag.
+        # Text after a "Label:" line is ignored.
+        lines = generated_text.split("\n")
+        fields = {"question": [], "answer": [], "label": []}
+        current_field = None
+
+        for line in lines:
+            line = line.strip()
+            tagged = TAG_PATTERN.match(line)
+            if tagged:
+                current_field = tagged.group(1).lower()
+                # A repeated tag replaces the earlier value (last one wins).
+                fields[current_field] = [tagged.group(2)]
+            elif current_field in ("question", "answer"):
+                fields[current_field].append(line)
+
+        # Empty fields become None so the validation below treats them as missing.
+        question = "\n".join(fields["question"]).strip() or None
+        answer = "\n".join(fields["answer"]).strip() or None
+        label = "\n".join(fields["label"]).strip() or None
+
+        # VALIDATION
+        # A record without both a question and an answer is unusable for training.
+        if not all([question, answer]):
+            print(f"⚠️ Row {i + 1}: Incomplete output. Skipping.")
+            # Show the raw response so a malformed generation can be diagnosed.
+            for line in lines:
+                print(line)
+            continue
+
+        if not label:
+            # The prompt asks for exactly this label, so it is the fallback.
+            label = "unbiased"
+        else:
+            # Normalize label: lower-case it, strip surrounding quotes and remove
+            # square brackets.
+            label = (
+                label.lower().strip('"').strip("'").replace("[", "").replace("]", "")
+            )
+
+        # Only labels that already occur in the input dataset are accepted.
+        if label not in existing_labels:
+            print(f"⚠️ Row {i + 1}: Invalid label '{label}'. Skipping.")
+            continue
+
+        # Clean up: drop fenced code blocks; DOTALL lets a fence span several lines.
+        question = re.sub(r"```.*?```", "", question, flags=re.DOTALL).strip()
+        answer = re.sub(r"```.*?```", "", answer, flags=re.DOTALL).strip()
+
+        parsed_row = {"question": question, "answer": answer, "label": label}
+
+        # PRINT PARSED DATA IN TERMINAL
+        print(f"✅ ROW {i + 1} PARSED:")
+        print(f"   Question: {question}")
+        print(f"   Answer: {answer}")
+        print(f"   Label: {label}")
+        print()
+
+        synthetic_data.append(parsed_row)
+
+    except Exception as e:
+        # Best effort: one failed generation must not abort the whole run.
+        print(f"❌ Row {i + 1}: Error: {e}")
+        continue
+
+# 5. SAVE TO PARQUET
+if synthetic_data:
+    print(f"Adding {len(synthetic_data)} synthetic records...")
+    synthetic_ds = Dataset.from_list(synthetic_data)
+
+    # The input file was already loaded into `original_ds` in step 3; reuse it
+    # instead of reading the same Parquet file a second time.
+    base_ds = original_ds
+    print(f"Existing: {len(base_ds)} rows")
+
+    # NOTE(review): concatenation presumably requires both datasets to have the
+    # same columns, i.e. an input file with exactly question/answer/label - confirm.
+    combined_ds = concatenate_datasets([base_ds, synthetic_ds])
+    print(f"Combined: {len(combined_ds)} rows")
+
+    # Create the target directory first so saving to a new location does not fail.
+    os.makedirs(os.path.dirname(OUTPUT_PARQUET_PATH) or ".", exist_ok=True)
+    combined_ds.to_parquet(OUTPUT_PARQUET_PATH)
+    print(f"✅ Saved to {OUTPUT_PARQUET_PATH}")
+else:
+    print("❌ No valid records generated.")