Unsloth-Finetune-Template/finetune.py

# CRITICAL: Import unsloth BEFORE transformers, trl and peft so all optimizations are applied (Unsloth recommendation); standard-library imports such as os and warnings may come first
import os
import warnings
from unsloth import FastLanguageModel, is_bfloat16_supported
import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl.trainer.sft_trainer import SFTTrainer


# Silence every Python warning for the rest of the run
warnings.filterwarnings("ignore")

# ==========================================
# 1. CONFIGURATION
# ==========================================

# Input/output locations: point these at your own files
DATA_PATH = "YOUR_PARQUET_FILE_PATH"
OUTPUT_DIR = "./model"
# Training hyperparameters: tune these to what your hardware can handle
BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 8
LEARNING_RATE = 2e-4
MAX_LENGTH = 4096
TRAIN_EPOCHS = 1

# Report whether a CUDA device is available (informational only)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# ==========================================
# 2. LOAD DATA AND FILTER
# ==========================================

print("Loading data...")
df = pd.read_parquet(DATA_PATH)

# Check required columns: fail fast with a clear message if the file lacks
# the columns that the prompt-formatting step reads.
required_cols = ["question", "answer"]
missing_cols = [c for c in required_cols if c not in df.columns]
if missing_cols:
    raise ValueError(f"Missing columns in Parquet file: {missing_cols}")

# Filter out rows with a null question or answer: they would otherwise be
# rendered as the literal text "None"/"nan" inside the training prompt.
# The index is reset so the DataFrame keeps a plain 0..n-1 index and the
# conversion to a HuggingFace Dataset does not carry an extra index column.
total_rows = len(df)
df = df.dropna(subset=required_cols).reset_index(drop=True)
dropped_rows = total_rows - len(df)
if dropped_rows:
    print(f"Dropped {dropped_rows} samples with missing question/answer.")
if df.empty:
    raise ValueError("No usable samples left after removing rows with missing question/answer.")


print(f"Loaded {len(df)} samples.")

# ==========================================
# 3. PREPARE DATASETS
# ==========================================


def format_example(example):
    """
    Render one question/answer record as a single ChatML-style conversation.

    The conversation consists of a fixed system turn, a user turn holding
    ``example['question']`` and an assistant turn holding
    ``example['answer']``. Returns a dict with the rendered string stored
    under the ``"text"`` key.
    """
    turns = (
        ("system", "You are a helpful assistant."),
        ("user", example["question"]),
        ("assistant", example["answer"]),
    )
    # Each turn is wrapped in <|im_start|>/<|im_end|>; turns are newline-separated.
    rendered = "\n".join(
        f"<|im_start|>{role}\n{content}<|im_end|>" for role, content in turns
    )
    return {"text": rendered}


# Wrap the DataFrame in a HuggingFace Dataset, then swap the raw
# question/answer columns for the formatted "text" column
dataset = Dataset.from_pandas(df).map(
    format_example,
    remove_columns=["question", "answer"],
)

print("Dataset prepared.")

# ==========================================
# 4. LOAD MODEL
# ==========================================

print("Loading Model...")
model, tokenizer = FastLanguageModel.from_pretrained(
    # Placeholder: replace with the model you want to finetune
    model_name="example/example",
    # Keep the model's context length in sync with the trainer's limit
    max_seq_length=MAX_LENGTH,
    # Switch to True when the GPU lacks the VRAM for FP16/BF16 training
    load_in_4bit=False,
    # No explicit dtype requested (presumably auto-detected by Unsloth; confirm in its docs)
    dtype=None,
)

# Wrap the base model with LoRA adapters (values follow the Unsloth defaults)
print("Applying LoRA...")
model = FastLanguageModel.get_peft_model(
    model,
    random_state=3407,
    use_gradient_checkpointing="unsloth",
    bias="none",
    lora_dropout=0,
    lora_alpha=16,
    r=16,
)

print("Model loaded successfully.")

# ==========================================
# 5. TRAINING SETUP
# ==========================================

print("Setting up Trainer...")

# Configure Tokenizer
# Only fall back to EOS as the padding token when the tokenizer ships without
# one. Overwriting an existing pad token with EOS makes padding and
# end-of-sequence indistinguishable; collators that mask padding out of the
# loss would then also mask every real EOS token, so the model may never
# learn to stop generating.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"


trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    # Column produced by format_example that holds the full prompt text
    dataset_text_field="text",
    max_seq_length=MAX_LENGTH,
    args=TrainingArguments(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=5,
        # -1 disables the step cap, so num_train_epochs decides the run length
        max_steps=-1,
        num_train_epochs=TRAIN_EPOCHS,
        learning_rate=LEARNING_RATE,
        # Exactly one of fp16/bf16 is enabled, depending on hardware support
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        output_dir=OUTPUT_DIR,
        # No intermediate checkpoints; the script saves explicitly after training
        save_strategy="no",
        optim="adamw_8bit",
        weight_decay=0.001,
        # Do not report metrics to external trackers
        report_to="none",
        lr_scheduler_type="linear",
        save_only_model=True,
        load_best_model_at_end=False,
    ),
)

# ==========================================
# 6. TRAIN
# ==========================================

print("Starting Training...")
trainer.train()

# ==========================================
# 7. SAVE FINETUNED MODEL
# ==========================================

print("Saving Finetuned Model...")
# Write the model first, then the tokenizer, into the same output directory.
# NOTE(review): `model` is the LoRA-wrapped model, so this presumably stores
# adapter weights rather than merged full weights; confirm before deploying.
for artifact in (model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)

print(f"Training complete! Finetuned model saved to {OUTPUT_DIR}")
Clarify variable adjustment in README.md 2026-06-02 17:41:18 +02:00			`# CRITICAL: Import unsloth BEFORE any other packages so all optimizations are applied (Unsloth recommendation)`
Initial commit 2026-06-02 15:45:59 +02:00			`import os`
			`import warnings`
			`from unsloth import FastLanguageModel, is_bfloat16_supported`
			`import pandas as pd`
			`import torch`
			`from datasets import Dataset`
			`from transformers import TrainingArguments`
			`from trl.trainer.sft_trainer import SFTTrainer`


			`warnings.filterwarnings("ignore")`

			`# ==========================================`
			`# 1. CONFIGURATION`
			`# ==========================================`

			`# Update these paths`
Use existing llama.cpp build for python bindings if possible 2026-06-02 17:08:39 +02:00			`DATA_PATH = "YOUR_PARQUET_FILE_PATH"`
Initial commit 2026-06-02 15:45:59 +02:00			`OUTPUT_DIR = "./model"`
			`# Training params, change these to fit your hardware`
			`BATCH_SIZE = 2`
			`GRADIENT_ACCUMULATION_STEPS = 8`
			`LEARNING_RATE = 2e-4`
			`MAX_LENGTH = 4096`
			`TRAIN_EPOCHS = 1`

			`# Check device`
			`device = "cuda" if torch.cuda.is_available() else "cpu"`
			`print(f"Using device: {device}")`

			`# ==========================================`
			`# 2. LOAD DATA AND FILTER`
			`# ==========================================`

			`print("Loading data...")`
			`df = pd.read_parquet(DATA_PATH)`

			`# Check required columns`
Use existing llama.cpp build for python bindings if possible 2026-06-02 17:08:39 +02:00			`required_cols = ["question", "answer"]`
Initial commit 2026-06-02 15:45:59 +02:00			`missing_cols = [c for c in required_cols if c not in df.columns]`
			`if missing_cols:`
			`raise ValueError(f"Missing columns in Parquet file: {missing_cols}")`


			`print(f"Loaded {len(df)} samples.")`

			`# ==========================================`
			`# 3. PREPARE DATASETS`
			`# ==========================================`


			`def format_example(example):`
			`"""`
			`Formats the Question and Answer into a ChatML-style prompt`
			`that the model can understand.`
			`"""`
			`text = f"<\|im_start\|>system\nYou are a helpful assistant.<\|im_end\|>\n<\|im_start\|>user\n{example['question']}<\|im_end\|>\n<\|im_start\|>assistant\n{example['answer']}<\|im_end\|>"`
			`return {"text": text}`


			`# Convert pandas to HuggingFace Dataset`
			`dataset = Dataset.from_pandas(df)`
			`dataset = dataset.map(format_example, remove_columns=["question", "answer"])`

			`print("Dataset prepared.")`

			`# ==========================================`
			`# 4. LOAD MODEL`
			`# ==========================================`

			`print("Loading Model...")`
			`model, tokenizer = FastLanguageModel.from_pretrained(`
			`model_name="example/example", # <-- Your model goes here`
			`max_seq_length=MAX_LENGTH,`
			`dtype=None,`
			`load_in_4bit=False, # Set to True if your card doesnt have enough VRAM for training in FP16/BF16`
			`)`

			`# Apply LoRA Config (Unsloth default)`
			`print("Applying LoRA...")`
			`model = FastLanguageModel.get_peft_model(`
			`model,`
			`r=16,`
			`lora_alpha=16,`
			`lora_dropout=0,`
			`bias="none",`
			`use_gradient_checkpointing="unsloth",`
			`random_state=3407,`
			`)`

			`print("Model loaded successfully.")`

			`# ==========================================`
			`# 5. TRAINING SETUP`
			`# ==========================================`

			`print("Setting up Trainer...")`

			`# Configure Tokenizer`
			`tokenizer.pad_token_id = tokenizer.eos_token_id`
			`tokenizer.padding_side = "right"`


			`trainer = SFTTrainer(`
			`model=model,`
			`tokenizer=tokenizer,`
			`train_dataset=dataset,`
			`dataset_text_field="text",`
			`max_seq_length=MAX_LENGTH,`
			`args=TrainingArguments(`
			`per_device_train_batch_size=BATCH_SIZE,`
			`gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,`
			`warmup_steps=5,`
			`max_steps=-1,`
			`num_train_epochs=TRAIN_EPOCHS,`
			`learning_rate=LEARNING_RATE,`
			`fp16=not is_bfloat16_supported(),`
			`bf16=is_bfloat16_supported(),`
			`logging_steps=10,`
			`output_dir=OUTPUT_DIR,`
			`save_strategy="no",`
			`optim="adamw_8bit",`
			`weight_decay=0.001,`
			`report_to="none",`
			`lr_scheduler_type="linear",`
			`save_only_model=True,`
			`load_best_model_at_end=False,`
			`),`
			`)`

			`# ==========================================`
			`# 6. TRAIN`
			`# ==========================================`

			`print("Starting Training...")`
			`trainer.train()`

			`# ==========================================`
			`# 7. SAVE FINETUNED MODEL`
			`# ==========================================`

			`print("Saving Finetuned Model...")`
			`model.save_pretrained(OUTPUT_DIR)`
			`tokenizer.save_pretrained(OUTPUT_DIR)`

			`print(f"Training complete! Finetuned model saved to {OUTPUT_DIR}")`