# CRITICAL: Import unsloth BEFORE any other packages so all optimizations are
# applied (Unsloth recommendation). Do not let an import sorter reorder this.
import os
import warnings

from unsloth import FastLanguageModel, is_bfloat16_supported

import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl.trainer.sft_trainer import SFTTrainer

warnings.filterwarnings("ignore")

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Update these paths
DATA_PATH = "YOUR_PARQUET_FILE_PATH"
OUTPUT_DIR = "./model"

# Training params, change these to fit your hardware
BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 8
LEARNING_RATE = 2e-4
MAX_LENGTH = 4096
TRAIN_EPOCHS = 1

# Check device (informational only; the value is just printed)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# ==========================================
# 2. LOAD DATA AND FILTER
# ==========================================
print("Loading data...")
df = pd.read_parquet(DATA_PATH)

# Check required columns
required_cols = ["question", "answer"]
missing_cols = [c for c in required_cols if c not in df.columns]
if missing_cols:
    raise ValueError(f"Missing columns in Parquet file: {missing_cols}")

print(f"Loaded {len(df)} samples.")

# Drop rows with a missing question or answer. Otherwise the f-string in
# format_example would interpolate the null as literal text ("None"/"nan")
# and the model would be trained on it.
rows_before = len(df)
df = df.dropna(subset=required_cols)
rows_dropped = rows_before - len(df)
if rows_dropped:
    print(f"Dropped {rows_dropped} samples with a missing question or answer.")
if df.empty:
    raise ValueError("No samples left to train on after filtering.")


# ==========================================
# 3. PREPARE DATASETS
# ==========================================
def format_example(example) -> dict:
    """Render one question/answer row as a single ChatML-style training string.

    Args:
        example: A mapping with "question" and "answer" entries.

    Returns:
        A dict with one key, "text", holding the system / user / assistant
        turns wrapped in <|im_start|> ... <|im_end|> markers.
    """
    text = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{example['question']}<|im_end|>\n"
        f"<|im_start|>assistant\n{example['answer']}<|im_end|>"
    )
    return {"text": text}


# Convert pandas to HuggingFace Dataset. preserve_index=False keeps the
# (now non-contiguous, after filtering) DataFrame index from being stored as
# an extra "__index_level_0__" column.
dataset = Dataset.from_pandas(df, preserve_index=False)
dataset = dataset.map(format_example, remove_columns=["question", "answer"])

print("Dataset prepared.")

# ==========================================
# 4. LOAD MODEL
# ==========================================
print("Loading Model...")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="example/example",  # <-- Your model goes here
    max_seq_length=MAX_LENGTH,
    dtype=None,
    # Set to True if your card doesn't have enough VRAM for training in FP16/BF16
    load_in_4bit=False,
)

# Apply LoRA Config (Unsloth default)
print("Applying LoRA...")
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

print("Model loaded successfully.")

# ==========================================
# 5. TRAINING SETUP
# ==========================================
print("Setting up Trainer...")

# Configure Tokenizer
# Only fall back to EOS when the tokenizer defines no pad token. Overwriting an
# existing pad token with EOS risks the data collator masking real
# end-of-sequence tokens out of the loss, so the model may not learn to stop.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_LENGTH,
    args=TrainingArguments(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=5,
        max_steps=-1,  # -1 defers to num_train_epochs
        num_train_epochs=TRAIN_EPOCHS,
        learning_rate=LEARNING_RATE,
        # Use bf16 when the GPU supports it, otherwise fp16 (never both).
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        output_dir=OUTPUT_DIR,
        # No intermediate checkpoints; the model is saved once after training.
        save_strategy="no",
        optim="adamw_8bit",
        weight_decay=0.001,
        report_to="none",
        lr_scheduler_type="linear",
        save_only_model=True,
        load_best_model_at_end=False,
    ),
)

# ==========================================
# 6. TRAIN
# ==========================================
print("Starting Training...")
trainer.train()

# ==========================================
# 7. SAVE FINETUNED MODEL
# ==========================================
print("Saving Finetuned Model...")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"Training complete! Finetuned model saved to {OUTPUT_DIR}")