# CRITICAL: Import unsloth before transformers, trl, and any other ML packages so that
# all of its optimizations are applied (Unsloth recommendation). Standard-library imports may precede it.
import os
import warnings
from unsloth import FastLanguageModel, is_bfloat16_supported
import pandas as pd
import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl.trainer.sft_trainer import SFTTrainer


# Silence every warning category for the remainder of the run.
warnings.filterwarnings(action="ignore")

# ==========================================
# 1. CONFIGURATION
# ==========================================

# --- Paths: edit these before running ---
DATA_PATH = "YOUR_PARQUET_FILE_PATH"  # input Parquet file
OUTPUT_DIR = "./model"  # destination for trainer output and the saved model

# --- Training hyperparameters: tune these to your hardware ---
BATCH_SIZE = 2  # per-device batch size
GRADIENT_ACCUMULATION_STEPS = 8  # batches accumulated before each optimizer step
LEARNING_RATE = 2e-4
MAX_LENGTH = 4096  # maximum sequence length passed to the model and the trainer
TRAIN_EPOCHS = 1

# Report whether PyTorch can see a CUDA device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# ==========================================
# 2. LOAD DATA AND FILTER
# ==========================================

print("Loading data...")
df = pd.read_parquet(DATA_PATH)

# Check required columns: fail early with a clear message instead of a
# KeyError later during prompt formatting.
required_cols = ["question", "answer"]
missing_cols = [c for c in required_cols if c not in df.columns]
if missing_cols:
    raise ValueError(f"Missing columns in Parquet file: {missing_cols}")

# Filter out rows where the question or the answer is null. Left in place,
# such values would be interpolated into the prompt as the literal text
# "None"/"nan" and the model would be trained on them.
rows_before_filter = len(df)
# reset_index keeps a plain 0..n-1 index so that Dataset.from_pandas does not
# materialize the (now gappy) index as an extra column.
df = df.dropna(subset=required_cols).reset_index(drop=True)
rows_dropped = rows_before_filter - len(df)
if rows_dropped:
    print(f"Dropped {rows_dropped} rows with a missing question or answer.")

# An empty dataset cannot be trained on; report it here rather than deep
# inside the trainer.
if df.empty:
    raise ValueError("No usable samples left after filtering the Parquet file.")

print(f"Loaded {len(df)} samples.")

# ==========================================
# 3. PREPARE DATASETS
# ==========================================


def format_example(example, *, system_prompt="You are a helpful assistant."):
    """Render one question/answer record as a single ChatML-style training string.

    The result has three turns (system, user, assistant), each wrapped in
    ``<|im_start|>``/``<|im_end|>`` markers and separated by a newline.

    Args:
        example: Mapping providing ``"question"`` and ``"answer"`` entries.
            Both values are interpolated with normal f-string formatting.
        system_prompt: Text placed in the system turn. Keyword-only, so the
            function can still be called with just the record (for instance
            by ``Dataset.map``); the default reproduces the original
            hard-coded prompt.

    Returns:
        A dict ``{"text": <formatted prompt>}``.

    Raises:
        KeyError: If ``example`` is a dict lacking ``"question"`` or
            ``"answer"``.
    """
    turns = (
        f"<|im_start|>system\n{system_prompt}<|im_end|>",
        f"<|im_start|>user\n{example['question']}<|im_end|>",
        f"<|im_start|>assistant\n{example['answer']}<|im_end|>",
    )
    # Turns are newline-separated; there is no trailing newline after the
    # final <|im_end|>.
    return {"text": "\n".join(turns)}


# Wrap the DataFrame in a HuggingFace Dataset, then format every row into a
# "text" field while removing the raw "question"/"answer" columns.
dataset = Dataset.from_pandas(df).map(
    format_example,
    remove_columns=["question", "answer"],
)

print("Dataset prepared.")

# ==========================================
# 4. LOAD MODEL
# ==========================================

print("Loading Model...")
# Base-model loading options, gathered in one place for easy editing.
_base_model_options = {
    "model_name": "example/example",  # <-- Your model goes here
    "max_seq_length": MAX_LENGTH,
    "dtype": None,
    # Set to True if your card doesn't have enough VRAM for training in FP16/BF16
    "load_in_4bit": False,
}
model, tokenizer = FastLanguageModel.from_pretrained(**_base_model_options)

# Wrap the base model with LoRA adapters (Unsloth default settings).
print("Applying LoRA...")
_lora_options = {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "bias": "none",
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
}
model = FastLanguageModel.get_peft_model(model, **_lora_options)

print("Model loaded successfully.")

# ==========================================
# 5. TRAINING SETUP
# ==========================================

print("Setting up Trainer...")

# Configure Tokenizer
# Fall back to EOS as the padding token ONLY when the tokenizer defines none.
# Overwriting an existing pad token with EOS makes padding indistinguishable
# from genuine end-of-sequence tokens; collators that mask pad positions in
# the labels then mask EOS as well, so the model is never trained to stop.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

# Decide the mixed-precision mode once so fp16 and bf16 are guaranteed to be
# mutually exclusive.
use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # column produced by format_example
    max_seq_length=MAX_LENGTH,
    args=TrainingArguments(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=5,
        max_steps=-1,  # -1: training length is governed by num_train_epochs
        num_train_epochs=TRAIN_EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        output_dir=OUTPUT_DIR,
        save_strategy="no",  # no intermediate checkpoints are written
        optim="adamw_8bit",
        weight_decay=0.001,
        report_to="none",  # do not report to any logging integration
        lr_scheduler_type="linear",
        save_only_model=True,
        load_best_model_at_end=False,
    ),
)

# ==========================================
# 6. TRAIN
# ==========================================

print("Starting Training...")
trainer.train()

# ==========================================
# 7. SAVE FINETUNED MODEL
# ==========================================

print("Saving Finetuned Model...")
# Write the model first, then the tokenizer, into the same output directory.
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(OUTPUT_DIR)

print(f"Training complete! Finetuned model saved to {OUTPUT_DIR}")