# File listing metadata: 89 lines, 2.5 KiB, Python
import os

# Import order is preserved from the original file: unsloth comes before
# peft/transformers.
# NOTE(review): Unsloth asks to be imported ahead of transformers/peft so its
# patches apply -- confirm before letting a formatter re-sort these lines.
import torch
import unsloth
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Startup banner: rule, title, rule -- one per line.
print("=" * 60, "UNSLOTH LORA", "=" * 60, sep="\n")

# ==========================================
# 1. CONFIGURATION
# ==========================================

# Path handed to the base-model and tokenizer loaders further down.
# Ships empty: fill it in before running the script.
BASE_MODEL_PATH = ""
# Directory the LoRA adapter is read from.
LORA_DIR = "./model"
# Directory the merged model and tokenizer are written to.
MERGED_MODEL_PATH = "./merged_model"

# Reported for information only; the visible loaders use device_map="auto".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# ==========================================
# 2. LOAD BASE MODEL
# ==========================================

# Fail fast with an actionable message instead of letting from_pretrained("")
# surface a confusing path/repo-id error.
if not BASE_MODEL_PATH:
    raise ValueError(
        "BASE_MODEL_PATH is empty; set it to the local directory of the base model"
    )

print("Loading Base Model from local path...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    device_map="auto",
    torch_dtype=torch.float16,  # Adjust if you only did 4bit finetuning
    trust_remote_code=True,
)

print("✓ Base model loaded successfully")

# ==========================================
# 3. LOAD LORA ADAPTERS USING PEFT
# ==========================================

print("Loading LoRA adapters using PEFT...")
# Attach the adapter to the base model loaded above. The previous code called
# AutoPeftModelForCausalLM.from_pretrained(LORA_DIR), which ignores `base_model`
# and loads a second copy of whatever base the adapter config names: twice the
# memory, and possibly not the checkpoint at BASE_MODEL_PATH.
lora_model = PeftModel.from_pretrained(base_model, LORA_DIR)

print("✓ LoRA adapters loaded successfully")

# ==========================================
# 4. MERGE LORA INTO BASE MODEL
# ==========================================

print("Merging LoRA adapters into base model...")

# The object returned here is what the save step writes to disk.
merged_model = lora_model.merge_and_unload()

print("✓ Adapters merged")

# ==========================================
# 5. CONFIGURE TOKENIZER
# ==========================================

print("Configuring tokenizer")

# The tokenizer is read from the base model path (not from the adapter directory).
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, trust_remote_code=True)

# Set pad token -- but only when the tokenizer does not already define one.
# Unconditionally aliasing pad to EOS would overwrite a dedicated pad token
# and persist that change into the saved tokenizer files.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("✓ Tokenizer configured")

# ==========================================
# 6. SAVE FOR VLLM DEPLOYMENT
# ==========================================

print("Saving merged model...")

# Model weights (safetensors serialization requested) and tokenizer files are
# written to the same output directory.
merged_model.save_pretrained(
    MERGED_MODEL_PATH,
    safe_serialization=True,
)
tokenizer.save_pretrained(MERGED_MODEL_PATH)

print(f"✓ Merged model saved to {MERGED_MODEL_PATH}")

# ==========================================
# 7. VERIFY OUTPUT
# ==========================================

# os.listdir() returns entries in arbitrary order; sort so the report is
# stable from run to run.
model_files = sorted(os.listdir(MERGED_MODEL_PATH))
print(f"\n✓ Model files created: {len(model_files)} files")
print(f" Files: {model_files}")

# Minimal sanity check on the export: the save step requested safetensors
# serialization, so a config plus at least one .safetensors file is expected.
# Warn rather than raise so the listing above is always shown.
has_config = "config.json" in model_files
has_weights = any(name.endswith(".safetensors") for name in model_files)
if not (has_config and has_weights):
    print(
        "⚠ Expected config.json and at least one .safetensors file in "
        f"{MERGED_MODEL_PATH}; the export may be incomplete"
    )