"""Merge LoRA adapters into a base causal-LM and save the result to disk.

Steps performed at import/run time:
  1. Load the base model from ``BASE_MODEL_PATH`` in float16.
  2. Attach the LoRA adapters stored in ``LORA_DIR`` to that base model.
  3. Merge the adapter weights into the base weights.
  4. Load the tokenizer from ``BASE_MODEL_PATH`` and make sure it has a pad token.
  5. Save the merged model (safetensors) and tokenizer to ``MERGED_MODEL_PATH``.
  6. List the files that were written.
"""

import os

import torch
# NOTE(review): unsloth is kept ahead of peft/transformers; presumably it
# patches those libraries on import -- verify before reordering.
import unsloth
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

print("=" * 60)
print("UNSLOTH LORA")
print("=" * 60)

# ==========================================
# 1. CONFIGURATION
# ==========================================
BASE_MODEL_PATH = ""  # local directory (or hub id) of the base model
LORA_DIR = "./model"  # directory containing the trained LoRA adapter
MERGED_MODEL_PATH = "./merged_model"  # output directory for the merged model

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Fail early with a clear message instead of an opaque loader error.
if not BASE_MODEL_PATH:
    raise ValueError(
        "BASE_MODEL_PATH is empty; set it to the base model directory "
        "before running this script."
    )

# ==========================================
# 2. LOAD BASE MODEL
# ==========================================
print("Loading Base Model from local path...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    device_map="auto",
    torch_dtype=torch.float16,  # Adjust if you only did 4bit finetuning
    trust_remote_code=True,
)
print("✓ Base model loaded successfully")

# ==========================================
# 3. LOAD LORA ADAPTERS USING PEFT
# ==========================================
print("Loading LoRA adapters using PEFT...")
# Attach the adapters to the base model loaded above, so the merge targets
# exactly the BASE_MODEL_PATH weights and no second copy of the base model
# is instantiated from the adapter config.
lora_model = PeftModel.from_pretrained(base_model, LORA_DIR)
print("✓ LoRA adapters loaded successfully")

# ==========================================
# 4. MERGE LORA INTO BASE MODEL
# ==========================================
print("Merging LoRA adapters into base model...")
merged_model = lora_model.merge_and_unload()
print("✓ Adapters merged")

# ==========================================
# 5. CONFIGURE TOKENIZER
# ==========================================
print("Configuring tokenizer")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, trust_remote_code=True)

# Set pad token: fall back to EOS only when the tokenizer defines none, so a
# dedicated pad token shipped with the model is not overwritten.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
print("✓ Tokenizer configured")

# ==========================================
# 6. SAVE FOR VLLM DEPLOYMENT
# ==========================================
print("Saving merged model...")
merged_model.save_pretrained(MERGED_MODEL_PATH, safe_serialization=True)
tokenizer.save_pretrained(MERGED_MODEL_PATH)
print(f"✓ Merged model saved to {MERGED_MODEL_PATH}")

# ==========================================
# 7. VERIFY OUTPUT
# ==========================================
model_files = os.listdir(MERGED_MODEL_PATH)
print(f"\n✓ Model files created: {len(model_files)} files")
print(f" Files: {model_files}")