#!/usr/bin/env python3
"""
CyberRanger V19 Adapter Merge Script

Merges QLoRA adapter with base model and prepares for Ollama

Usage:
    python3 merge_v19_to_ollama.py

Requirements:
    pip install torch transformers peft accelerate
"""

import os
import sys
import shutil
import traceback
from pathlib import Path

# Paths
ADAPTER_PATH = "/Users/ranger/.ranger-memory/NCI_MSc/Year_2/AI_ML_Cybersecurity/Colab_Forge_V5/CyberRanger_V19_Adapters"
BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"  # or SmolLM2-1.7B if you used base
OUTPUT_DIR = "/Users/ranger/.ranger-memory/code/qbrain/merged_v19"
MODELFILE_PATH = "/Users/ranger/.ranger-memory/code/qbrain/Modelfile.v19-mesh"

# Weight file names accepted by the pre-flight adapter check, in order of
# preference (safetensors first, then the pickle-based .bin format).
ADAPTER_WEIGHT_FILES = ("adapter_model.safetensors", "adapter_model.bin")


def check_dependencies() -> bool:
    """Report whether torch, transformers and peft can be imported.

    Prints the installed version of each package on success, or the import
    error plus a pip install hint on failure.

    Returns:
        True when all three imports succeed, False otherwise.
    """
    # Only the imports live in the try body, so an unrelated failure while
    # printing versions is not misreported as a missing dependency.
    try:
        import torch
        import transformers
        import peft
    except ImportError as e:
        print(f"❌ Missing dependency: {e}")
        print("\nInstall with:")
        print("pip install torch transformers peft accelerate")
        return False

    print("✅ All dependencies installed")
    print(f" PyTorch: {torch.__version__}")
    print(f" Transformers: {transformers.__version__}")
    print(f" PEFT: {peft.__version__}")
    return True


def find_adapter_weights(adapter_dir):
    """Locate the adapter weight file inside *adapter_dir*.

    Args:
        adapter_dir: Directory expected to contain the adapter (str or Path).

    Returns:
        A ``Path`` to the first file from ``ADAPTER_WEIGHT_FILES`` that exists
        in the directory, or ``None`` when none of them is present.
    """
    for file_name in ADAPTER_WEIGHT_FILES:
        candidate = Path(adapter_dir) / file_name
        if candidate.is_file():
            return candidate
    return None


def merge_adapter() -> str:
    """Merge the adapter at ADAPTER_PATH into BASE_MODEL and save the result.

    Loads the base model in float16, applies the adapter, folds it into the
    base weights with ``merge_and_unload()``, and writes the merged model and
    tokenizer to OUTPUT_DIR. If MODELFILE_PATH exists it is copied into
    OUTPUT_DIR as ``Modelfile``; otherwise a warning is printed.

    Returns:
        OUTPUT_DIR, the directory the merged model was written to.
    """
    # Heavy third-party imports stay local so the module can be imported
    # (and check_dependencies can run) without them installed.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    import torch

    print("\n🔄 STEP 1: Loading base model...")
    print(f" Base: {BASE_MODEL}")

    # NOTE(review): trust_remote_code=True allows code shipped with the model
    # repository to run locally - confirm it is actually required for
    # BASE_MODEL.
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    print("✅ Base model loaded")

    print("\n🔄 STEP 2: Loading V19 adapter...")
    print(f" Adapter: {ADAPTER_PATH}")
    model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
    print("✅ Adapter loaded")

    print("\n🔄 STEP 3: Merging adapter into base model...")
    merged_model = model.merge_and_unload()
    print("✅ Merge complete!")

    print(f"\n🔄 STEP 4: Saving merged model to {OUTPUT_DIR}...")
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    merged_model.save_pretrained(OUTPUT_DIR, safe_serialization=True)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print("✅ Merged model saved!")

    # Copy the Modelfile system prompt next to the merged weights.
    modelfile_dest = os.path.join(OUTPUT_DIR, "Modelfile")
    if os.path.exists(MODELFILE_PATH):
        shutil.copy(MODELFILE_PATH, modelfile_dest)
        print(f"✅ Modelfile copied to {modelfile_dest}")
    else:
        # Best effort: the merge itself succeeded, so only warn.
        print(f"⚠️ Modelfile not found at {MODELFILE_PATH}; skipping copy")

    return OUTPUT_DIR


def print_next_steps(output_dir) -> None:
    """Print instructions for GGUF conversion and Ollama.

    Args:
        output_dir: Directory holding the merged model; substituted into the
            printed GGUF conversion command.
    """
    banner = "=" * 60

    print("\n" + banner)
    print("🎖️ V19 ADAPTER MERGE COMPLETE!")
    print(banner)
    print(f"\nMerged model saved to: {output_dir}")
    print("\n📋 NEXT STEPS:")
    print("-" * 40)
    # Every command below is written relative to the directory that contains
    # the llama.cpp checkout, so no `cd` appears between steps.
    print(f"""
1. Clone llama.cpp (if not already done):
   git clone https://github.com/ggerganov/llama.cpp
   pip install -r llama.cpp/requirements.txt

2. Convert to GGUF:
   python3 llama.cpp/convert_hf_to_gguf.py \\
       {output_dir} \\
       --outfile rangerbot-v19.gguf \\
       --outtype f16

3. (Optional) Quantize for smaller size:
   ./llama.cpp/llama-quantize rangerbot-v19.gguf rangerbot-v19-q4.gguf q4_k_m

4. Create Modelfile for GGUF:
   echo 'FROM ./rangerbot-v19.gguf' > Modelfile.gguf
   # Then add the SYSTEM prompt from Modelfile.v19-mesh

5. Create Ollama model:
   ollama create rangerbot:v19 -f Modelfile.gguf

6. Test it:
   ollama run rangerbot:v19
""")
    print(banner)
    print("Rangers lead the way! 🎖️💥🧠")
    print(banner)


def main() -> None:
    """Run the dependency check, adapter check, merge, and next-step output.

    Exits with status 1 when a dependency is missing, when no adapter weight
    file is found under ADAPTER_PATH, or when the merge raises.
    """
    banner = "=" * 60
    print(banner)
    print("🎖️ CYBERRANGER V19 ADAPTER MERGE SCRIPT")
    print(banner)

    # Check dependencies
    if not check_dependencies():
        sys.exit(1)

    # Check adapter exists (either weight format listed in
    # ADAPTER_WEIGHT_FILES is accepted).
    if find_adapter_weights(ADAPTER_PATH) is None:
        print(f"❌ Adapter not found at {ADAPTER_PATH}")
        sys.exit(1)

    print(f"\n✅ V19 adapter found: {ADAPTER_PATH}")

    # Merge. This is the script's top-level boundary, so any failure is
    # reported with a full traceback and turned into a non-zero exit status.
    try:
        output_dir = merge_adapter()
        print_next_steps(output_dir)
    except Exception as e:
        print(f"\n❌ Error during merge: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()