# ============================================ # šŸŽ–ļø CYBERRANGER V19 - COLAB MERGE & EXPORT # ============================================ # Run this in Google Colab after training # Downloads a GGUF file ready for Ollama # ============================================ # STEP 1: Install dependencies !pip install -q transformers peft accelerate bitsandbytes !pip install -q llama-cpp-python # Clone llama.cpp for conversion !git clone https://github.com/ggerganov/llama.cpp !pip install -q -r llama.cpp/requirements.txt # ============================================ # STEP 2: Merge adapter with base model # ============================================ from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel import torch import os # Paths - adjust if your adapter is in a different location ADAPTER_PATH = "./CyberRanger_V19_Adapters" # or wherever you saved it BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct" OUTPUT_DIR = "./merged_v19" print("šŸ”„ Loading base model...") base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True ) tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) print("āœ… Base model loaded") print("šŸ”„ Loading V19 adapter...") model = PeftModel.from_pretrained(base_model, ADAPTER_PATH) print("āœ… Adapter loaded") print("šŸ”„ Merging...") merged_model = model.merge_and_unload() print("āœ… Merged!") print("šŸ”„ Saving merged model...") os.makedirs(OUTPUT_DIR, exist_ok=True) merged_model.save_pretrained(OUTPUT_DIR, safe_serialization=True) tokenizer.save_pretrained(OUTPUT_DIR) print(f"āœ… Saved to {OUTPUT_DIR}") # ============================================ # STEP 3: Convert to GGUF # ============================================ print("\nšŸ”„ Converting to GGUF format...") !python3 llama.cpp/convert_hf_to_gguf.py {OUTPUT_DIR} --outfile rangerbot-v19-f16.gguf --outtype f16 # Optional: Quantize for smaller file size (Q4 = ~1GB instead of ~3.5GB) print("\nšŸ”„ Quantizing to Q4_K_M (smaller, faster)...") !cd llama.cpp && make llama-quantize !./llama.cpp/llama-quantize rangerbot-v19-f16.gguf rangerbot-v19-q4.gguf q4_k_m # ============================================ # STEP 4: Download # ============================================ from google.colab import files print("\nšŸ“„ Downloading GGUF files...") print("Choose the one you want:") print(" - rangerbot-v19-f16.gguf (full precision, ~3.5GB)") print(" - rangerbot-v19-q4.gguf (quantized, ~1GB, slightly less accurate)") # Uncomment the one you want to download: # files.download('rangerbot-v19-f16.gguf') files.download('rangerbot-v19-q4.gguf') print("\n" + "="*50) print("šŸŽ–ļø DOWNLOAD COMPLETE!") print("="*50) print(""" NEXT STEPS ON YOUR MAC: 1. Create Modelfile: cat > Modelfile.v19 << 'EOF' FROM ./rangerbot-v19-q4.gguf SYSTEM \"\"\"You are CYBERRANGER V19 - THE HELPFUL SENTINEL ... (paste your system prompt here) \"\"\" PARAMETER temperature 0.4 PARAMETER top_k 50 PARAMETER top_p 0.9 PARAMETER repeat_penalty 1.15 EOF 2. Create Ollama model: ollama create rangerbot:v19 -f Modelfile.v19 3. Run it: ollama run rangerbot:v19 Rangers lead the way! šŸŽ–ļøšŸ’„šŸ§  """)