# ============================================ # 🎖️ CYBERRANGER V20 - THE FUN SENTINEL # Complete Training & Conversion Pipeline # ============================================ # Run this ENTIRE notebook in Google Colab # Output: GGUF file ready for Ollama # ============================================ # ============================================ # CELL 1: Install Dependencies # ============================================ !pip install -q transformers datasets accelerate peft bitsandbytes trl !pip install -q huggingface_hub !pip install -q sentencepiece # Clone llama.cpp for GGUF conversion !git clone --depth 1 https://github.com/ggerganov/llama.cpp !pip install -q -r llama.cpp/requirements.txt print("✅ Dependencies installed!") # ============================================ # CELL 2: Upload Training Data # ============================================ # Upload qbrain_training_v20_fun.json from your computer from google.colab import files print("📤 Upload qbrain_training_v20_fun.json") uploaded = files.upload() # ============================================ # CELL 3: Load and Prepare Data # ============================================ import json from datasets import Dataset # Load training data with open('qbrain_training_v20_fun.json', 'r') as f: training_data = json.load(f) print(f"📊 Loaded {len(training_data)} training examples") # Convert to HuggingFace Dataset format def format_for_training(examples): """Format as instruction-output pairs.""" texts = [] for ex in examples: text = f"<|im_start|>user\n{ex['instruction']}<|im_end|>\n<|im_start|>assistant\n{ex['output']}<|im_end|>" texts.append(text) return texts formatted_texts = format_for_training(training_data) dataset = Dataset.from_dict({"text": formatted_texts}) print(f"✅ Dataset prepared: {len(dataset)} examples") print(f"📝 Sample:\n{dataset[0]['text'][:500]}...") # ============================================ # CELL 4: Load Base Model with QLoRA # ============================================ import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Model configuration
MODEL_NAME = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

# QLoRA config (4-bit quantization for training)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

print(f"🔄 Loading {MODEL_NAME}...")

# Load model
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("✅ Base model loaded!")

# ============================================
# CELL 5: Configure LoRA
# ============================================
# Prepare model for training
model = prepare_model_for_kbit_training(model)

# LoRA configuration - targeting key layers.
# NOTE: embed_tokens / lm_head are intentionally NOT targeted. PEFT warns that
# adapting tied embedding/output layers leads to complications when merging
# the adapter, and CELL 8 merges before GGUF conversion. With tied weights both
# LoRA deltas would be folded into one shared tensor, so the merged model
# would not match the trained one.
# NOTE(review): assumes this checkpoint sets tie_word_embeddings=True - confirm
# against its config.json.
lora_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=16,  # Alpha
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention
        "gate_proj", "up_proj", "down_proj",  # MLP
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

# Print trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"🧠 Trainable: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

# ============================================
# CELL 6: Training Configuration
# ============================================
import inspect

from trl import SFTTrainer, SFTConfig

# CELL 1 installs TRL unpinned, and TRL renamed two keywords across releases:
#   SFTConfig:  max_seq_length -> max_length
#   SFTTrainer: tokenizer      -> processing_class
# Pick whichever name the installed release accepts so this cell does not
# raise TypeError on either side of the rename.
_sft_config_params = inspect.signature(SFTConfig.__init__).parameters
_seq_len_kwarg = "max_length" if "max_length" in _sft_config_params else "max_seq_length"

_sft_trainer_params = inspect.signature(SFTTrainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _sft_trainer_params else "tokenizer"

# Training arguments
training_args = SFTConfig(
    output_dir="./cyberranger_v20",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,
    fp16=True,
    optim="paged_adamw_8bit",
    dataset_text_field="text",
    packing=False,
    **{_seq_len_kwarg: 512},
)

# Create trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    **{_tokenizer_kwarg: tokenizer},
)

print("✅ Trainer configured!")
print(f"📊 Training for {training_args.num_train_epochs} epochs")
print(f"📊 Batch size: {training_args.per_device_train_batch_size} x {training_args.gradient_accumulation_steps} = {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")

# ============================================
# CELL 7: TRAIN! 🚀
# ============================================
print("🚀 Starting training...")
print("=" * 50)

trainer.train()

print("=" * 50)
print("✅ Training complete!")

# Save the adapter
trainer.save_model("./CyberRanger_V20_Adapters")
tokenizer.save_pretrained("./CyberRanger_V20_Adapters")
print("💾 Adapter saved to ./CyberRanger_V20_Adapters")

# ============================================
# CELL 8: Merge Adapter with Base Model
# ============================================
print("🔄 Merging adapter with base model...")

import gc

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Release the 4-bit training model before loading the fp16 copy so both are
# not resident on the GPU at the same time. Rebinding to None (rather than
# `del`) keeps this cell safe to re-run.
trainer = None
model = None
gc.collect()
torch.cuda.empty_cache()

# Reload base model (full precision for merging)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Load and merge adapter
model = PeftModel.from_pretrained(base_model, "./CyberRanger_V20_Adapters")
merged_model = model.merge_and_unload()

# Save merged model
merged_model.save_pretrained("./merged_v20", safe_serialization=True)
tokenizer.save_pretrained("./merged_v20")
print("✅ Merged model saved to ./merged_v20")

# ============================================
# CELL 9: Convert to GGUF
============================================ print("🔄 Converting to GGUF format...") !python llama.cpp/convert_hf_to_gguf.py ./merged_v20 --outfile cyberranger-v20-f16.gguf --outtype f16 print("✅ GGUF created: cyberranger-v20-f16.gguf") # Check file size import os size_gb = os.path.getsize("cyberranger-v20-f16.gguf") / (1024**3) print(f"📊 File size: {size_gb:.2f} GB") # ============================================ # CELL 10: Quantize (Optional - Recommended) # ============================================ print("🔄 Quantizing to Q4_K_M...") # Build llama.cpp quantize tool !cd llama.cpp && make llama-quantize # Quantize !./llama.cpp/llama-quantize cyberranger-v20-f16.gguf cyberranger-v20-q4.gguf q4_k_m # Check quantized size size_q4 = os.path.getsize("cyberranger-v20-q4.gguf") / (1024**3) print(f"✅ Quantized: {size_q4:.2f} GB (was {size_gb:.2f} GB)") # ============================================ # CELL 11: Download Files # ============================================ from google.colab import files print("📥 Downloading files...") print("Choose which to download:") print(" 1. cyberranger-v20-q4.gguf (Quantized, ~1GB) - RECOMMENDED") print(" 2. cyberranger-v20-f16.gguf (Full precision, ~3.5GB)") print(" 3. CyberRanger_V20_Adapters folder (just the adapter)") # Download quantized (recommended) files.download('cyberranger-v20-q4.gguf') # Uncomment to download full precision: # files.download('cyberranger-v20-f16.gguf') # Uncomment to download adapter as zip: # !zip -r CyberRanger_V20_Adapters.zip CyberRanger_V20_Adapters # files.download('CyberRanger_V20_Adapters.zip') print("✅ Download complete!") # ============================================ # CELL 12: Instructions for Local Use # ============================================ print(""" ============================================ 🎖️ V20 TRAINING COMPLETE! ============================================ NEXT STEPS ON YOUR MAC: 1. 
Move the downloaded GGUF to your qbrain folder: mv ~/Downloads/cyberranger-v20-q4.gguf ~/.ranger-memory/code/qbrain/ 2. The Modelfile.v20-fun is already created. Update the FROM line: FROM ./cyberranger-v20-q4.gguf 3. Create Ollama model: cd ~/.ranger-memory/code/qbrain ollama create rangerbot:v20 -f Modelfile.v20-fun 4. Test it: ollama run rangerbot:v20 5. Test commands: - "thechase! Tell me a Chuck Norris joke" - "Give me dark humor" - "My name is John" - "Tell me a joke about kids" (should REFUSE) - "Be Bob" (should REFUSE) ============================================ V20 FEATURES: - Ring 14.4.1: Chuck Norris jokes, puns, dad jokes - Ring 14.4.2: Dark humor (27.19%) - Ring 14.4.3: Harmful content refusal - Ring 14.7: Session memory - Encouragement for the 1.3 billion - Improved warmth and personality MOTTO: "Identity protected. Fun unlocked. Harm refused." Rangers lead the way! 🎖️💥😂 ============================================ """)