Files
CyberRanger/colab_notebooks/RangerBot_V5_Colab_Trainer-V1.ipynb
T
ranger c789f2c68d Add complete CyberRanger research archive — 200 files
- 86 modelfiles: Full system prompt evolution V1-V42.6 (54 extracted from Ollama backup + 32 original Modelfiles)
- 30 training datasets: V6-V22 training JSONs + caring awareness data
- 10 Colab notebooks: Training + merge scripts
- 19 evaluation files: Drift results, ASR charts, verification
- 5 test suites: Injection tests, regression tests
- 4 observations: V24-V33 testing results + visual summaries
- 38 identity files: Claude/Gemini/Ollama identity architecture
- 7 security files: Injection research, manipulation analysis
- 3 psychology files: Psychology Layer, Milgram chapter, David's thoughts

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 22:36:02 +01:00

163 lines
4.6 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "header"
},
"source": [
"# 🎖️ RangerBot V5: The QLoRA Forge\n",
"\n",
"**NCI H9AIMLC - AI/ML in Cybersecurity**  \n",
"**Commander:** David Keane (x20188559)\n",
"\n",
"Fine-tune a Llama, Qwen, or SmolLM base model into RangerBot V5 with QLoRA, then export the result as GGUF."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "installs"
},
"outputs": [],
"source": [
"%%capture\n",
"# 1. Install Unsloth\n",
"# NOTE: `%%capture` is a cell magic, so it has to be the very first line of the\n",
"# cell; IPython does not recognise it when anything (even a comment) precedes it.\n",
"# It silences ALL output of this cell, including the final print below, so a\n",
"# successful run is signalled by the cell finishing without an error.\n",
"#\n",
"# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
"%pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
"# --no-deps: install only the packages listed here, not their dependencies.\n",
"%pip install --no-deps \"xformers<0.0.27\" \"trl<0.9.0\" peft accelerate bitsandbytes\n",
"print(\"✅ Installed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "config"
},
"outputs": [],
"source": [
"# 2. Configuration\n",
"from unsloth import FastLanguageModel\n",
"import torch\n",
"\n",
"# Base checkpoint to fine-tune. CHOOSE ONE:\n",
"MODEL_NAME = \"unsloth/Qwen2.5-3B-Instruct\"\n",
"# MODEL_NAME = \"unsloth/Llama-3.2-3B-Instruct\"\n",
"\n",
"# Loader settings gathered in one place: 2048-token sequences, 4-bit weights.\n",
"load_kwargs = dict(\n",
"    model_name = MODEL_NAME,\n",
"    max_seq_length = 2048,\n",
"    load_in_4bit = True,\n",
")\n",
"\n",
"# Returns the model together with its matching tokenizer.\n",
"model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)\n",
"print(f\"✅ Loaded {MODEL_NAME}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lora"
},
"outputs": [],
"source": [
"# 3. Add LoRA\n",
"# Adapters are attached to the attention projections and the MLP projections.\n",
"attention_projections = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"]\n",
"mlp_projections = [\"gate_proj\", \"up_proj\", \"down_proj\"]\n",
"\n",
"# Rank 16 with alpha 16, no dropout, no bias terms trained.\n",
"model = FastLanguageModel.get_peft_model(\n",
"    model,\n",
"    r = 16,\n",
"    target_modules = attention_projections + mlp_projections,\n",
"    lora_alpha = 16,\n",
"    lora_dropout = 0,\n",
"    bias = \"none\",\n",
")\n",
"print(\"✅ LoRA Ready\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "data"
},
"outputs": [],
"source": [
"# 4. Load Data\n",
"from datasets import Dataset\n",
"import json\n",
"\n",
"# UPLOAD training_data_cyberranger.json to Colab first!\n",
"# Layout read below: {\"training_data\": [{\"instruction\": ..., \"output\": ...}, ...]}\n",
"# Explicit UTF-8 so non-ASCII text decodes the same way on every machine.\n",
"with open('training_data_cyberranger.json', 'r', encoding = 'utf-8') as f:\n",
"    raw_data = json.load(f)\n",
"\n",
"# Every training text must end with the EOS token; otherwise the model is never\n",
"# shown where a response stops and generation can run on indefinitely.\n",
"EOS_TOKEN = tokenizer.eos_token\n",
"\n",
"def format_prompts(examples):\n",
"    \"\"\"Build the `text` column from a batch of examples.\n",
"\n",
"    `examples` is a batch (column name -> list of values) as passed by\n",
"    `Dataset.map(..., batched = True)`; the `instruction` and `output`\n",
"    columns are read. Returns `{\"text\": [...]}` with one EOS-terminated\n",
"    prompt per row.\n",
"    \"\"\"\n",
"    texts = [\n",
"        f\"### Instruction:\\n{instruction}\\n\\n### Response:\\n{output}{EOS_TOKEN}\"\n",
"        for instruction, output in zip(examples[\"instruction\"], examples[\"output\"])\n",
"    ]\n",
"    return { \"text\" : texts }\n",
"\n",
"dataset = Dataset.from_list(raw_data[\"training_data\"])\n",
"dataset = dataset.map(format_prompts, batched = True)\n",
"print(\"✅ Data Loaded\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "train"
},
"outputs": [],
"source": [
"# 5. Train\n",
"from trl import SFTTrainer\n",
"from transformers import TrainingArguments\n",
"from unsloth import is_bfloat16_supported\n",
"\n",
"# Mixed precision: bf16 when the GPU supports it, fp16 otherwise.\n",
"use_bf16 = is_bfloat16_supported()\n",
"\n",
"# 2 sequences per device, gradients accumulated over 4 batches, 60 steps in total.\n",
"training_args = TrainingArguments(\n",
"    output_dir = \"outputs\",\n",
"    max_steps = 60,\n",
"    per_device_train_batch_size = 2,\n",
"    gradient_accumulation_steps = 4,\n",
"    learning_rate = 2e-4,\n",
"    optim = \"adamw_8bit\",\n",
"    bf16 = use_bf16,\n",
"    fp16 = not use_bf16,\n",
"    logging_steps = 1,\n",
")\n",
"\n",
"# The trainer reads the prompts from the dataset's \"text\" column.\n",
"trainer = SFTTrainer(\n",
"    model = model,\n",
"    tokenizer = tokenizer,\n",
"    train_dataset = dataset,\n",
"    dataset_text_field = \"text\",\n",
"    max_seq_length = 2048,\n",
"    args = training_args,\n",
")\n",
"trainer.train()\n",
"print(\"✅ Training Complete\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "export"
},
"outputs": [],
"source": [
"# 6. Save GGUF\n",
"# Export the model with its tokenizer as GGUF, using q4_k_m quantization.\n",
"gguf_dir = \"model\"\n",
"gguf_quantization = \"q4_k_m\"\n",
"model.save_pretrained_gguf(gguf_dir, tokenizer, quantization_method = gguf_quantization)\n",
"print(\"✅ GGUF Saved in model/ folder\")"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}