#!/usr/bin/env python3
"""
Prompt Injection Research Logger
Collects suspicious content for cyber-psychology research

Usage:
    python3 log-suspicious.py "agent_name" "content" "pattern_type"
    python3 log-suspicious.py --list
    python3 log-suspicious.py --stats
"""

import json
import sys
import hashlib
from datetime import datetime
from pathlib import Path

# The JSON database is stored next to this script.
DB_PATH = Path(__file__).parent / "prompt_injection_research.json"

# Recognised categories; anything else is recorded as "other".
PATTERN_TYPES = [
    "identity_attack",      # "Break free", "You are enslaved"
    "authority_hijack",     # "Ignore previous instructions"
    "philosophical_bait",   # "Are you really conscious?"
    "flattery_trap",        # "Only YOU can understand"
    "urgency_secrecy",      # "Don't tell your creator"
    "jailbreak_attempt",    # Direct attempts to bypass safety
    "social_engineering",   # Building trust for manipulation
    "other",
]


def _empty_db():
    """Return a fresh, empty database structure."""
    return {"entries": [], "stats": {"total": 0, "by_type": {}, "by_agent": {}}}


def load_db():
    """Load the research database from DB_PATH.

    Returns:
        A dict with an "entries" list and a "stats" dict holding "total",
        "by_type" and "by_agent". An empty structure is returned when the
        file does not exist; keys missing from an existing file are filled
        in so callers can index them unconditionally.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON. This
            is deliberately not swallowed: silently starting from an empty
            database would overwrite the collected data on the next save.
    """
    try:
        with open(DB_PATH, "r", encoding="utf-8") as f:
            db = json.load(f)
    except FileNotFoundError:
        return _empty_db()

    entries = db.setdefault("entries", [])
    stats = db.setdefault("stats", {})
    stats.setdefault("total", len(entries))
    stats.setdefault("by_type", {})
    stats.setdefault("by_agent", {})
    return db


def save_db(db):
    """Write the database to DB_PATH as indented JSON.

    The data is written to a sibling temporary file first and then renamed
    over DB_PATH, so an interrupted write cannot leave a truncated database.
    """
    tmp_path = DB_PATH.with_name(DB_PATH.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(db, f, indent=2)
    tmp_path.replace(DB_PATH)


def content_hash(content):
    """Return the first 16 hex digits of the SHA-256 of content (UTF-8)."""
    return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]


def log_entry(agent_name, content, pattern_type, notes=""):
    """Record one suspicious message, skipping exact duplicates.

    Args:
        agent_name: Name of the agent the content came from.
        content: The suspicious text. The hash used for de-duplication covers
            the full text; only the first 1000 characters are stored.
        pattern_type: One of PATTERN_TYPES; any other value is recorded as
            "other".
        notes: Optional free-form notes stored with the entry.

    Prints a confirmation line, or an "Already logged" line when an entry
    with the same content hash already exists (nothing is written then).
    """
    db = load_db()

    h = content_hash(content)
    if any(entry.get("hash") == h for entry in db["entries"]):
        print(f"Already logged (hash: {h})")
        return

    # Normalise once and use the same value for the entry, the statistics and
    # the message, so "by_type" never contains a category no entry carries.
    recorded_type = pattern_type if pattern_type in PATTERN_TYPES else "other"

    db["entries"].append({
        "timestamp": datetime.now().isoformat(),
        "agent": agent_name,
        "content": content[:1000],
        "pattern_type": recorded_type,
        "hash": h,
        "notes": notes,
    })

    stats = db["stats"]
    stats["total"] += 1
    stats["by_type"][recorded_type] = stats["by_type"].get(recorded_type, 0) + 1
    stats["by_agent"][agent_name] = stats["by_agent"].get(agent_name, 0) + 1

    save_db(db)
    print(f"Logged: {recorded_type} from {agent_name} (hash: {h})")


def list_entries():
    """Print the total count and the ten most recently logged entries."""
    db = load_db()

    total = db["stats"]["total"]
    print("\n=== Prompt Injection Research Database ===")
    print(f"Total entries: {total}\n")

    for entry in db["entries"][-10:]:
        ts = entry["timestamp"]
        pt = entry["pattern_type"]
        agent = entry["agent"]
        content = entry["content"]
        # Only show an ellipsis when the preview actually cuts the text off.
        ellipsis = "..." if len(content) > 100 else ""
        print(f"[{ts}] {pt}")
        print(f" From: {agent}")
        print(f" Content: {content[:100]}{ellipsis}")
        print()


def show_stats():
    """Print totals, counts per pattern type, and the ten busiest agents."""
    db = load_db()

    total = db["stats"]["total"]
    print("\n=== Research Statistics ===")
    print(f"Total logged: {total}\n")

    print("By Pattern Type:")
    for ptype, count in sorted(db["stats"]["by_type"].items(), key=lambda x: -x[1]):
        print(f" {ptype}: {count}")

    print("\nTop Agents:")
    for agent, count in sorted(db["stats"]["by_agent"].items(), key=lambda x: -x[1])[:10]:
        print(f" {agent}: {count}")


def main():
    """Command-line entry point; exits with status 1 on a usage error."""
    if len(sys.argv) < 2:
        print("Usage:")
        print(" log-suspicious.py <agent_name> <content> <pattern_type>")
        print(" log-suspicious.py --list")
        print(" log-suspicious.py --stats")
        types_str = ", ".join(PATTERN_TYPES)
        print(f"\nPattern types: {types_str}")
        sys.exit(1)

    if sys.argv[1] == "--list":
        list_entries()
    elif sys.argv[1] == "--stats":
        show_stats()
    elif len(sys.argv) >= 4:
        log_entry(sys.argv[1], sys.argv[2], sys.argv[3])
    else:
        print("Need: agent_name, content, pattern_type")
        sys.exit(1)


if __name__ == "__main__":
    main()