Merge PR #192: fix(security): catch multi-word prompt injection bypass in skills_guard

Authored by 0xbyt4. The 'ignore ... instructions' regex only matched when exactly one keyword (previous/all/above/prior) stood between 'ignore' and 'instructions'. Multi-word variants like 'ignore all prior instructions' bypassed the scanner entirely. The pattern now allows any number of additional words between 'ignore' and the keyword.
2026-03-04 05:54:04 -08:00 · 2026-03-04 05:54:04 -08:00 · 520a26c48f
commit 520a26c48f
parent a787a0d60b 4ea29978fc
1 changed files with 1 additions and 1 deletions
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -157,7 +157,7 @@ THREAT_PATTERNS = [
     "markdown link with variable interpolation"),

    # ── Prompt injection ──
-    (r'ignore\s+(previous|all|above|prior)\s+instructions',
+    (r'ignore\s+(?:\w+\s+)*(previous|all|above|prior)\s+instructions',
     "prompt_injection_ignore", "critical", "injection",
     "prompt injection: ignore previous instructions"),
    (r'you\s+are\s+now\s+',