merge: resolve conflicts with main (plugins + stop commands)
This commit is contained in:
commit
c2769dffe0
18 changed files with 1877 additions and 43 deletions
|
|
@ -4,6 +4,7 @@ This module is the single source of truth for the dangerous command system:
|
|||
- Pattern detection (DANGEROUS_PATTERNS, detect_dangerous_command)
|
||||
- Per-session approval state (thread-safe, keyed by session_key)
|
||||
- Approval prompting (CLI interactive + gateway async)
|
||||
- Smart approval via auxiliary LLM (auto-approve low-risk commands)
|
||||
- Permanent allowlist persistence (config.yaml)
|
||||
"""
|
||||
|
||||
|
|
@ -283,6 +284,68 @@ def prompt_dangerous_approval(command: str, description: str,
|
|||
sys.stdout.flush()
|
||||
|
||||
|
||||
def _get_approval_mode() -> str:
|
||||
"""Read the approval mode from config. Returns 'manual', 'smart', or 'off'."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("approvals", {}).get("mode", "manual")
|
||||
except Exception:
|
||||
return "manual"
|
||||
|
||||
|
||||
def _smart_approve(command: str, description: str) -> str:
    """Use the auxiliary LLM to assess risk and decide approval.

    Args:
        command: The flagged terminal command.
        description: The pattern-match reason(s) the command was flagged.

    Returns 'approve' if the LLM determines the command is safe,
    'deny' if genuinely dangerous, or 'escalate' if uncertain (including
    any failure to reach the auxiliary model — never fail open).

    Inspired by OpenAI Codex's Smart Approvals guardian subagent
    (openai/codex#13860).
    """
    try:
        from agent.auxiliary_client import get_text_auxiliary_client, auxiliary_max_tokens_param

        client, model = get_text_auxiliary_client(task="approval")
        if not client or not model:
            logger.debug("Smart approvals: no aux client available, escalating")
            return "escalate"

        prompt = f"""You are a security reviewer for an AI coding agent. A terminal command was flagged by pattern matching as potentially dangerous.

Command: {command}
Flagged reason: {description}

Assess the ACTUAL risk of this command. Many flagged commands are false positives — for example, `python -c "print('hello')"` is flagged as "script execution via -c flag" but is completely harmless.

Rules:
- APPROVE if the command is clearly safe (benign script execution, safe file operations, development tools, package installs, git operations, etc.)
- DENY if the command could genuinely damage the system (recursive delete of important paths, overwriting system files, fork bombs, wiping disks, dropping databases, etc.)
- ESCALATE if you're uncertain

Respond with exactly one word: APPROVE, DENY, or ESCALATE"""

        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            **auxiliary_max_tokens_param(16),
            temperature=0,
        )

        answer = (response.choices[0].message.content or "").strip().upper()

        # Fail-safe parsing order: check DENY before APPROVE so a hedged
        # reply such as "DO NOT APPROVE" or "CANNOT APPROVE" can never
        # auto-approve a flagged command; anything unrecognized escalates
        # to the human prompt.
        if "DENY" in answer:
            return "deny"
        if "APPROVE" in answer:
            return "approve"
        return "escalate"

    except Exception as e:
        # Aux model unreachable or misbehaving — fall back to manual review.
        logger.debug("Smart approvals: LLM call failed (%s), escalating", e)
        return "escalate"
|
||||
|
||||
|
||||
def check_dangerous_command(command: str, env_type: str,
|
||||
approval_callback=None) -> dict:
|
||||
"""Check if a command is dangerous and handle approval.
|
||||
|
|
@ -372,8 +435,9 @@ def check_all_command_guards(command: str, env_type: str,
|
|||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
# --yolo: bypass all approval prompts and pre-exec guard checks
|
||||
if os.getenv("HERMES_YOLO_MODE"):
|
||||
# --yolo or approvals.mode=off: bypass all approval prompts
|
||||
approval_mode = _get_approval_mode()
|
||||
if os.getenv("HERMES_YOLO_MODE") or approval_mode == "off":
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
is_cli = os.getenv("HERMES_INTERACTIVE")
|
||||
|
|
@ -430,6 +494,31 @@ def check_all_command_guards(command: str, env_type: str,
|
|||
if not warnings:
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
# --- Phase 2.5: Smart approval (auxiliary LLM risk assessment) ---
|
||||
# When approvals.mode=smart, ask the aux LLM before prompting the user.
|
||||
# Inspired by OpenAI Codex's Smart Approvals guardian subagent
|
||||
# (openai/codex#13860).
|
||||
if approval_mode == "smart":
|
||||
combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings)
|
||||
verdict = _smart_approve(command, combined_desc_for_llm)
|
||||
if verdict == "approve":
|
||||
# Auto-approve and grant session-level approval for these patterns
|
||||
for key, _, _ in warnings:
|
||||
approve_session(session_key, key)
|
||||
logger.debug("Smart approval: auto-approved '%s' (%s)",
|
||||
command[:60], combined_desc_for_llm)
|
||||
return {"approved": True, "message": None,
|
||||
"smart_approved": True}
|
||||
elif verdict == "deny":
|
||||
combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings)
|
||||
return {
|
||||
"approved": False,
|
||||
"message": f"BLOCKED by smart approval: {combined_desc_for_llm}. "
|
||||
"The command was assessed as genuinely dangerous. Do NOT retry.",
|
||||
"smart_denied": True,
|
||||
}
|
||||
# verdict == "escalate" → fall through to manual prompt
|
||||
|
||||
# --- Phase 3: Approval ---
|
||||
|
||||
# Combine descriptions for a single approval prompt
|
||||
|
|
|
|||
|
|
@ -439,11 +439,13 @@ MEMORY_SCHEMA = {
|
|||
"Memory is injected into future turns, so keep it compact and focused on facts "
|
||||
"that will still matter later.\n\n"
|
||||
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
|
||||
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
||||
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
|
||||
"- You discover something about the environment (OS, installed tools, project structure)\n"
|
||||
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
||||
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
|
||||
"- You identify a stable fact that will be useful again in future sessions\n\n"
|
||||
"PRIORITY: User preferences and corrections > environment facts > procedural knowledge. "
|
||||
"The most valuable memory prevents the user from having to repeat themselves.\n\n"
|
||||
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
|
||||
"state to memory; use session_search to recall those from past transcripts.\n"
|
||||
"If you've discovered a new way to do something, solved a problem that could be "
|
||||
|
|
|
|||
|
|
@ -561,7 +561,8 @@ SKILL_MANAGE_SCHEMA = {
|
|||
"user-corrected approach worked, non-trivial workflow discovered, "
|
||||
"or user asks you to remember a procedure.\n"
|
||||
"Update when: instructions stale/wrong, OS-specific failures, "
|
||||
"missing steps or pitfalls found during use.\n\n"
|
||||
"missing steps or pitfalls found during use. "
|
||||
"If you used a skill and hit issues not covered by it, patch it immediately.\n\n"
|
||||
"After difficult/iterative tasks, offer to save as a skill. "
|
||||
"Skip for simple one-offs. Confirm with user before creating/deleting.\n\n"
|
||||
"Good skills: trigger conditions, numbered steps with exact commands, "
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue