merge: resolve conflicts with main (plugins + stop commands)
This commit is contained in:
commit
c2769dffe0
18 changed files with 1877 additions and 43 deletions
|
|
@ -4,6 +4,7 @@ This module is the single source of truth for the dangerous command system:
|
|||
- Pattern detection (DANGEROUS_PATTERNS, detect_dangerous_command)
|
||||
- Per-session approval state (thread-safe, keyed by session_key)
|
||||
- Approval prompting (CLI interactive + gateway async)
|
||||
- Smart approval via auxiliary LLM (auto-approve low-risk commands)
|
||||
- Permanent allowlist persistence (config.yaml)
|
||||
"""
|
||||
|
||||
|
|
@ -283,6 +284,68 @@ def prompt_dangerous_approval(command: str, description: str,
|
|||
sys.stdout.flush()
|
||||
|
||||
|
||||
def _get_approval_mode() -> str:
|
||||
"""Read the approval mode from config. Returns 'manual', 'smart', or 'off'."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("approvals", {}).get("mode", "manual")
|
||||
except Exception:
|
||||
return "manual"
|
||||
|
||||
|
||||
def _smart_approve(command: str, description: str) -> str:
    """Use the auxiliary LLM to assess risk and decide approval.

    Args:
        command: The flagged terminal command.
        description: The pattern-match reason(s) the command was flagged.

    Returns 'approve' if the LLM determines the command is safe,
    'deny' if genuinely dangerous, or 'escalate' if uncertain (including
    any failure to reach the auxiliary model — never fail open).

    Inspired by OpenAI Codex's Smart Approvals guardian subagent
    (openai/codex#13860).
    """
    try:
        from agent.auxiliary_client import get_text_auxiliary_client, auxiliary_max_tokens_param

        client, model = get_text_auxiliary_client(task="approval")
        if not client or not model:
            logger.debug("Smart approvals: no aux client available, escalating")
            return "escalate"

        prompt = f"""You are a security reviewer for an AI coding agent. A terminal command was flagged by pattern matching as potentially dangerous.

Command: {command}
Flagged reason: {description}

Assess the ACTUAL risk of this command. Many flagged commands are false positives — for example, `python -c "print('hello')"` is flagged as "script execution via -c flag" but is completely harmless.

Rules:
- APPROVE if the command is clearly safe (benign script execution, safe file operations, development tools, package installs, git operations, etc.)
- DENY if the command could genuinely damage the system (recursive delete of important paths, overwriting system files, fork bombs, wiping disks, dropping databases, etc.)
- ESCALATE if you're uncertain

Respond with exactly one word: APPROVE, DENY, or ESCALATE"""

        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            **auxiliary_max_tokens_param(16),
            temperature=0,
        )

        answer = (response.choices[0].message.content or "").strip().upper()

        # Fail-safe parsing order: check DENY before APPROVE so a hedged
        # reply such as "DO NOT APPROVE" or "CANNOT APPROVE" can never
        # auto-approve a flagged command; anything unrecognized escalates
        # to the human prompt.
        if "DENY" in answer:
            return "deny"
        if "APPROVE" in answer:
            return "approve"
        return "escalate"

    except Exception as e:
        # Aux model unreachable or misbehaving — fall back to manual review.
        logger.debug("Smart approvals: LLM call failed (%s), escalating", e)
        return "escalate"
|
||||
|
||||
|
||||
def check_dangerous_command(command: str, env_type: str,
|
||||
approval_callback=None) -> dict:
|
||||
"""Check if a command is dangerous and handle approval.
|
||||
|
|
@ -372,8 +435,9 @@ def check_all_command_guards(command: str, env_type: str,
|
|||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
# --yolo: bypass all approval prompts and pre-exec guard checks
|
||||
if os.getenv("HERMES_YOLO_MODE"):
|
||||
# --yolo or approvals.mode=off: bypass all approval prompts
|
||||
approval_mode = _get_approval_mode()
|
||||
if os.getenv("HERMES_YOLO_MODE") or approval_mode == "off":
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
is_cli = os.getenv("HERMES_INTERACTIVE")
|
||||
|
|
@ -430,6 +494,31 @@ def check_all_command_guards(command: str, env_type: str,
|
|||
if not warnings:
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
# --- Phase 2.5: Smart approval (auxiliary LLM risk assessment) ---
|
||||
# When approvals.mode=smart, ask the aux LLM before prompting the user.
|
||||
# Inspired by OpenAI Codex's Smart Approvals guardian subagent
|
||||
# (openai/codex#13860).
|
||||
if approval_mode == "smart":
|
||||
combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings)
|
||||
verdict = _smart_approve(command, combined_desc_for_llm)
|
||||
if verdict == "approve":
|
||||
# Auto-approve and grant session-level approval for these patterns
|
||||
for key, _, _ in warnings:
|
||||
approve_session(session_key, key)
|
||||
logger.debug("Smart approval: auto-approved '%s' (%s)",
|
||||
command[:60], combined_desc_for_llm)
|
||||
return {"approved": True, "message": None,
|
||||
"smart_approved": True}
|
||||
elif verdict == "deny":
|
||||
combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings)
|
||||
return {
|
||||
"approved": False,
|
||||
"message": f"BLOCKED by smart approval: {combined_desc_for_llm}. "
|
||||
"The command was assessed as genuinely dangerous. Do NOT retry.",
|
||||
"smart_denied": True,
|
||||
}
|
||||
# verdict == "escalate" → fall through to manual prompt
|
||||
|
||||
# --- Phase 3: Approval ---
|
||||
|
||||
# Combine descriptions for a single approval prompt
|
||||
|
|
|
|||
|
|
@ -439,11 +439,13 @@ MEMORY_SCHEMA = {
|
|||
"Memory is injected into future turns, so keep it compact and focused on facts "
|
||||
"that will still matter later.\n\n"
|
||||
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
|
||||
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
||||
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
|
||||
"- You discover something about the environment (OS, installed tools, project structure)\n"
|
||||
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
||||
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
|
||||
"- You identify a stable fact that will be useful again in future sessions\n\n"
|
||||
"PRIORITY: User preferences and corrections > environment facts > procedural knowledge. "
|
||||
"The most valuable memory prevents the user from having to repeat themselves.\n\n"
|
||||
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
|
||||
"state to memory; use session_search to recall those from past transcripts.\n"
|
||||
"If you've discovered a new way to do something, solved a problem that could be "
|
||||
|
|
|
|||
|
|
@ -561,7 +561,8 @@ SKILL_MANAGE_SCHEMA = {
|
|||
"user-corrected approach worked, non-trivial workflow discovered, "
|
||||
"or user asks you to remember a procedure.\n"
|
||||
"Update when: instructions stale/wrong, OS-specific failures, "
|
||||
"missing steps or pitfalls found during use.\n\n"
|
||||
"missing steps or pitfalls found during use. "
|
||||
"If you used a skill and hit issues not covered by it, patch it immediately.\n\n"
|
||||
"After difficult/iterative tasks, offer to save as a skill. "
|
||||
"Skip for simple one-offs. Confirm with user before creating/deleting.\n\n"
|
||||
"Good skills: trigger conditions, numbered steps with exact commands, "
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue