fix(approval): show full command in dangerous command approval (#1553)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
This commit is contained in:
Teknium 2026-03-17 02:02:33 -07:00 committed by GitHub
parent 0351e4fa90
commit 4cb6735541
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 53 additions and 77 deletions

View file

@ -1740,9 +1740,12 @@ class DiscordAdapter(BasePlatformAdapter):
if not channel: if not channel:
channel = await self._client.fetch_channel(int(chat_id)) channel = await self._client.fetch_channel(int(chat_id))
# Discord embed description limit is 4096; show full command up to that
max_desc = 4088
cmd_display = command if len(command) <= max_desc else command[: max_desc - 3] + "..."
embed = discord.Embed( embed = discord.Embed(
title="Command Approval Required", title="Command Approval Required",
description=f"```\n{command[:500]}\n```", description=f"```\n{cmd_display}\n```",
color=discord.Color.orange(), color=discord.Color.orange(),
) )
embed.set_footer(text=f"Approval ID: {approval_id}") embed.set_footer(text=f"Approval ID: {approval_id}")

View file

@ -107,7 +107,11 @@ def _ensure_current_event_loop(request):
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def _enforce_test_timeout(): def _enforce_test_timeout():
"""Kill any individual test that takes longer than 30 seconds.""" """Kill any individual test that takes longer than 30 seconds.
SIGALRM is Unix-only; skip on Windows."""
if sys.platform == "win32":
yield
return
old = signal.signal(signal.SIGALRM, _timeout_handler) old = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(30) signal.alarm(30)
yield yield

View file

@ -385,75 +385,47 @@ class TestPatternKeyUniqueness:
assert is_approved("legacy-find", key_delete) is True assert is_approved("legacy-find", key_delete) is True
class TestViewFullCommand: class TestFullCommandAlwaysShown:
"""Tests for the 'view full command' option in prompt_dangerous_approval.""" """The full command is always shown in the approval prompt (no truncation).
def test_view_then_once_fallback(self): Previously there was a [v]iew full option for long commands. Now the full
"""Pressing 'v' shows the full command, then 'o' approves once.""" command is always displayed. These tests verify the basic approval flow
still works with long commands. (#1553)
"""
def test_once_with_long_command(self):
"""Pressing 'o' approves once even for very long commands."""
long_cmd = "rm -rf " + "a" * 200 long_cmd = "rm -rf " + "a" * 200
inputs = iter(["v", "o"])
with mock_patch("builtins.input", side_effect=inputs):
result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "once"
def test_view_then_deny_fallback(self):
"""Pressing 'v' shows the full command, then 'd' denies."""
long_cmd = "rm -rf " + "b" * 200
inputs = iter(["v", "d"])
with mock_patch("builtins.input", side_effect=inputs):
result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "deny"
def test_view_then_session_fallback(self):
"""Pressing 'v' shows the full command, then 's' approves for session."""
long_cmd = "rm -rf " + "c" * 200
inputs = iter(["v", "s"])
with mock_patch("builtins.input", side_effect=inputs):
result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "session"
def test_view_then_always_fallback(self):
"""Pressing 'v' shows the full command, then 'a' approves always."""
long_cmd = "rm -rf " + "d" * 200
inputs = iter(["v", "a"])
with mock_patch("builtins.input", side_effect=inputs):
result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "always"
def test_view_then_session_when_permanent_hidden(self):
"""The view-full flow still works when allow_permanent=False."""
long_cmd = "rm -rf " + "d" * 200
inputs = iter(["v", "s"])
with mock_patch("builtins.input", side_effect=inputs):
result = prompt_dangerous_approval(
long_cmd,
"recursive delete",
allow_permanent=False,
)
assert result == "session"
def test_view_not_shown_for_short_command(self):
"""Short commands don't offer the view option; 'v' falls through to deny."""
short_cmd = "rm -rf /tmp"
with mock_patch("builtins.input", return_value="v"):
result = prompt_dangerous_approval(short_cmd, "recursive delete")
# 'v' is not a valid choice for short commands, should deny
assert result == "deny"
def test_once_without_view(self):
"""Directly pressing 'o' without viewing still works."""
long_cmd = "rm -rf " + "e" * 200
with mock_patch("builtins.input", return_value="o"): with mock_patch("builtins.input", return_value="o"):
result = prompt_dangerous_approval(long_cmd, "recursive delete") result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "once" assert result == "once"
def test_view_ignored_after_already_shown(self): def test_session_with_long_command(self):
"""After viewing once, 'v' on a now-untruncated display falls through to deny.""" """Pressing 's' approves for session with long commands."""
long_cmd = "rm -rf " + "f" * 200 long_cmd = "rm -rf " + "c" * 200
inputs = iter(["v", "v"]) # second 'v' should not match since is_truncated is False with mock_patch("builtins.input", return_value="s"):
with mock_patch("builtins.input", side_effect=inputs):
result = prompt_dangerous_approval(long_cmd, "recursive delete") result = prompt_dangerous_approval(long_cmd, "recursive delete")
# After first 'v', is_truncated becomes False, so second 'v' -> deny assert result == "session"
def test_always_with_long_command(self):
"""Pressing 'a' approves always with long commands."""
long_cmd = "rm -rf " + "d" * 200
with mock_patch("builtins.input", return_value="a"):
result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "always"
def test_deny_with_long_command(self):
"""Pressing 'd' denies with long commands."""
long_cmd = "rm -rf " + "b" * 200
with mock_patch("builtins.input", return_value="d"):
result = prompt_dangerous_approval(long_cmd, "recursive delete")
assert result == "deny"
def test_invalid_input_denies(self):
"""Invalid input (like 'v' which no longer exists) falls through to deny."""
short_cmd = "rm -rf /tmp"
with mock_patch("builtins.input", return_value="v"):
result = prompt_dangerous_approval(short_cmd, "recursive delete")
assert result == "deny" assert result == "deny"

View file

@ -220,17 +220,15 @@ def prompt_dangerous_approval(command: str, description: str,
os.environ["HERMES_SPINNER_PAUSE"] = "1" os.environ["HERMES_SPINNER_PAUSE"] = "1"
try: try:
is_truncated = len(command) > 80
while True: while True:
print() print()
print(f" ⚠️ DANGEROUS COMMAND: {description}") print(f" ⚠️ DANGEROUS COMMAND: {description}")
print(f" {command[:80]}{'...' if is_truncated else ''}") print(f" {command}")
print() print()
view_hint = " | [v]iew full" if is_truncated else ""
if allow_permanent: if allow_permanent:
print(f" [o]nce | [s]ession | [a]lways | [d]eny{view_hint}") print(" [o]nce | [s]ession | [a]lways | [d]eny")
else: else:
print(f" [o]nce | [s]ession | [d]eny{view_hint}") print(" [o]nce | [s]ession | [d]eny")
print() print()
sys.stdout.flush() sys.stdout.flush()
@ -252,12 +250,6 @@ def prompt_dangerous_approval(command: str, description: str,
return "deny" return "deny"
choice = result["choice"] choice = result["choice"]
if choice in ('v', 'view') and is_truncated:
print()
print(" Full command:")
print(f" {command}")
is_truncated = False
continue
if choice in ('o', 'once'): if choice in ('o', 'once'):
print(" ✓ Allowed once") print(" ✓ Allowed once")
return "once" return "once"
@ -394,7 +386,10 @@ def check_dangerous_command(command: str, env_type: str,
"status": "approval_required", "status": "approval_required",
"command": command, "command": command,
"description": description, "description": description,
"message": f"⚠️ This command is potentially dangerous ({description}). Asking the user for approval...", "message": (
f"⚠️ This command is potentially dangerous ({description}). "
f"Asking the user for approval.\n\n**Command:**\n```\n{command}\n```"
),
} }
choice = prompt_dangerous_approval(command, description, choice = prompt_dangerous_approval(command, description,
@ -542,7 +537,9 @@ def check_all_command_guards(command: str, env_type: str,
"status": "approval_required", "status": "approval_required",
"command": command, "command": command,
"description": combined_desc, "description": combined_desc,
"message": f"⚠️ {combined_desc}. Asking the user for approval...", "message": (
f"⚠️ {combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{command}\n```"
),
} }
# CLI interactive: single combined prompt # CLI interactive: single combined prompt