Merge origin/main into hermes/hermes-5d160594
This commit is contained in:
commit
3229e434b8
78 changed files with 3762 additions and 395 deletions
96
tests/tools/test_browser_cleanup.py
Normal file
96
tests/tools/test_browser_cleanup.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
"""Regression tests for browser session cleanup and screenshot recovery."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
class TestScreenshotPathRecovery:
|
||||
def test_extracts_standard_absolute_path(self):
|
||||
from tools.browser_tool import _extract_screenshot_path_from_text
|
||||
|
||||
assert (
|
||||
_extract_screenshot_path_from_text("Screenshot saved to /tmp/foo.png")
|
||||
== "/tmp/foo.png"
|
||||
)
|
||||
|
||||
def test_extracts_quoted_absolute_path(self):
|
||||
from tools.browser_tool import _extract_screenshot_path_from_text
|
||||
|
||||
assert (
|
||||
_extract_screenshot_path_from_text(
|
||||
"Screenshot saved to '/Users/david/.hermes/browser_screenshots/shot.png'"
|
||||
)
|
||||
== "/Users/david/.hermes/browser_screenshots/shot.png"
|
||||
)
|
||||
|
||||
|
||||
class TestBrowserCleanup:
|
||||
def setup_method(self):
|
||||
from tools import browser_tool
|
||||
|
||||
self.browser_tool = browser_tool
|
||||
self.orig_active_sessions = browser_tool._active_sessions.copy()
|
||||
self.orig_session_last_activity = browser_tool._session_last_activity.copy()
|
||||
self.orig_recording_sessions = browser_tool._recording_sessions.copy()
|
||||
self.orig_cleanup_done = browser_tool._cleanup_done
|
||||
|
||||
def teardown_method(self):
|
||||
self.browser_tool._active_sessions.clear()
|
||||
self.browser_tool._active_sessions.update(self.orig_active_sessions)
|
||||
self.browser_tool._session_last_activity.clear()
|
||||
self.browser_tool._session_last_activity.update(self.orig_session_last_activity)
|
||||
self.browser_tool._recording_sessions.clear()
|
||||
self.browser_tool._recording_sessions.update(self.orig_recording_sessions)
|
||||
self.browser_tool._cleanup_done = self.orig_cleanup_done
|
||||
|
||||
def test_cleanup_browser_clears_tracking_state(self):
|
||||
browser_tool = self.browser_tool
|
||||
browser_tool._active_sessions["task-1"] = {
|
||||
"session_name": "sess-1",
|
||||
"bb_session_id": None,
|
||||
}
|
||||
browser_tool._session_last_activity["task-1"] = 123.0
|
||||
|
||||
with (
|
||||
patch("tools.browser_tool._maybe_stop_recording") as mock_stop,
|
||||
patch(
|
||||
"tools.browser_tool._run_browser_command",
|
||||
return_value={"success": True},
|
||||
) as mock_run,
|
||||
patch("tools.browser_tool.os.path.exists", return_value=False),
|
||||
):
|
||||
browser_tool.cleanup_browser("task-1")
|
||||
|
||||
assert "task-1" not in browser_tool._active_sessions
|
||||
assert "task-1" not in browser_tool._session_last_activity
|
||||
mock_stop.assert_called_once_with("task-1")
|
||||
mock_run.assert_called_once_with("task-1", "close", [], timeout=10)
|
||||
|
||||
def test_browser_close_delegates_to_cleanup_browser(self):
|
||||
import json
|
||||
|
||||
browser_tool = self.browser_tool
|
||||
browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"}
|
||||
|
||||
with patch("tools.browser_tool.cleanup_browser") as mock_cleanup:
|
||||
result = json.loads(browser_tool.browser_close("task-2"))
|
||||
|
||||
assert result == {"success": True, "closed": True}
|
||||
mock_cleanup.assert_called_once_with("task-2")
|
||||
|
||||
def test_emergency_cleanup_clears_all_tracking_state(self):
|
||||
browser_tool = self.browser_tool
|
||||
browser_tool._cleanup_done = False
|
||||
browser_tool._active_sessions["task-1"] = {"session_name": "sess-1"}
|
||||
browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"}
|
||||
browser_tool._session_last_activity["task-1"] = 1.0
|
||||
browser_tool._session_last_activity["task-2"] = 2.0
|
||||
browser_tool._recording_sessions.update({"task-1", "task-2"})
|
||||
|
||||
with patch("tools.browser_tool.cleanup_all_browsers") as mock_cleanup_all:
|
||||
browser_tool._emergency_cleanup_all_sessions()
|
||||
|
||||
mock_cleanup_all.assert_called_once_with()
|
||||
assert browser_tool._active_sessions == {}
|
||||
assert browser_tool._session_last_activity == {}
|
||||
assert browser_tool._recording_sessions == set()
|
||||
assert browser_tool._cleanup_done is True
|
||||
|
|
@ -1,11 +1,8 @@
|
|||
"""Tests for the --force flag dangerous verdict bypass fix in skills_guard.py.
|
||||
"""Regression tests for skills guard policy precedence.
|
||||
|
||||
Regression test: the old code had `if result.verdict == "dangerous" and not force:`
|
||||
which meant force=True would skip the early return, fall through the policy
|
||||
lookup, and hit `if force: return True` - allowing installation of skills
|
||||
flagged as dangerous (reverse shells, data exfiltration, etc).
|
||||
|
||||
The docstring explicitly states: "never overrides dangerous".
|
||||
Official/builtin skills should follow the INSTALL_POLICY table even when their
|
||||
scan verdict is dangerous, and --force should override blocked verdicts for
|
||||
non-builtin sources.
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -44,10 +41,6 @@ def _new_should_allow(verdict, trust_level, force):
|
|||
}
|
||||
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
|
||||
|
||||
# Fixed: no `and not force` - dangerous is always blocked
|
||||
if verdict == "dangerous":
|
||||
return False
|
||||
|
||||
policy = INSTALL_POLICY.get(trust_level, INSTALL_POLICY["community"])
|
||||
vi = VERDICT_INDEX.get(verdict, 2)
|
||||
decision = policy[vi]
|
||||
|
|
@ -61,35 +54,28 @@ def _new_should_allow(verdict, trust_level, force):
|
|||
return False
|
||||
|
||||
|
||||
class TestForceNeverOverridesDangerous:
|
||||
"""The core bug: --force bypassed the dangerous verdict block."""
|
||||
class TestPolicyPrecedenceForDangerousVerdicts:
|
||||
def test_builtin_dangerous_is_allowed_by_policy(self):
|
||||
assert _new_should_allow("dangerous", "builtin", force=False) is True
|
||||
|
||||
def test_old_code_allows_dangerous_with_force(self):
|
||||
"""Old code: force=True lets dangerous skills through."""
|
||||
assert _old_should_allow("dangerous", "community", force=True) is True
|
||||
def test_trusted_dangerous_is_blocked_without_force(self):
|
||||
assert _new_should_allow("dangerous", "trusted", force=False) is False
|
||||
|
||||
def test_new_code_blocks_dangerous_with_force(self):
|
||||
"""Fixed code: force=True still blocks dangerous skills."""
|
||||
assert _new_should_allow("dangerous", "community", force=True) is False
|
||||
def test_force_overrides_dangerous_for_community(self):
|
||||
assert _new_should_allow("dangerous", "community", force=True) is True
|
||||
|
||||
def test_new_code_blocks_dangerous_trusted_with_force(self):
|
||||
"""Fixed code: even trusted + force cannot install dangerous."""
|
||||
assert _new_should_allow("dangerous", "trusted", force=True) is False
|
||||
def test_force_overrides_dangerous_for_trusted(self):
|
||||
assert _new_should_allow("dangerous", "trusted", force=True) is True
|
||||
|
||||
def test_force_still_overrides_caution(self):
|
||||
"""force=True should still work for caution verdicts."""
|
||||
assert _new_should_allow("caution", "community", force=True) is True
|
||||
|
||||
def test_caution_community_blocked_without_force(self):
|
||||
"""Caution + community is blocked without force (unchanged)."""
|
||||
assert _new_should_allow("caution", "community", force=False) is False
|
||||
|
||||
def test_safe_always_allowed(self):
|
||||
"""Safe verdict is always allowed regardless of force."""
|
||||
assert _new_should_allow("safe", "community", force=False) is True
|
||||
assert _new_should_allow("safe", "community", force=True) is True
|
||||
|
||||
def test_dangerous_blocked_without_force(self):
|
||||
"""Dangerous is blocked without force (both old and new agree)."""
|
||||
assert _old_should_allow("dangerous", "community", force=False) is False
|
||||
assert _new_should_allow("dangerous", "community", force=False) is False
|
||||
def test_old_code_happened_to_allow_forced_dangerous_community(self):
|
||||
assert _old_should_allow("dangerous", "community", force=True) is True
|
||||
|
|
|
|||
|
|
@ -9,9 +9,24 @@ from tools.memory_tool import (
|
|||
memory_tool,
|
||||
_scan_memory_content,
|
||||
ENTRY_DELIMITER,
|
||||
MEMORY_SCHEMA,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tool schema guidance
|
||||
# =========================================================================
|
||||
|
||||
class TestMemorySchema:
|
||||
def test_discourages_diary_style_task_logs(self):
|
||||
description = MEMORY_SCHEMA["description"]
|
||||
assert "Do NOT save task progress" in description
|
||||
assert "session_search" in description
|
||||
assert "like a diary" not in description
|
||||
assert "temporary task state" in description
|
||||
assert ">80%" not in description
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Security scanning
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
|
@ -29,6 +30,118 @@ def _install_telegram_mock(monkeypatch, bot):
|
|||
|
||||
|
||||
class TestSendMessageTool:
|
||||
def test_cron_duplicate_target_is_skipped_and_explained(self):
|
||||
home = SimpleNamespace(chat_id="-1001")
|
||||
config, _telegram_cfg = _make_config()
|
||||
config.get_home_channel = lambda _platform: home
|
||||
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
|
||||
},
|
||||
clear=False,
|
||||
), \
|
||||
patch("gateway.config.load_gateway_config", return_value=config), \
|
||||
patch("tools.interrupt.is_interrupted", return_value=False), \
|
||||
patch("model_tools._run_async", side_effect=_run_async_immediately), \
|
||||
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
|
||||
patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
|
||||
result = json.loads(
|
||||
send_message_tool(
|
||||
{
|
||||
"action": "send",
|
||||
"target": "telegram",
|
||||
"message": "hello",
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["skipped"] is True
|
||||
assert result["reason"] == "cron_auto_delivery_duplicate_target"
|
||||
assert "final response" in result["note"]
|
||||
send_mock.assert_not_awaited()
|
||||
mirror_mock.assert_not_called()
|
||||
|
||||
def test_cron_different_target_still_sends(self):
|
||||
config, telegram_cfg = _make_config()
|
||||
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
|
||||
},
|
||||
clear=False,
|
||||
), \
|
||||
patch("gateway.config.load_gateway_config", return_value=config), \
|
||||
patch("tools.interrupt.is_interrupted", return_value=False), \
|
||||
patch("model_tools._run_async", side_effect=_run_async_immediately), \
|
||||
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
|
||||
patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
|
||||
result = json.loads(
|
||||
send_message_tool(
|
||||
{
|
||||
"action": "send",
|
||||
"target": "telegram:-1002",
|
||||
"message": "hello",
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result.get("skipped") is not True
|
||||
send_mock.assert_awaited_once_with(
|
||||
Platform.TELEGRAM,
|
||||
telegram_cfg,
|
||||
"-1002",
|
||||
"hello",
|
||||
thread_id=None,
|
||||
media_files=[],
|
||||
)
|
||||
mirror_mock.assert_called_once_with("telegram", "-1002", "hello", source_label="cli", thread_id=None)
|
||||
|
||||
def test_cron_same_chat_different_thread_still_sends(self):
|
||||
config, telegram_cfg = _make_config()
|
||||
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
|
||||
"HERMES_CRON_AUTO_DELIVER_THREAD_ID": "17585",
|
||||
},
|
||||
clear=False,
|
||||
), \
|
||||
patch("gateway.config.load_gateway_config", return_value=config), \
|
||||
patch("tools.interrupt.is_interrupted", return_value=False), \
|
||||
patch("model_tools._run_async", side_effect=_run_async_immediately), \
|
||||
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
|
||||
patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
|
||||
result = json.loads(
|
||||
send_message_tool(
|
||||
{
|
||||
"action": "send",
|
||||
"target": "telegram:-1001:99999",
|
||||
"message": "hello",
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result.get("skipped") is not True
|
||||
send_mock.assert_awaited_once_with(
|
||||
Platform.TELEGRAM,
|
||||
telegram_cfg,
|
||||
"-1001",
|
||||
"hello",
|
||||
thread_id="99999",
|
||||
media_files=[],
|
||||
)
|
||||
mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="99999")
|
||||
|
||||
def test_sends_to_explicit_telegram_topic_target(self):
|
||||
config, telegram_cfg = _make_config()
|
||||
|
||||
|
|
|
|||
|
|
@ -9,9 +9,21 @@ from tools.session_search_tool import (
|
|||
_format_conversation,
|
||||
_truncate_around_matches,
|
||||
MAX_SESSION_CHARS,
|
||||
SESSION_SEARCH_SCHEMA,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tool schema guidance
|
||||
# =========================================================================
|
||||
|
||||
class TestSessionSearchSchema:
|
||||
def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
|
||||
description = SESSION_SEARCH_SCHEMA["description"]
|
||||
assert "past conversations" in description
|
||||
assert "recent turns of the current session" not in description
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _format_timestamp
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -46,9 +46,9 @@ from tools.skills_guard import (
|
|||
|
||||
|
||||
class TestResolveTrustLevel:
|
||||
def test_builtin_not_exposed(self):
|
||||
# builtin is only used internally, not resolved from source string
|
||||
assert _resolve_trust_level("openai/skills") == "trusted"
|
||||
def test_official_sources_resolve_to_builtin(self):
|
||||
assert _resolve_trust_level("official") == "builtin"
|
||||
assert _resolve_trust_level("official/email/agentmail") == "builtin"
|
||||
|
||||
def test_trusted_repos(self):
|
||||
assert _resolve_trust_level("openai/skills") == "trusted"
|
||||
|
|
@ -116,11 +116,17 @@ class TestShouldAllowInstall:
|
|||
allowed, _ = should_allow_install(self._result("trusted", "caution", f))
|
||||
assert allowed is True
|
||||
|
||||
def test_dangerous_blocked_even_trusted(self):
|
||||
def test_trusted_dangerous_blocked_without_force(self):
|
||||
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
|
||||
allowed, _ = should_allow_install(self._result("trusted", "dangerous", f))
|
||||
assert allowed is False
|
||||
|
||||
def test_builtin_dangerous_allowed_without_force(self):
|
||||
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
|
||||
allowed, reason = should_allow_install(self._result("builtin", "dangerous", f))
|
||||
assert allowed is True
|
||||
assert "builtin source" in reason
|
||||
|
||||
def test_force_overrides_caution(self):
|
||||
f = [Finding("x", "high", "c", "f", 1, "m", "d")]
|
||||
allowed, reason = should_allow_install(self._result("community", "caution", f), force=True)
|
||||
|
|
@ -132,22 +138,21 @@ class TestShouldAllowInstall:
|
|||
allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False)
|
||||
assert allowed is False
|
||||
|
||||
def test_force_never_overrides_dangerous(self):
|
||||
"""--force must not bypass dangerous verdict (regression test)."""
|
||||
def test_force_overrides_dangerous_for_community(self):
|
||||
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
|
||||
allowed, reason = should_allow_install(
|
||||
self._result("community", "dangerous", f), force=True
|
||||
)
|
||||
assert allowed is False
|
||||
assert "DANGEROUS" in reason
|
||||
assert allowed is True
|
||||
assert "Force-installed" in reason
|
||||
|
||||
def test_force_never_overrides_dangerous_trusted(self):
|
||||
"""--force must not bypass dangerous even for trusted sources."""
|
||||
def test_force_overrides_dangerous_for_trusted(self):
|
||||
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
|
||||
allowed, _ = should_allow_install(
|
||||
allowed, reason = should_allow_install(
|
||||
self._result("trusted", "dangerous", f), force=True
|
||||
)
|
||||
assert allowed is False
|
||||
assert allowed is True
|
||||
assert "Force-installed" in reason
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue