Merge origin/main into hermes/hermes-5d160594

This commit is contained in:
teknium1 2026-03-14 19:34:05 -07:00
commit 3229e434b8
78 changed files with 3762 additions and 395 deletions

View file

@ -53,6 +53,7 @@ import atexit
import json
import logging
import os
import re
import signal
import subprocess
import shutil
@ -165,63 +166,18 @@ def _emergency_cleanup_all_sessions():
if not _active_sessions:
return
logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions))
logger.info("Emergency cleanup: closing %s active session(s)...",
len(_active_sessions))
try:
if _is_local_mode():
# Local mode: just close agent-browser sessions via CLI
for task_id, session_info in list(_active_sessions.items()):
session_name = session_info.get("session_name")
if session_name:
try:
browser_cmd = _find_agent_browser()
task_socket_dir = os.path.join(
_socket_safe_tmpdir(),
f"agent-browser-{session_name}"
)
env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
subprocess.run(
browser_cmd.split() + ["--session", session_name, "--json", "close"],
capture_output=True, timeout=5, env=env,
)
logger.info("Closed local session %s", session_name)
except Exception as e:
logger.debug("Error closing local session %s: %s", session_name, e)
else:
# Cloud mode: release Browserbase sessions via API
api_key = os.environ.get("BROWSERBASE_API_KEY")
project_id = os.environ.get("BROWSERBASE_PROJECT_ID")
if not api_key or not project_id:
logger.warning("Cannot cleanup - missing BROWSERBASE credentials")
return
for task_id, session_info in list(_active_sessions.items()):
bb_session_id = session_info.get("bb_session_id")
if bb_session_id:
try:
response = requests.post(
f"https://api.browserbase.com/v1/sessions/{bb_session_id}",
headers={
"X-BB-API-Key": api_key,
"Content-Type": "application/json"
},
json={
"projectId": project_id,
"status": "REQUEST_RELEASE"
},
timeout=5 # Short timeout for cleanup
)
if response.status_code in (200, 201, 204):
logger.info("Closed session %s", bb_session_id)
else:
logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code)
except Exception as e:
logger.error("Error closing session %s: %s", bb_session_id, e)
_active_sessions.clear()
cleanup_all_browsers()
except Exception as e:
logger.error("Emergency cleanup error: %s", e)
finally:
with _cleanup_lock:
_active_sessions.clear()
_session_last_activity.clear()
_recording_sessions.clear()
# Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM
@ -640,18 +596,14 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
def _create_local_session(task_id: str) -> Dict[str, str]:
    """Create a lightweight local browser session (no cloud API call).

    Returns the same dict shape as ``_create_browserbase_session`` so the rest
    of the code can treat both modes uniformly.

    Args:
        task_id: Task identifier. It is only written to the log; the generated
            session name does not embed it.

    Returns:
        A dict with ``session_name`` (``"h_"`` followed by 10 random hex
        characters), ``bb_session_id`` and ``cdp_url`` set to ``None``, and
        ``features`` set to ``{"local": True}``.
    """
    import uuid

    # Single assignment: the name is short and random, independent of task_id.
    session_name = f"h_{uuid.uuid4().hex[:10]}"
    logger.info("Created local browser session %s for task %s",
                session_name, task_id)
    return {
        "session_name": session_name,
        "bb_session_id": None,  # Not applicable in local mode
        "cdp_url": None,  # Not applicable in local mode
        "features": {"local": True},
    }
@ -772,6 +724,27 @@ def _find_agent_browser() -> str:
)
def _extract_screenshot_path_from_text(text: str) -> Optional[str]:
    """Extract a screenshot file path from agent-browser human-readable output.

    Patterns are tried from most to least specific; the first match wins:

    1. ``Screenshot saved to '<path>'`` (quoted, so the path may hold spaces)
    2. ``Screenshot saved to <path>`` (unquoted, whitespace-delimited)
    3. any bare absolute ``.png`` path followed by whitespace or end of text

    Only paths that start with ``/`` and end in ``.png`` are recognised.

    Args:
        text: Raw stdout/stderr text to scan. ``None`` or an empty string
            yields ``None``.

    Returns:
        The extracted path, or ``None`` when no pattern matches. The path is
        not checked for existence here.
    """
    if not text:
        return None

    patterns = (
        r"Screenshot saved to ['\"](?P<path>/[^'\"]+?\.png)['\"]",
        r"Screenshot saved to (?P<path>/\S+?\.png)(?:\s|$)",
        # The lookbehind keeps the generic fallback from starting in the
        # middle of a token: without it "screens/shot.png" would yield
        # "/shot.png" and "https://host/a.png" would yield "//host/a.png".
        r"(?<![\w/:.~-])(?P<path>/\S+?\.png)(?:\s|$)",
    )
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            # Every pattern forces the group to start with "/" and end with
            # ".png", so no quote/whitespace trimming is needed.
            return match.group("path")
    return None
def _run_browser_command(
task_id: str,
command: str,
@ -841,9 +814,20 @@ def _run_browser_command(
command, task_id, task_socket_dir, len(task_socket_dir))
browser_env = {**os.environ}
# Ensure PATH includes standard dirs (systemd services may have minimal PATH)
if "/usr/bin" not in browser_env.get("PATH", "").split(":"):
browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}"
# Ensure PATH includes Hermes-managed Node first, then standard system dirs.
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
hermes_node_bin = str(hermes_home / "node" / "bin")
existing_path = browser_env.get("PATH", "")
path_parts = [p for p in existing_path.split(":") if p]
candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p]
for part in reversed(candidate_dirs):
if os.path.isdir(part) and part not in path_parts:
path_parts.insert(0, part)
browser_env["PATH"] = ":".join(path_parts)
browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
result = subprocess.run(
@ -866,10 +850,11 @@ def _run_browser_command(
command, " ".join(cmd_parts[:4]) + "...",
(result.stderr or "")[:200])
# Parse JSON output
if result.stdout.strip():
stdout_text = result.stdout.strip()
if stdout_text:
try:
parsed = json.loads(result.stdout.strip())
parsed = json.loads(stdout_text)
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
if command == "snapshot" and parsed.get("success"):
snap_data = parsed.get("data", {})
@ -879,13 +864,33 @@ def _run_browser_command(
"returncode=%s", result.returncode)
return parsed
except json.JSONDecodeError:
# Non-JSON output indicates agent-browser crash or version mismatch
raw = result.stdout.strip()[:500]
raw = stdout_text[:2000]
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
command, result.returncode, raw[:200])
command, result.returncode, raw[:500])
if command == "screenshot":
stderr_text = (result.stderr or "").strip()
combined_text = "\n".join(
part for part in [stdout_text, stderr_text] if part
)
recovered_path = _extract_screenshot_path_from_text(combined_text)
if recovered_path and Path(recovered_path).exists():
logger.info(
"browser 'screenshot' recovered file from non-JSON output: %s",
recovered_path,
)
return {
"success": True,
"data": {
"path": recovered_path,
"raw": raw,
},
}
return {
"success": True,
"data": {"raw": raw}
"success": False,
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
}
# Check for errors
@ -1250,46 +1255,26 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
def browser_close(task_id: Optional[str] = None) -> str:
"""
Close the browser session.
Args:
task_id: Task identifier for session isolation
Returns:
JSON string with close result
"""
# Calls made without a task_id all operate on the "default" key.
effective_task_id = task_id or "default"
# Stop auto-recording before closing
_maybe_stop_recording(effective_task_id)
result = _run_browser_command(effective_task_id, "close", [])
# Close the backend session (Browserbase API in cloud mode, nothing extra in local mode)
# The lookup key falls back to "default" when task_id is not a key of
# _active_sessions, so it can differ from effective_task_id.
session_key = task_id if task_id and task_id in _active_sessions else "default"
if session_key in _active_sessions:
session_info = _active_sessions[session_key]
bb_session_id = session_info.get("bb_session_id")
if bb_session_id:
# Cloud mode: release the Browserbase session via API
try:
config = _get_browserbase_config()
_close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
except Exception as e:
# Best effort: a failed release is logged and the entry is still dropped below.
logger.warning("Could not close BrowserBase session: %s", e)
del _active_sessions[session_key]
# Both branches report success=True/closed=True; the failure branch only adds a warning.
if result.get("success"):
return json.dumps({
"success": True,
"closed": True
}, ensure_ascii=False)
else:
# Even if close fails, session was released
return json.dumps({
"success": True,
"closed": True,
"warning": result.get("error", "Session may not have been active")
}, ensure_ascii=False)
# NOTE(review): every path through the if/else above returns, so the
# statements below cannot execute as written. This listing appears to show
# the removed and the added body of a diff together without +/- markers --
# confirm which of the two bodies is the intended one before editing.
with _cleanup_lock:
had_session = effective_task_id in _active_sessions
cleanup_browser(effective_task_id)
response = {
"success": True,
"closed": True,
}
if not had_session:
response["warning"] = "Session may not have been active"
return json.dumps(response, ensure_ascii=False)
def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
@ -1481,9 +1466,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
# Take screenshot using agent-browser
screenshot_args = [str(screenshot_path)]
screenshot_args = []
if annotate:
screenshot_args.insert(0, "--annotate")
screenshot_args.append("--annotate")
screenshot_args.append("--full")
screenshot_args.append(str(screenshot_path))
result = _run_browser_command(
effective_task_id,
"screenshot",
@ -1498,7 +1485,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
"success": False,
"error": f"Failed to take screenshot ({mode} mode): {error_detail}"
}, ensure_ascii=False)
actual_screenshot_path = result.get("data", {}).get("path")
if actual_screenshot_path:
screenshot_path = Path(actual_screenshot_path)
# Check if screenshot file was created
if not screenshot_path.exists():
mode = "local" if _is_local_mode() else "cloud"

View file

@ -304,6 +304,12 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel
If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
On update, passing skills=[] clears attached skills.
NOTE: The agent's final response is auto-delivered to the target — do NOT use
send_message in the prompt for that same destination. Same-target send_message
calls are skipped to avoid duplicate cron deliveries. Put the primary
user-facing content in the final response, and use send_message only for
additional or different targets.
Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""",
"parameters": {
"type": "object",

View file

@ -435,24 +435,25 @@ def check_memory_requirements() -> bool:
MEMORY_SCHEMA = {
"name": "memory",
"description": (
"Save important information to persistent memory that survives across sessions. "
"Your memory appears in your system prompt at session start -- it's how you "
"remember things about the user and your environment between conversations.\n\n"
"Save durable information to persistent memory that survives across sessions. "
"Memory is injected into future turns, so keep it compact and focused on facts "
"that will still matter later.\n\n"
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
"- You discover something about the environment (OS, installed tools, project structure)\n"
"- User corrects you or says 'remember this' / 'don't do that again'\n"
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
"- You completed something - log it like a diary entry\n"
"- After completing a complex task, save a brief note about what was done\n\n"
"- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n"
"- You identify a stable fact that will be useful again in future sessions\n\n"
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
"state to memory; use session_search to recall those from past transcripts.\n"
"If you've discovered a new way to do something, solved a problem that could be "
"necessary later, save it as a skill with the skill tool.\n\n"
"TWO TARGETS:\n"
"- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
"- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
"ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
"remove (delete -- old_text identifies it).\n"
"Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n"
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps."
"remove (delete -- old_text identifies it).\n\n"
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state."
),
"parameters": {
"type": "object",

View file

@ -153,6 +153,10 @@ def _handle_send(args):
f"or set a home channel via: hermes config set {platform_name.upper()}_HOME_CHANNEL <channel_id>"
})
duplicate_skip = _maybe_skip_cron_duplicate_send(platform_name, chat_id, thread_id)
if duplicate_skip:
return json.dumps(duplicate_skip)
try:
from model_tools import _run_async
result = _run_async(
@ -213,6 +217,51 @@ def _describe_media_for_mirror(media_files):
return f"[Sent {len(media_files)} media attachments]"
def _get_cron_auto_delivery_target():
"""Return the cron scheduler's auto-delivery target for the current run, if any."""
platform = os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM", "").strip().lower()
chat_id = os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID", "").strip()
if not platform or not chat_id:
return None
thread_id = os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID", "").strip() or None
return {
"platform": platform,
"chat_id": chat_id,
"thread_id": thread_id,
}
def _maybe_skip_cron_duplicate_send(platform_name: str, chat_id: str, thread_id: str | None):
"""Skip redundant cron send_message calls when the scheduler will auto-deliver there."""
auto_target = _get_cron_auto_delivery_target()
if not auto_target:
return None
same_target = (
auto_target["platform"] == platform_name
and str(auto_target["chat_id"]) == str(chat_id)
and auto_target.get("thread_id") == thread_id
)
if not same_target:
return None
target_label = f"{platform_name}:{chat_id}"
if thread_id is not None:
target_label += f":{thread_id}"
return {
"success": True,
"skipped": True,
"reason": "cron_auto_delivery_duplicate_target",
"target": target_label,
"note": (
f"Skipped send_message to {target_label}. This cron job will already auto-deliver "
"its final response to that same target. Put the intended user-facing content in "
"your final response instead, or use a different target if you want an additional message."
),
}
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
"""Route a message to the appropriate platform sender."""
from gateway.config import Platform

View file

@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = {
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
"- You want to check if you've solved a similar problem before\n"
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
"Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
"than to guess or ask the user to repeat themselves.\n\n"
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "

View file

@ -645,14 +645,11 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
Args:
result: Scan result from scan_skill()
force: If True, override blocks for caution verdicts (never overrides dangerous)
force: If True, override blocked policy decisions for this scan result
Returns:
(allowed, reason) tuple
"""
if result.verdict == "dangerous":
return False, f"Scan verdict is DANGEROUS ({len(result.findings)} findings). Blocked."
policy = INSTALL_POLICY.get(result.trust_level, INSTALL_POLICY["community"])
vi = VERDICT_INDEX.get(result.verdict, 2)
decision = policy[vi]
@ -661,7 +658,10 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
return True, f"Allowed ({result.trust_level} source, {result.verdict} verdict)"
if force:
return True, f"Force-installed despite {result.verdict} verdict ({len(result.findings)} findings)"
return True, (
f"Force-installed despite blocked {result.verdict} verdict "
f"({len(result.findings)} findings)"
)
return False, (
f"Blocked ({result.trust_level} source + {result.verdict} verdict, "

View file

@ -354,6 +354,7 @@ async def vision_analyze_tool(
# Prepare error response
result = {
"success": False,
"error": error_msg,
"analysis": analysis,
}