Merge origin/main into hermes/hermes-5d160594
This commit is contained in:
commit
3229e434b8
78 changed files with 3762 additions and 395 deletions
|
|
@ -53,6 +53,7 @@ import atexit
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import shutil
|
||||
|
|
@ -165,63 +166,18 @@ def _emergency_cleanup_all_sessions():
|
|||
if not _active_sessions:
|
||||
return
|
||||
|
||||
logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions))
|
||||
|
||||
logger.info("Emergency cleanup: closing %s active session(s)...",
|
||||
len(_active_sessions))
|
||||
|
||||
try:
|
||||
if _is_local_mode():
|
||||
# Local mode: just close agent-browser sessions via CLI
|
||||
for task_id, session_info in list(_active_sessions.items()):
|
||||
session_name = session_info.get("session_name")
|
||||
if session_name:
|
||||
try:
|
||||
browser_cmd = _find_agent_browser()
|
||||
task_socket_dir = os.path.join(
|
||||
_socket_safe_tmpdir(),
|
||||
f"agent-browser-{session_name}"
|
||||
)
|
||||
env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
|
||||
subprocess.run(
|
||||
browser_cmd.split() + ["--session", session_name, "--json", "close"],
|
||||
capture_output=True, timeout=5, env=env,
|
||||
)
|
||||
logger.info("Closed local session %s", session_name)
|
||||
except Exception as e:
|
||||
logger.debug("Error closing local session %s: %s", session_name, e)
|
||||
else:
|
||||
# Cloud mode: release Browserbase sessions via API
|
||||
api_key = os.environ.get("BROWSERBASE_API_KEY")
|
||||
project_id = os.environ.get("BROWSERBASE_PROJECT_ID")
|
||||
|
||||
if not api_key or not project_id:
|
||||
logger.warning("Cannot cleanup - missing BROWSERBASE credentials")
|
||||
return
|
||||
|
||||
for task_id, session_info in list(_active_sessions.items()):
|
||||
bb_session_id = session_info.get("bb_session_id")
|
||||
if bb_session_id:
|
||||
try:
|
||||
response = requests.post(
|
||||
f"https://api.browserbase.com/v1/sessions/{bb_session_id}",
|
||||
headers={
|
||||
"X-BB-API-Key": api_key,
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json={
|
||||
"projectId": project_id,
|
||||
"status": "REQUEST_RELEASE"
|
||||
},
|
||||
timeout=5 # Short timeout for cleanup
|
||||
)
|
||||
if response.status_code in (200, 201, 204):
|
||||
logger.info("Closed session %s", bb_session_id)
|
||||
else:
|
||||
logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code)
|
||||
except Exception as e:
|
||||
logger.error("Error closing session %s: %s", bb_session_id, e)
|
||||
|
||||
_active_sessions.clear()
|
||||
cleanup_all_browsers()
|
||||
except Exception as e:
|
||||
logger.error("Emergency cleanup error: %s", e)
|
||||
finally:
|
||||
with _cleanup_lock:
|
||||
_active_sessions.clear()
|
||||
_session_last_activity.clear()
|
||||
_recording_sessions.clear()
|
||||
|
||||
|
||||
# Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM
|
||||
|
|
@ -640,18 +596,14 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
|
|||
|
||||
|
||||
def _create_local_session(task_id: str) -> Dict[str, str]:
    """Create a lightweight local browser session (no cloud API call).

    Returns the same dict shape as ``_create_browserbase_session`` so the rest
    of the code can treat both modes uniformly.

    Args:
        task_id: Task identifier the session is created for (used for logging
            only; the session name itself is random).

    Returns:
        Dict with ``session_name`` set, the cloud-only keys
        (``bb_session_id``, ``cdp_url``) set to ``None``, and a ``features``
        marker identifying the session as local.
    """
    import uuid
    # Merge artifact removed: an older duplicate assignment/log pair preceded
    # these lines. NOTE(review): the short "h_" prefix presumably keeps derived
    # socket-dir paths short — confirm against _run_browser_command's
    # socket-path-length logging.
    session_name = f"h_{uuid.uuid4().hex[:10]}"
    logger.info("Created local browser session %s for task %s",
                session_name, task_id)
    return {
        "session_name": session_name,
        "bb_session_id": None,  # Not applicable in local mode
        "cdp_url": None,  # Not applicable in local mode
        "features": {"local": True},
    }
|
||||
|
||||
|
|
@ -772,6 +724,27 @@ def _find_agent_browser() -> str:
|
|||
)
|
||||
|
||||
|
||||
def _extract_screenshot_path_from_text(text: str) -> Optional[str]:
|
||||
"""Extract a screenshot file path from agent-browser human-readable output."""
|
||||
if not text:
|
||||
return None
|
||||
|
||||
patterns = [
|
||||
r"Screenshot saved to ['\"](?P<path>/[^'\"]+?\.png)['\"]",
|
||||
r"Screenshot saved to (?P<path>/\S+?\.png)(?:\s|$)",
|
||||
r"(?P<path>/\S+?\.png)(?:\s|$)",
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
path = match.group("path").strip().strip("'\"")
|
||||
if path:
|
||||
return path
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _run_browser_command(
|
||||
task_id: str,
|
||||
command: str,
|
||||
|
|
@ -841,9 +814,20 @@ def _run_browser_command(
|
|||
command, task_id, task_socket_dir, len(task_socket_dir))
|
||||
|
||||
browser_env = {**os.environ}
|
||||
# Ensure PATH includes standard dirs (systemd services may have minimal PATH)
|
||||
if "/usr/bin" not in browser_env.get("PATH", "").split(":"):
|
||||
browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}"
|
||||
|
||||
# Ensure PATH includes Hermes-managed Node first, then standard system dirs.
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
hermes_node_bin = str(hermes_home / "node" / "bin")
|
||||
|
||||
existing_path = browser_env.get("PATH", "")
|
||||
path_parts = [p for p in existing_path.split(":") if p]
|
||||
candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p]
|
||||
|
||||
for part in reversed(candidate_dirs):
|
||||
if os.path.isdir(part) and part not in path_parts:
|
||||
path_parts.insert(0, part)
|
||||
|
||||
browser_env["PATH"] = ":".join(path_parts)
|
||||
browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
|
||||
|
||||
result = subprocess.run(
|
||||
|
|
@ -866,10 +850,11 @@ def _run_browser_command(
|
|||
command, " ".join(cmd_parts[:4]) + "...",
|
||||
(result.stderr or "")[:200])
|
||||
|
||||
# Parse JSON output
|
||||
if result.stdout.strip():
|
||||
stdout_text = result.stdout.strip()
|
||||
|
||||
if stdout_text:
|
||||
try:
|
||||
parsed = json.loads(result.stdout.strip())
|
||||
parsed = json.loads(stdout_text)
|
||||
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
|
||||
if command == "snapshot" and parsed.get("success"):
|
||||
snap_data = parsed.get("data", {})
|
||||
|
|
@ -879,13 +864,33 @@ def _run_browser_command(
|
|||
"returncode=%s", result.returncode)
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
# Non-JSON output indicates agent-browser crash or version mismatch
|
||||
raw = result.stdout.strip()[:500]
|
||||
raw = stdout_text[:2000]
|
||||
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
|
||||
command, result.returncode, raw[:200])
|
||||
command, result.returncode, raw[:500])
|
||||
|
||||
if command == "screenshot":
|
||||
stderr_text = (result.stderr or "").strip()
|
||||
combined_text = "\n".join(
|
||||
part for part in [stdout_text, stderr_text] if part
|
||||
)
|
||||
recovered_path = _extract_screenshot_path_from_text(combined_text)
|
||||
|
||||
if recovered_path and Path(recovered_path).exists():
|
||||
logger.info(
|
||||
"browser 'screenshot' recovered file from non-JSON output: %s",
|
||||
recovered_path,
|
||||
)
|
||||
return {
|
||||
"success": True,
|
||||
"data": {
|
||||
"path": recovered_path,
|
||||
"raw": raw,
|
||||
},
|
||||
}
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"data": {"raw": raw}
|
||||
"success": False,
|
||||
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
|
||||
}
|
||||
|
||||
# Check for errors
|
||||
|
|
@ -1250,46 +1255,26 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
|
|||
def browser_close(task_id: Optional[str] = None) -> str:
|
||||
"""
|
||||
Close the browser session.
|
||||
|
||||
|
||||
Args:
|
||||
task_id: Task identifier for session isolation
|
||||
|
||||
|
||||
Returns:
|
||||
JSON string with close result
|
||||
"""
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Stop auto-recording before closing
|
||||
_maybe_stop_recording(effective_task_id)
|
||||
|
||||
result = _run_browser_command(effective_task_id, "close", [])
|
||||
|
||||
# Close the backend session (Browserbase API in cloud mode, nothing extra in local mode)
|
||||
session_key = task_id if task_id and task_id in _active_sessions else "default"
|
||||
if session_key in _active_sessions:
|
||||
session_info = _active_sessions[session_key]
|
||||
bb_session_id = session_info.get("bb_session_id")
|
||||
if bb_session_id:
|
||||
# Cloud mode: release the Browserbase session via API
|
||||
try:
|
||||
config = _get_browserbase_config()
|
||||
_close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
|
||||
except Exception as e:
|
||||
logger.warning("Could not close BrowserBase session: %s", e)
|
||||
del _active_sessions[session_key]
|
||||
|
||||
if result.get("success"):
|
||||
return json.dumps({
|
||||
"success": True,
|
||||
"closed": True
|
||||
}, ensure_ascii=False)
|
||||
else:
|
||||
# Even if close fails, session was released
|
||||
return json.dumps({
|
||||
"success": True,
|
||||
"closed": True,
|
||||
"warning": result.get("error", "Session may not have been active")
|
||||
}, ensure_ascii=False)
|
||||
with _cleanup_lock:
|
||||
had_session = effective_task_id in _active_sessions
|
||||
|
||||
cleanup_browser(effective_task_id)
|
||||
|
||||
response = {
|
||||
"success": True,
|
||||
"closed": True,
|
||||
}
|
||||
if not had_session:
|
||||
response["warning"] = "Session may not have been active"
|
||||
return json.dumps(response, ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -1481,9 +1466,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
|
||||
|
||||
# Take screenshot using agent-browser
|
||||
screenshot_args = [str(screenshot_path)]
|
||||
screenshot_args = []
|
||||
if annotate:
|
||||
screenshot_args.insert(0, "--annotate")
|
||||
screenshot_args.append("--annotate")
|
||||
screenshot_args.append("--full")
|
||||
screenshot_args.append(str(screenshot_path))
|
||||
result = _run_browser_command(
|
||||
effective_task_id,
|
||||
"screenshot",
|
||||
|
|
@ -1498,7 +1485,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
"success": False,
|
||||
"error": f"Failed to take screenshot ({mode} mode): {error_detail}"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
|
||||
actual_screenshot_path = result.get("data", {}).get("path")
|
||||
if actual_screenshot_path:
|
||||
screenshot_path = Path(actual_screenshot_path)
|
||||
|
||||
# Check if screenshot file was created
|
||||
if not screenshot_path.exists():
|
||||
mode = "local" if _is_local_mode() else "cloud"
|
||||
|
|
|
|||
|
|
@ -304,6 +304,12 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel
|
|||
If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
|
||||
On update, passing skills=[] clears attached skills.
|
||||
|
||||
NOTE: The agent's final response is auto-delivered to the target — do NOT use
|
||||
send_message in the prompt for that same destination. Same-target send_message
|
||||
calls are skipped to avoid duplicate cron deliveries. Put the primary
|
||||
user-facing content in the final response, and use send_message only for
|
||||
additional or different targets.
|
||||
|
||||
Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
|
|
|
|||
|
|
@ -435,24 +435,25 @@ def check_memory_requirements() -> bool:
|
|||
MEMORY_SCHEMA = {
|
||||
"name": "memory",
|
||||
"description": (
|
||||
"Save important information to persistent memory that survives across sessions. "
|
||||
"Your memory appears in your system prompt at session start -- it's how you "
|
||||
"remember things about the user and your environment between conversations.\n\n"
|
||||
"Save durable information to persistent memory that survives across sessions. "
|
||||
"Memory is injected into future turns, so keep it compact and focused on facts "
|
||||
"that will still matter later.\n\n"
|
||||
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
|
||||
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
|
||||
"- You discover something about the environment (OS, installed tools, project structure)\n"
|
||||
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
||||
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
|
||||
"- You completed something - log it like a diary entry\n"
|
||||
"- After completing a complex task, save a brief note about what was done\n\n"
|
||||
"- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n"
|
||||
"- You identify a stable fact that will be useful again in future sessions\n\n"
|
||||
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
|
||||
"state to memory; use session_search to recall those from past transcripts.\n"
|
||||
"If you've discovered a new way to do something, solved a problem that could be "
|
||||
"necessary later, save it as a skill with the skill tool.\n\n"
|
||||
"TWO TARGETS:\n"
|
||||
"- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
|
||||
"- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
|
||||
"ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
|
||||
"remove (delete -- old_text identifies it).\n"
|
||||
"Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n"
|
||||
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps."
|
||||
"remove (delete -- old_text identifies it).\n\n"
|
||||
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
|
|
|
|||
|
|
@ -153,6 +153,10 @@ def _handle_send(args):
|
|||
f"or set a home channel via: hermes config set {platform_name.upper()}_HOME_CHANNEL <channel_id>"
|
||||
})
|
||||
|
||||
duplicate_skip = _maybe_skip_cron_duplicate_send(platform_name, chat_id, thread_id)
|
||||
if duplicate_skip:
|
||||
return json.dumps(duplicate_skip)
|
||||
|
||||
try:
|
||||
from model_tools import _run_async
|
||||
result = _run_async(
|
||||
|
|
@ -213,6 +217,51 @@ def _describe_media_for_mirror(media_files):
|
|||
return f"[Sent {len(media_files)} media attachments]"
|
||||
|
||||
|
||||
def _get_cron_auto_delivery_target():
|
||||
"""Return the cron scheduler's auto-delivery target for the current run, if any."""
|
||||
platform = os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM", "").strip().lower()
|
||||
chat_id = os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID", "").strip()
|
||||
if not platform or not chat_id:
|
||||
return None
|
||||
thread_id = os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID", "").strip() or None
|
||||
return {
|
||||
"platform": platform,
|
||||
"chat_id": chat_id,
|
||||
"thread_id": thread_id,
|
||||
}
|
||||
|
||||
|
||||
def _maybe_skip_cron_duplicate_send(platform_name: str, chat_id: str, thread_id: str | None):
    """Skip redundant cron send_message calls when the scheduler will auto-deliver there.

    Returns a "skipped" result dict when the requested destination matches the
    cron auto-delivery target exactly; returns ``None`` when the send should
    proceed normally.
    """
    target = _get_cron_auto_delivery_target()
    if not target:
        return None

    # Only suppress the send when it aims at the exact auto-delivery
    # destination (platform, chat and thread must all match).
    if target["platform"] != platform_name:
        return None
    if str(target["chat_id"]) != str(chat_id):
        return None
    if target.get("thread_id") != thread_id:
        return None

    label = f"{platform_name}:{chat_id}"
    if thread_id is not None:
        label = f"{label}:{thread_id}"

    note_text = (
        f"Skipped send_message to {label}. This cron job will already auto-deliver "
        "its final response to that same target. Put the intended user-facing content in "
        "your final response instead, or use a different target if you want an additional message."
    )
    return {
        "success": True,
        "skipped": True,
        "reason": "cron_auto_delivery_duplicate_target",
        "target": label,
        "note": note_text,
    }
|
||||
|
||||
|
||||
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
|
||||
"""Route a message to the appropriate platform sender."""
|
||||
from gateway.config import Platform
|
||||
|
|
|
|||
|
|
@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = {
|
|||
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
|
||||
"- You want to check if you've solved a similar problem before\n"
|
||||
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
|
||||
"Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
|
||||
"than to guess or ask the user to repeat themselves.\n\n"
|
||||
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
|
||||
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
|
||||
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
|
||||
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
|
||||
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
|
||||
|
|
|
|||
|
|
@ -645,14 +645,11 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
|
|||
|
||||
Args:
|
||||
result: Scan result from scan_skill()
|
||||
force: If True, override blocks for caution verdicts (never overrides dangerous)
|
||||
force: If True, override blocked policy decisions for this scan result
|
||||
|
||||
Returns:
|
||||
(allowed, reason) tuple
|
||||
"""
|
||||
if result.verdict == "dangerous":
|
||||
return False, f"Scan verdict is DANGEROUS ({len(result.findings)} findings). Blocked."
|
||||
|
||||
policy = INSTALL_POLICY.get(result.trust_level, INSTALL_POLICY["community"])
|
||||
vi = VERDICT_INDEX.get(result.verdict, 2)
|
||||
decision = policy[vi]
|
||||
|
|
@ -661,7 +658,10 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
|
|||
return True, f"Allowed ({result.trust_level} source, {result.verdict} verdict)"
|
||||
|
||||
if force:
|
||||
return True, f"Force-installed despite {result.verdict} verdict ({len(result.findings)} findings)"
|
||||
return True, (
|
||||
f"Force-installed despite blocked {result.verdict} verdict "
|
||||
f"({len(result.findings)} findings)"
|
||||
)
|
||||
|
||||
return False, (
|
||||
f"Blocked ({result.trust_level} source + {result.verdict} verdict, "
|
||||
|
|
|
|||
|
|
@ -354,6 +354,7 @@ async def vision_analyze_tool(
|
|||
# Prepare error response
|
||||
result = {
|
||||
"success": False,
|
||||
"error": error_msg,
|
||||
"analysis": analysis,
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue