fix: align salvaged browser cleanup patch with current main

Resolve the cherry-pick against current browser_tool structure without carrying unrelated formatting churn, while preserving the intended cleanup, PATH, and screenshot recovery changes from PR #1001.
This commit is contained in:
teknium1 2026-03-14 11:34:31 -07:00
parent 895fe5a5d3
commit c1d1699a64

View file

@ -49,7 +49,6 @@ Usage:
browser_click("@e5", task_id="task_123")
"""
from tools.registry import registry
import atexit
import json
import logging
@ -128,8 +127,7 @@ def _socket_safe_tmpdir() -> str:
# Track active sessions per task
# Stores: session_name (always), bb_session_id + cdp_url (cloud mode only)
# task_id -> {session_name, ...}
_active_sessions: Dict[str, Dict[str, str]] = {}
_active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, ...}
_recording_sessions: set = set() # task_ids with active recordings
# Flag to track if cleanup has been done
@ -142,8 +140,7 @@ _cleanup_done = False
# Session inactivity timeout (seconds) - cleanup if no activity for this long
# Default: 5 minutes. Needs headroom for LLM reasoning between browser commands,
# especially when subagents are doing multi-step browser tasks.
BROWSER_SESSION_INACTIVITY_TIMEOUT = int(
os.environ.get("BROWSER_INACTIVITY_TIMEOUT", "300"))
BROWSER_SESSION_INACTIVITY_TIMEOUT = int(os.environ.get("BROWSER_INACTIVITY_TIMEOUT", "300"))
# Track last activity time per session
_session_last_activity: Dict[str, float] = {}
@ -214,17 +211,14 @@ def _cleanup_inactive_browser_sessions():
for task_id in sessions_to_cleanup:
try:
elapsed = int(
current_time - _session_last_activity.get(task_id, current_time))
logger.info(
"Cleaning up inactive session for task: %s (inactive for %ss)", task_id, elapsed)
elapsed = int(current_time - _session_last_activity.get(task_id, current_time))
logger.info("Cleaning up inactive session for task: %s (inactive for %ss)", task_id, elapsed)
cleanup_browser(task_id)
with _cleanup_lock:
if task_id in _session_last_activity:
del _session_last_activity[task_id]
except Exception as e:
logger.warning(
"Error cleaning up inactive session %s: %s", task_id, e)
logger.warning("Error cleaning up inactive session %s: %s", task_id, e)
def _browser_cleanup_thread_worker():
@ -262,8 +256,7 @@ def _start_browser_cleanup_thread():
name="browser-cleanup"
)
_cleanup_thread.start()
logger.info("Started inactivity cleanup thread (timeout: %ss)",
BROWSER_SESSION_INACTIVITY_TIMEOUT)
logger.info("Started inactivity cleanup thread (timeout: %ss)", BROWSER_SESSION_INACTIVITY_TIMEOUT)
def _stop_browser_cleanup_thread():
@ -474,14 +467,11 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
# Check for optional settings from environment
# Proxies: enabled by default for better CAPTCHA solving
enable_proxies = os.environ.get(
"BROWSERBASE_PROXIES", "true").lower() != "false"
enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false"
# Advanced Stealth: requires Scale Plan, disabled by default
enable_advanced_stealth = os.environ.get(
"BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
enable_advanced_stealth = os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
# keepAlive: enabled by default (requires paid plan) - allows reconnection after disconnects
enable_keep_alive = os.environ.get(
"BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
enable_keep_alive = os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
# Custom session timeout in milliseconds (optional) - extends session beyond project default
custom_timeout_ms = os.environ.get("BROWSERBASE_SESSION_TIMEOUT")
@ -513,8 +503,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
if timeout_val > 0:
session_config["timeout"] = timeout_val
except ValueError:
logger.warning(
"Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms)
logger.warning("Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms)
# Enable proxies for better CAPTCHA solving (default: true)
# Routes traffic through residential IPs for more reliable access
@ -550,7 +539,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
if enable_keep_alive:
keepalive_fallback = True
logger.warning("keepAlive may require paid plan (402), retrying without it. "
"Sessions may timeout during long operations.")
"Sessions may timeout during long operations.")
session_config.pop("keepAlive", None)
response = requests.post(
"https://api.browserbase.com/v1/sessions",
@ -566,7 +555,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
if response.status_code == 402 and enable_proxies:
proxies_fallback = True
logger.warning("Proxies unavailable (402), retrying without proxies. "
"Bot detection may be less effective.")
"Bot detection may be less effective.")
session_config.pop("proxies", None)
response = requests.post(
"https://api.browserbase.com/v1/sessions",
@ -579,8 +568,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
)
if not response.ok:
raise RuntimeError(
f"Failed to create Browserbase session: {response.status_code} {response.text}")
raise RuntimeError(f"Failed to create Browserbase session: {response.status_code} {response.text}")
session_data = response.json()
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
@ -597,8 +585,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
# Log session info for debugging
feature_str = ", ".join(k for k, v in features_enabled.items() if v)
logger.info("Created session %s with features: %s",
session_name, feature_str)
logger.info("Created session %s with features: %s", session_name, feature_str)
return {
"session_name": session_name,
@ -793,8 +780,7 @@ def _run_browser_command(
try:
session_info = _get_session_info(task_id)
except Exception as e:
logger.warning(
"Failed to create browser session for task=%s: %s", task_id, e)
logger.warning("Failed to create browser session for task=%s: %s", task_id, e)
return {"success": False, "error": f"Failed to create browser session: {str(e)}"}
# Build the command with the appropriate backend flag.
@ -844,13 +830,6 @@ def _run_browser_command(
browser_env["PATH"] = ":".join(path_parts)
browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
node_path = shutil.which("node", path=browser_env["PATH"])
if node_path:
logger.debug("browser subprocess using node at: %s", node_path)
else:
logger.warning("node not found in browser PATH: %s",
browser_env["PATH"])
result = subprocess.run(
cmd_parts,
capture_output=True,
@ -862,8 +841,7 @@ def _run_browser_command(
# Log stderr for diagnostics — use warning level on failure so it's visible
if result.stderr and result.stderr.strip():
level = logging.WARNING if result.returncode != 0 else logging.DEBUG
logger.log(level, "browser '%s' stderr: %s",
command, result.stderr.strip()[:500])
logger.log(level, "browser '%s' stderr: %s", command, result.stderr.strip()[:500])
# Log empty output as warning — common sign of broken agent-browser
if not result.stdout.strip() and result.returncode == 0:
@ -877,6 +855,7 @@ def _run_browser_command(
if stdout_text:
try:
parsed = json.loads(stdout_text)
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
if command == "snapshot" and parsed.get("success"):
snap_data = parsed.get("data", {})
if not snap_data.get("snapshot") and not snap_data.get("refs"):
@ -894,8 +873,7 @@ def _run_browser_command(
combined_text = "\n".join(
part for part in [stdout_text, stderr_text] if part
)
recovered_path = _extract_screenshot_path_from_text(
combined_text)
recovered_path = _extract_screenshot_path_from_text(combined_text)
if recovered_path and Path(recovered_path).exists():
logger.info(
@ -917,10 +895,8 @@ def _run_browser_command(
# Check for errors
if result.returncode != 0:
error_msg = result.stderr.strip(
) if result.stderr else f"Command failed with code {result.returncode}"
logger.warning("browser '%s' failed (rc=%s): %s",
command, result.returncode, error_msg[:300])
error_msg = result.stderr.strip() if result.stderr else f"Command failed with code {result.returncode}"
logger.warning("browser '%s' failed (rc=%s): %s", command, result.returncode, error_msg[:300])
return {"success": False, "error": error_msg}
return {"success": True, "data": {}}
@ -1319,10 +1295,8 @@ def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
console_args = ["--clear"] if clear else []
error_args = ["--clear"] if clear else []
console_result = _run_browser_command(
effective_task_id, "console", console_args)
errors_result = _run_browser_command(
effective_task_id, "errors", error_args)
console_result = _run_browser_command(effective_task_id, "console", console_args)
errors_result = _run_browser_command(effective_task_id, "errors", error_args)
messages = []
if console_result.get("success"):
@ -1355,16 +1329,14 @@ def _maybe_start_recording(task_id: str):
if task_id in _recording_sessions:
return
try:
hermes_home = Path(os.environ.get(
"HERMES_HOME", Path.home() / ".hermes"))
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
config_path = hermes_home / "config.yaml"
record_enabled = False
if config_path.exists():
import yaml
with open(config_path) as f:
cfg = yaml.safe_load(f) or {}
record_enabled = cfg.get("browser", {}).get(
"record_sessions", False)
record_enabled = cfg.get("browser", {}).get("record_sessions", False)
if not record_enabled:
return
@ -1375,18 +1347,14 @@ def _maybe_start_recording(task_id: str):
import time
timestamp = time.strftime("%Y%m%d_%H%M%S")
recording_path = recordings_dir / \
f"session_{timestamp}_{task_id[:16]}.webm"
recording_path = recordings_dir / f"session_{timestamp}_{task_id[:16]}.webm"
result = _run_browser_command(
task_id, "record", ["start", str(recording_path)])
result = _run_browser_command(task_id, "record", ["start", str(recording_path)])
if result.get("success"):
_recording_sessions.add(task_id)
logger.info("Auto-recording browser session %s to %s",
task_id, recording_path)
logger.info("Auto-recording browser session %s to %s", task_id, recording_path)
else:
logger.debug("Could not start auto-recording: %s",
result.get("error"))
logger.debug("Could not start auto-recording: %s", result.get("error"))
except Exception as e:
logger.debug("Auto-recording setup failed: %s", e)
@ -1399,8 +1367,7 @@ def _maybe_stop_recording(task_id: str):
result = _run_browser_command(task_id, "record", ["stop"])
if result.get("success"):
path = result.get("data", {}).get("path", "")
logger.info(
"Saved browser recording for session %s: %s", task_id, path)
logger.info("Saved browser recording for session %s: %s", task_id, path)
except Exception as e:
logger.debug("Could not stop recording for %s: %s", task_id, e)
finally:
@ -1486,11 +1453,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
from pathlib import Path
effective_task_id = task_id or "default"
# Save screenshot to persistent location so it can be shared with users
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
screenshots_dir = hermes_home / "browser_screenshots"
screenshot_path = screenshots_dir / \
f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
try:
screenshots_dir.mkdir(parents=True, exist_ok=True)
@ -1589,8 +1556,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
# screenshot loses evidence the user might need. The 24-hour cleanup
# in _cleanup_old_screenshots prevents unbounded disk growth.
logger.warning("browser_vision failed: %s", e, exc_info=True)
error_info = {"success": False,
"error": f"Error during vision analysis: {str(e)}"}
error_info = {"success": False, "error": f"Error during vision analysis: {str(e)}"}
if screenshot_path.exists():
error_info["screenshot_path"] = str(screenshot_path)
error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:<path>."
@ -1625,8 +1591,7 @@ def _cleanup_old_recordings(max_age_hours=72):
"""Remove browser recordings older than max_age_hours to prevent disk bloat."""
import time
try:
hermes_home = Path(os.environ.get(
"HERMES_HOME", Path.home() / ".hermes"))
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
recordings_dir = hermes_home / "browser_recordings"
if not recordings_dir.exists():
return
@ -1676,12 +1641,10 @@ def _close_browserbase_session(session_id: str, api_key: str, project_id: str) -
)
if response.status_code in (200, 201, 204):
logger.debug(
"Successfully closed BrowserBase session %s", session_id)
logger.debug("Successfully closed BrowserBase session %s", session_id)
return True
else:
logger.warning("Failed to close session %s: HTTP %s - %s",
session_id, response.status_code, response.text[:200])
logger.warning("Failed to close session %s: HTTP %s - %s", session_id, response.status_code, response.text[:200])
return False
except Exception as e:
@ -1712,8 +1675,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
if session_info:
bb_session_id = session_info.get("bb_session_id", "unknown")
logger.debug("Found session for task %s: bb_session_id=%s",
task_id, bb_session_id)
logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
# Stop auto-recording before closing (saves the file)
_maybe_stop_recording(task_id)
@ -1721,11 +1683,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
# Try to close via agent-browser first (needs session in _active_sessions)
try:
_run_browser_command(task_id, "close", [], timeout=10)
logger.debug(
"agent-browser close command completed for task %s", task_id)
logger.debug("agent-browser close command completed for task %s", task_id)
except Exception as e:
logger.warning(
"agent-browser close failed for task %s: %s", task_id, e)
logger.warning("agent-browser close failed for task %s: %s", task_id, e)
# Now remove from tracking under lock
with _cleanup_lock:
@ -1736,20 +1696,16 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
if bb_session_id and not _is_local_mode():
try:
config = _get_browserbase_config()
success = _close_browserbase_session(
bb_session_id, config["api_key"], config["project_id"])
success = _close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
if not success:
logger.warning(
"Could not close BrowserBase session %s", bb_session_id)
logger.warning("Could not close BrowserBase session %s", bb_session_id)
except Exception as e:
logger.error(
"Exception during BrowserBase session close: %s", e)
logger.error("Exception during BrowserBase session close: %s", e)
# Kill the daemon process and clean up socket directory
session_name = session_info.get("session_name", "")
if session_name:
socket_dir = os.path.join(
_socket_safe_tmpdir(), f"agent-browser-{session_name}")
socket_dir = os.path.join(_socket_safe_tmpdir(), f"agent-browser-{session_name}")
if os.path.exists(socket_dir):
# agent-browser writes {session}.pid in the socket dir
pid_file = os.path.join(socket_dir, f"{session_name}.pid")
@ -1757,11 +1713,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
try:
daemon_pid = int(Path(pid_file).read_text().strip())
os.kill(daemon_pid, signal.SIGTERM)
logger.debug("Killed daemon pid %s for %s",
daemon_pid, session_name)
logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name)
except (ProcessLookupError, ValueError, PermissionError, OSError):
logger.debug(
"Could not kill daemon pid for %s (already dead or inaccessible)", session_name)
logger.debug("Could not kill daemon pid for %s (already dead or inaccessible)", session_name)
shutil.rmtree(socket_dir, ignore_errors=True)
logger.debug("Removed task %s from active sessions", task_id)
@ -1848,8 +1802,7 @@ if __name__ == "__main__":
_find_agent_browser()
except FileNotFoundError:
print(" - agent-browser CLI not found")
print(
" Install: npm install -g agent-browser && agent-browser install --with-deps")
print(" Install: npm install -g agent-browser && agent-browser install --with-deps")
if not _is_local_mode():
if not os.environ.get("BROWSERBASE_API_KEY"):
print(" - BROWSERBASE_API_KEY not set (required for cloud mode)")
@ -1870,6 +1823,7 @@ if __name__ == "__main__":
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
from tools.registry import registry
_BROWSER_SCHEMA_MAP = {s["name"]: s for s in BROWSER_TOOL_SCHEMAS}
@ -1877,8 +1831,7 @@ registry.register(
name="browser_navigate",
toolset="browser",
schema=_BROWSER_SCHEMA_MAP["browser_navigate"],
handler=lambda args, **kw: browser_navigate(
url=args.get("url", ""), task_id=kw.get("task_id")),
handler=lambda args, **kw: browser_navigate(url=args.get("url", ""), task_id=kw.get("task_id")),
check_fn=check_browser_requirements,
)
registry.register(
@ -1893,8 +1846,7 @@ registry.register(
name="browser_click",
toolset="browser",
schema=_BROWSER_SCHEMA_MAP["browser_click"],
handler=lambda args, **kw: browser_click(**
args, task_id=kw.get("task_id")),
handler=lambda args, **kw: browser_click(**args, task_id=kw.get("task_id")),
check_fn=check_browser_requirements,
)
registry.register(
@ -1908,8 +1860,7 @@ registry.register(
name="browser_scroll",
toolset="browser",
schema=_BROWSER_SCHEMA_MAP["browser_scroll"],
handler=lambda args, **kw: browser_scroll(**
args, task_id=kw.get("task_id")),
handler=lambda args, **kw: browser_scroll(**args, task_id=kw.get("task_id")),
check_fn=check_browser_requirements,
)
registry.register(
@ -1923,8 +1874,7 @@ registry.register(
name="browser_press",
toolset="browser",
schema=_BROWSER_SCHEMA_MAP["browser_press"],
handler=lambda args, **kw: browser_press(
key=args.get("key", ""), task_id=kw.get("task_id")),
handler=lambda args, **kw: browser_press(key=args.get("key", ""), task_id=kw.get("task_id")),
check_fn=check_browser_requirements,
)
registry.register(
@ -1945,20 +1895,13 @@ registry.register(
name="browser_vision",
toolset="browser",
schema=_BROWSER_SCHEMA_MAP["browser_vision"],
handler=lambda args, **kw: browser_vision(
question=args.get("question", ""),
annotate=args.get("annotate", False),
task_id=kw.get("task_id"),
),
handler=lambda args, **kw: browser_vision(question=args.get("question", ""), annotate=args.get("annotate", False), task_id=kw.get("task_id")),
check_fn=check_browser_requirements,
)
registry.register(
name="browser_console",
toolset="browser",
schema=_BROWSER_SCHEMA_MAP["browser_console"],
handler=lambda args, **kw: browser_console(
clear=args.get("clear", False),
task_id=kw.get("task_id"),
),
handler=lambda args, **kw: browser_console(clear=args.get("clear", False), task_id=kw.get("task_id")),
check_fn=check_browser_requirements,
)