Merge origin/develop and integrate browser isolation

This commit is contained in:
andreysk0304 2026-04-21 23:18:59 +03:00
parent 50589232d6
commit ebba5d04a4
13 changed files with 812 additions and 251 deletions

View file

@ -13,7 +13,8 @@ TELEGRAM_ALLOWED_USERS=
TELEGRAM_HOME_CHANNEL=
BROWSER_URL=http://browser:9222
BROWSER_VIEW_URL=
BROWSER_VIEW_URL=http://localhost:6080
BROWSER_VIEW_BASE_URL=
BROWSER_API_HOST=0.0.0.0
BROWSER_API_PORT=8088

1
.gitignore vendored
View file

@ -17,6 +17,7 @@ hermes_code/test_browser.py
.git
.github
.idea
.DS_Store
hermes_data
workspace

View file

@ -7,10 +7,11 @@ XVFB_LOG="/tmp/xvfb.log"
VNC_PORT="${VNC_PORT:-5900}"
NOVNC_PORT="${NOVNC_PORT:-6080}"
CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-9223}"
CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-9222}"
CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-${BROWSER_CHROME_DEBUG_PORT:-9223}}"
CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-${BROWSER_CDP_PROXY_PORT:-9222}}"
BROWSER_USE_RPC_PORT="${BROWSER_USE_RPC_PORT:-8787}"
CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-/src/browser_data}"
CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-${BROWSER_DATA_DIR:-/src/browser_data}}"
BROWSER_ENABLE_UI="${BROWSER_ENABLE_UI:-true}"
MAX_RESTARTS="${MAX_RESTARTS:-10}"
RESTART_WINDOW_SEC="${RESTART_WINDOW_SEC:-60}"
@ -98,7 +99,6 @@ if [ ! -f /var/lib/dbus/machine-id ]; then
dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true
fi
# Remove stale lock/socket files left over from earlier Xvfb crashes on the same DISPLAY.
rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true
log "starting X stack on DISPLAY=${DISPLAY}"
@ -116,19 +116,23 @@ if ! wait_for_x_display 15; then
exit 1
fi
start_bg fluxbox
start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared
start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}"
if [ "$BROWSER_ENABLE_UI" != "false" ]; then
start_bg fluxbox
start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared
start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}"
fi
start_bg socat "TCP-LISTEN:${CHROME_PUBLIC_DEBUG_PORT},fork,reuseaddr" "TCP:127.0.0.1:${CHROME_LOCAL_DEBUG_PORT}"
start_bg python3 -u /src/browser_use_runner.py
if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then
log "fatal: x11vnc did not open port ${VNC_PORT}"
exit 1
fi
if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then
log "fatal: websockify did not open port ${NOVNC_PORT}"
exit 1
if [ "$BROWSER_ENABLE_UI" != "false" ]; then
if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then
log "fatal: x11vnc did not open port ${VNC_PORT}"
exit 1
fi
if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then
log "fatal: websockify did not open port ${NOVNC_PORT}"
exit 1
fi
fi
if ! wait_for_port 127.0.0.1 "$BROWSER_USE_RPC_PORT" 20; then
log "fatal: browser-use RPC did not open port ${BROWSER_USE_RPC_PORT}"
@ -194,4 +198,3 @@ while true; do
unset CHROME_EXIT
unset CHROME_PID
done

View file

@ -0,0 +1,35 @@
# Reverse proxy that exposes each per-owner browser runtime's noVNC UI under
# /view/<owner-hash>/, forwarding to the container named hermes-browser-<owner-hash>.
events {}

http {
    # proxy_pass below uses a variable hostname, so nginx must resolve it at
    # request time; 127.0.0.11 is Docker's embedded DNS on user-defined networks.
    resolver 127.0.0.11 ipv6=off;

    # Forward "Connection: upgrade" only when the client asked for an upgrade.
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      close;
    }

    server {
        listen 8080;
        server_name _;

        # Simple liveness page.
        location = / {
            # Use default_type instead of add_header: add_header would append a
            # second Content-Type header next to the one nginx already emits.
            default_type text/plain;
            return 200 "Browser view proxy is running.\n";
        }

        # Bare /view/<hash> -> noVNC page with the websocket path pre-filled.
        location ~ ^/view/(?<owner>[a-f0-9]{16})$ {
            return 302 /view/$owner/vnc.html?path=view/$owner/websockify;
        }

        # Everything below /view/<hash>/ (static noVNC assets and the websocket)
        # is proxied to that owner's runtime container on port 6080.
        location ~ ^/view/(?<owner>[a-f0-9]{16})/(?<rest>.*)$ {
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_buffering off;
            # WebSocket connections are closed after 60s without upstream data
            # by default; keep idle viewer sessions open for up to an hour.
            proxy_read_timeout 3600s;
            proxy_send_timeout 3600s;
            proxy_pass http://hermes-browser-$owner:6080/$rest$is_args$args;
        }
    }
}

View file

@ -9,12 +9,18 @@ services:
environment:
- BROWSER_URL=http://browser:9222
- BROWSER_USE_RPC_URL=http://browser:8787/run
- BROWSER_VIEW_BASE_URL=${BROWSER_VIEW_BASE_URL:-}
- BROWSER_USE_ISOLATION_MODE=docker-per-principal
- BROWSER_RUNTIME_IMAGE=hermes-browser-runtime:latest
- BROWSER_RUNTIME_NETWORK=hermes-net
- BROWSER_RUNTIME_ENABLE_UI=true
- HERMES_HOME=/app/hermes_data
volumes:
- ./hermes_code:/app/hermes_code:ro
- ./hermes_data:/app/hermes_data:rw
- ./workspace:/app/workspace:rw
- ./config.example.yaml:/app/config.example.yaml:ro
- /var/run/docker.sock:/var/run/docker.sock
depends_on:
browser:
condition: service_healthy
@ -35,10 +41,12 @@ services:
fi;
exec python -m gateway.run
"
browser:
build:
context: ./browser_env
dockerfile: Dockerfile.browser
image: hermes-browser-runtime:latest
container_name: hermes-browser
env_file:
- .env
@ -79,13 +87,25 @@ services:
networks:
- hermes-net
browser-view-proxy:
image: nginx:alpine
container_name: hermes-browser-view-proxy
volumes:
- ./browser_env/nginx.browser-view.conf:/etc/nginx/nginx.conf:ro
depends_on:
browser:
condition: service_healthy
restart: always
networks:
- hermes-net
tunnel:
image: cloudflare/cloudflared:latest
profiles:
- remote
container_name: hermes-tunnel
restart: always
command: tunnel --protocol http2 --url http://browser:6080 --no-tls-verify
command: tunnel --protocol http2 --url http://browser-view-proxy:8080 --no-tls-verify
networks:
- hermes-net
@ -94,4 +114,5 @@ volumes:
networks:
hermes-net:
name: hermes-net
driver: bridge

View file

@ -173,6 +173,12 @@ def load_cli_config() -> Dict[str, Any]:
"browser": {
"inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min
"record_sessions": False, # Auto-record browser sessions as WebM videos
"browser_use_isolation_mode": "shared",
"browser_use_runtime_image": "hermes-browser-runtime:latest",
"browser_use_runtime_network": "hermes-net",
"browser_use_runtime_ttl_seconds": 900,
"browser_use_runtime_start_timeout": 45,
"browser_use_runtime_enable_ui": True,
},
"compression": {
"enabled": True, # Auto-compress when approaching context limit
@ -4652,6 +4658,15 @@ class HermesCLI:
label = label[:47] + "..."
self._spinner_text = f"{emoji} {label}"
self._invalidate()
if function_name == "internet_browser":
browser_view_url = ""
try:
from tools.browser_use_manager import get_browser_use_view_url
browser_view_url = get_browser_use_view_url(task_id=self.session_id)
except Exception:
browser_view_url = os.getenv("BROWSER_VIEW_URL", "").strip()
if browser_view_url:
_cprint(f"{emoji} Viewer: {browser_view_url}")
if not self._voice_mode:
return

View file

@ -4931,6 +4931,7 @@ class GatewayRunner:
last_tool = [None] # Mutable container for tracking in closure
last_progress_msg = [None] # Track last message for dedup
repeat_count = [0] # How many times the same message repeated
browser_view_sent = [False] # Only announce browser viewer once per run
def progress_callback(tool_name: str, preview: str = None, args: dict = None):
"""Callback invoked by agent when a tool is called."""
@ -4964,6 +4965,23 @@ class GatewayRunner:
else:
msg = f"{emoji} {tool_name}..."
if tool_name == "internet_browser" and not browser_view_sent[0]:
browser_view_url = ""
try:
from tools.browser_use_manager import get_browser_use_view_url
browser_view_url = get_browser_use_view_url(
task_id=session_id,
honcho_session_key=session_key,
)
except Exception:
browser_view_url = os.getenv("BROWSER_VIEW_URL", "").strip()
if browser_view_url:
msg = (
f"{msg}\n"
f"Browser view: {browser_view_url}"
)
browser_view_sent[0] = True
# Dedup: collapse consecutive identical progress messages.
# Common with execute_code where models iterate with the same
# code (same boilerplate imports → identical previews).

View file

@ -151,6 +151,12 @@ DEFAULT_CONFIG = {
"inactivity_timeout": 120,
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
"record_sessions": False, # Auto-record browser sessions as WebM videos
"browser_use_isolation_mode": "shared",
"browser_use_runtime_image": "hermes-browser-runtime:latest",
"browser_use_runtime_network": "hermes-net",
"browser_use_runtime_ttl_seconds": 900,
"browser_use_runtime_start_timeout": 45,
"browser_use_runtime_enable_ui": True,
},
# Filesystem checkpoints — automatic snapshots before destructive file ops.

View file

@ -1,218 +0,0 @@
---
name: opencode
description: Delegate coding tasks to OpenCode CLI agent for feature implementation, refactoring, PR review, and long-running autonomous sessions. Requires the opencode CLI installed and authenticated.
version: 1.2.0
author: Hermes Agent
license: MIT
metadata:
hermes:
tags: [Coding-Agent, OpenCode, Autonomous, Refactoring, Code-Review]
related_skills: [claude-code, codex, hermes-agent]
---
# OpenCode CLI
Use [OpenCode](https://opencode.ai) as an autonomous coding worker orchestrated by Hermes terminal/process tools. OpenCode is a provider-agnostic, open-source AI coding agent with a TUI and CLI.
## When to Use
- User explicitly asks to use OpenCode
- You want an external coding agent to implement/refactor/review code
- You need long-running coding sessions with progress checks
- You want parallel task execution in isolated workdirs/worktrees
## Prerequisites
- OpenCode installed: `npm i -g opencode-ai@latest` or `brew install anomalyco/tap/opencode`
- Auth configured: `opencode auth login` or set provider env vars (OPENROUTER_API_KEY, etc.)
- Verify: `opencode auth list` should show at least one provider
- Git repository for code tasks (recommended)
- `pty=true` for interactive TUI sessions
## Binary Resolution (Important)
Shell environments may resolve different OpenCode binaries. If behavior differs between your terminal and Hermes, check:
```
terminal(command="which -a opencode")
terminal(command="opencode --version")
```
If needed, pin an explicit binary path:
```
terminal(command="$HOME/.opencode/bin/opencode run '...'", workdir="~/project", pty=true)
```
## One-Shot Tasks
Use `opencode run` for bounded, non-interactive tasks:
```
terminal(command="opencode run 'Add retry logic to API calls and update tests'", workdir="~/project")
```
Attach context files with `-f`:
```
terminal(command="opencode run 'Review this config for security issues' -f config.yaml -f .env.example", workdir="~/project")
```
Show model thinking with `--thinking`:
```
terminal(command="opencode run 'Debug why tests fail in CI' --thinking", workdir="~/project")
```
Force a specific model:
```
terminal(command="opencode run 'Refactor auth module' --model openrouter/anthropic/claude-sonnet-4", workdir="~/project")
```
## Interactive Sessions (Background)
For iterative work requiring multiple exchanges, start the TUI in background:
```
terminal(command="opencode", workdir="~/project", background=true, pty=true)
# Returns session_id
# Send a prompt
process(action="submit", session_id="<id>", data="Implement OAuth refresh flow and add tests")
# Monitor progress
process(action="poll", session_id="<id>")
process(action="log", session_id="<id>")
# Send follow-up input
process(action="submit", session_id="<id>", data="Now add error handling for token expiry")
# Exit cleanly — Ctrl+C
process(action="write", session_id="<id>", data="\x03")
# Or just kill the process
process(action="kill", session_id="<id>")
```
**Important:** Do NOT use `/exit` — it is not a valid OpenCode command and will open an agent selector dialog instead. Use Ctrl+C (`\x03`) or `process(action="kill")` to exit.
### TUI Keybindings
| Key | Action |
|-----|--------|
| `Enter` | Submit message (press twice if needed) |
| `Tab` | Switch between agents (build/plan) |
| `Ctrl+P` | Open command palette |
| `Ctrl+X L` | Switch session |
| `Ctrl+X M` | Switch model |
| `Ctrl+X N` | New session |
| `Ctrl+X E` | Open editor |
| `Ctrl+C` | Exit OpenCode |
### Resuming Sessions
After exiting, OpenCode prints a session ID. Resume with:
```
terminal(command="opencode -c", workdir="~/project", background=true, pty=true) # Continue last session
terminal(command="opencode -s ses_abc123", workdir="~/project", background=true, pty=true) # Specific session
```
## Common Flags
| Flag | Use |
|------|-----|
| `run 'prompt'` | One-shot execution and exit |
| `--continue` / `-c` | Continue the last OpenCode session |
| `--session <id>` / `-s` | Continue a specific session |
| `--agent <name>` | Choose OpenCode agent (build or plan) |
| `--model provider/model` | Force specific model |
| `--format json` | Machine-readable output/events |
| `--file <path>` / `-f` | Attach file(s) to the message |
| `--thinking` | Show model thinking blocks |
| `--variant <level>` | Reasoning effort (high, max, minimal) |
| `--title <name>` | Name the session |
| `--attach <url>` | Connect to a running opencode server |
## Procedure
1. Verify tool readiness:
- `terminal(command="opencode --version")`
- `terminal(command="opencode auth list")`
2. For bounded tasks, use `opencode run '...'` (no pty needed).
3. For iterative tasks, start `opencode` with `background=true, pty=true`.
4. Monitor long tasks with `process(action="poll"|"log")`.
5. If OpenCode asks for input, respond via `process(action="submit", ...)`.
6. Exit with `process(action="write", data="\x03")` or `process(action="kill")`.
7. Summarize file changes, test results, and next steps back to user.
## PR Review Workflow
OpenCode has a built-in PR command:
```
terminal(command="opencode pr 42", workdir="~/project", pty=true)
```
Or review in a temporary clone for isolation:
```
terminal(command="REVIEW=$(mktemp -d) && git clone https://github.com/user/repo.git $REVIEW && cd $REVIEW && opencode run 'Review this PR vs main. Report bugs, security risks, test gaps, and style issues.' -f $(git diff origin/main --name-only | head -20 | tr '\n' ' ')", pty=true)
```
## Parallel Work Pattern
Use separate workdirs/worktrees to avoid collisions:
```
terminal(command="opencode run 'Fix issue #101 and commit'", workdir="/tmp/issue-101", background=true, pty=true)
terminal(command="opencode run 'Add parser regression tests and commit'", workdir="/tmp/issue-102", background=true, pty=true)
process(action="list")
```
## Session & Cost Management
List past sessions:
```
terminal(command="opencode session list")
```
Check token usage and costs:
```
terminal(command="opencode stats")
terminal(command="opencode stats --days 7 --models anthropic/claude-sonnet-4")
```
## Pitfalls
- Interactive `opencode` (TUI) sessions require `pty=true`. The `opencode run` command does NOT need pty.
- `/exit` is NOT a valid command — it opens an agent selector. Use Ctrl+C to exit the TUI.
- PATH mismatch can select the wrong OpenCode binary/model config.
- If OpenCode appears stuck, inspect logs before killing:
- `process(action="log", session_id="<id>")`
- Avoid sharing one working directory across parallel OpenCode sessions.
- Enter may need to be pressed twice to submit in the TUI (once to finalize text, once to send).
## Verification
Smoke test:
```
terminal(command="opencode run 'Respond with exactly: OPENCODE_SMOKE_OK'")
```
Success criteria:
- Output includes `OPENCODE_SMOKE_OK`
- Command exits without provider/model errors
- For code tasks: expected files changed and tests pass
## Rules
1. Prefer `opencode run` for one-shot automation — it's simpler and doesn't need pty.
2. Use interactive background mode only when iteration is needed.
3. Always scope OpenCode sessions to a single repo/workdir.
4. For long tasks, provide progress updates from `process` logs.
5. Report concrete outcomes (files changed, tests, remaining risks).
6. Exit interactive sessions with Ctrl+C or kill, never `/exit`.

View file

@ -0,0 +1,140 @@
"""Tests for browser-use Docker isolation manager."""
import json
from unittest.mock import MagicMock, patch
def test_resolve_isolation_owner_prefers_honcho_session_key():
    """In per-principal mode the session key wins over the task id."""
    from tools.browser_use_manager import resolve_isolation_owner

    session_key = "telegram:chat:user"
    resolved = resolve_isolation_owner(
        "docker-per-principal",
        task_id="task-1",
        honcho_session_key=session_key,
    )

    assert resolved == "telegram:chat:user"
def test_resolve_isolation_owner_uses_task_for_per_task_mode():
    """In per-task mode the task id is the owner even when a session key is given."""
    from tools.browser_use_manager import resolve_isolation_owner

    call_kwargs = {
        "task_id": "task-42",
        "honcho_session_key": "telegram:chat:user",
    }
    resolved = resolve_isolation_owner("docker-per-task", **call_kwargs)

    assert resolved == "task-42"
def test_hash_runtime_owner_is_stable():
    """Hashing is deterministic per owner and differs between owners."""
    from tools.browser_use_manager import hash_runtime_owner

    first = hash_runtime_owner("owner-1")
    repeated = hash_runtime_owner("owner-1")
    assert first == repeated

    other = hash_runtime_owner("owner-2")
    assert hash_runtime_owner("owner-1") != other
def test_shared_mode_returns_shared_cdp_url(monkeypatch):
    """Shared mode hands back the env-configured endpoints without provisioning."""
    from tools import browser_use_manager

    environment = (
        ("BROWSER_USE_ISOLATION_MODE", "shared"),
        ("BROWSER_URL", "http://shared-browser:9333"),
        ("BROWSER_USE_RPC_URL", "http://shared-browser:8787/run"),
        ("BROWSER_VIEW_BASE_URL", "https://viewer.example.com"),
    )
    for variable, setting in environment:
        monkeypatch.setenv(variable, setting)

    info = browser_use_manager.ensure_isolated_browser_runtime(
        task_id="task-1",
        honcho_session_key="session-key",
    )

    assert info["cdp_url"] == "http://shared-browser:9333"
    assert info["rpc_url"] == "http://shared-browser:8787/run"
    assert info["browser_view"] == "https://viewer.example.com/vnc.html?path=websockify"
    assert info["isolation_mode"] == "shared"
def test_isolated_mode_starts_container_and_waits_for_cdp(monkeypatch):
    """Per-principal mode runs a new container, waits for CDP and records it."""
    from tools import browser_use_manager

    for variable, setting in (
        ("BROWSER_USE_ISOLATION_MODE", "docker-per-principal"),
        ("BROWSER_RUNTIME_IMAGE", "hermes-browser-runtime:test"),
        ("BROWSER_RUNTIME_NETWORK", "hermes-net"),
        ("BROWSER_VIEW_BASE_URL", "https://viewer.example.com"),
    ):
        monkeypatch.setenv(variable, setting)

    persisted = {}
    recorded_calls = []

    def fake_run_docker(args, check=True):
        # Every "inspect" variant reports "no such container"; all other
        # docker commands succeed.
        recorded_calls.append(args)
        if args[:1] == ["inspect"]:
            return MagicMock(returncode=1, stdout="", stderr="")
        return MagicMock(returncode=0, stdout="ok", stderr="")

    with (
        patch.object(browser_use_manager, "_load_registry", return_value={"runtimes": {}}),
        patch.object(browser_use_manager, "_save_registry", side_effect=lambda payload: persisted.update(payload)),
        patch.object(browser_use_manager, "_run_docker", side_effect=fake_run_docker),
        patch.object(browser_use_manager, "_wait_for_cdp") as wait_mock,
    ):
        info = browser_use_manager.ensure_isolated_browser_runtime(
            task_id="task-1",
            honcho_session_key="telegram:chat:user",
        )

    assert info["isolation_mode"] == "docker-per-principal"
    assert info["cdp_url"].startswith("http://hermes-browser-")
    assert info["rpc_url"].startswith("http://hermes-browser-")
    assert info["rpc_url"].endswith(":8787/run")
    assert "/view/" in info["browser_view"]
    assert persisted["runtimes"]

    run_commands = [issued for issued in recorded_calls if issued[:2] == ["run", "-d"]]
    assert run_commands, "expected docker run to be invoked"
    assert "hermes-browser-runtime:test" in run_commands[0]
    wait_mock.assert_called_once()
def test_browser_use_tool_routes_via_runtime_rpc_and_cleans_up():
    """internet_browser posts to the runtime's RPC URL and then calls cleanup."""
    from tools import browser_use_tool

    runtime_info = {
        "cdp_url": "http://isolated:9222",
        "rpc_url": "http://isolated:8787/run",
        "browser_view": "https://viewer.example.com/view/abc",
        "isolation_mode": "docker-per-principal",
    }

    # Fake HTTP response usable as a context manager.
    http_reply = MagicMock()
    http_reply.read.return_value = json.dumps({"success": True, "result": "done"}).encode("utf-8")
    http_reply.__enter__.return_value = http_reply
    http_reply.__exit__.return_value = False

    with (
        patch.object(
            browser_use_tool,
            "ensure_isolated_browser_runtime",
            return_value=runtime_info,
        ) as runtime_mock,
        patch.object(browser_use_tool.request, "urlopen", return_value=http_reply) as urlopen_mock,
        patch.object(browser_use_tool, "cleanup_browser_use_runtime") as cleanup_mock,
    ):
        raw_result = browser_use_tool.registry.dispatch(
            "internet_browser",
            {"task": "open example.com"},
            task_id="task-7",
            honcho_session_key="telegram:123",
        )

    decoded = json.loads(raw_result)
    assert decoded["success"] is True
    assert decoded["result"] == "done"
    assert decoded["browser_view"] == "https://viewer.example.com/view/abc"
    assert decoded["isolation_mode"] == "docker-per-principal"

    runtime_mock.assert_called_once()
    sent_request = urlopen_mock.call_args.args[0]
    assert sent_request.full_url == "http://isolated:8787/run"

    runtime_call = runtime_mock.call_args
    assert runtime_call.kwargs["task_id"] == "task-7"
    assert runtime_call.kwargs["honcho_session_key"] == "telegram:123"
    cleanup_mock.assert_called_once_with(task_id="task-7", honcho_session_key="telegram:123")

View file

@ -15,13 +15,24 @@ The tools are imported into model_tools.py which provides a unified interface
for the AI agent to access all capabilities.
"""
import logging
logger = logging.getLogger(__name__)
# Export all tools for easy importing
from .web_tools import (
web_search_tool,
web_extract_tool,
web_crawl_tool,
check_firecrawl_api_key
)
try:
from .web_tools import (
web_search_tool,
web_extract_tool,
web_crawl_tool,
check_firecrawl_api_key
)
except ModuleNotFoundError as exc:
logger.debug("Skipping web_tools import in tools package init: %s", exc)
web_search_tool = None
web_extract_tool = None
web_crawl_tool = None
check_firecrawl_api_key = None
# Primary terminal tool (local/docker/singularity/modal/daytona/ssh)
from .terminal_tool import (
@ -263,4 +274,3 @@ __all__ = [
'check_delegate_requirements',
'DELEGATE_TASK_SCHEMA',
]

View file

@ -0,0 +1,496 @@
"""Provision isolated browser-use Docker runtimes with per-principal profiles."""
from __future__ import annotations
import hashlib
import json
import logging
import os
import re
import subprocess
import tempfile
import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional
import requests
logger = logging.getLogger(__name__)
# Fallback endpoints of the shared browser service, used when BROWSER_URL /
# BROWSER_USE_RPC_URL are unset or blank.
_DEFAULT_SHARED_CDP_URL = "http://browser:9222"
_DEFAULT_SHARED_RPC_URL = "http://browser:8787/run"

# Fallback settings for per-owner Docker runtimes.
_DEFAULT_RUNTIME_IMAGE = "hermes-browser-runtime:latest"
_DEFAULT_RUNTIME_NETWORK = "hermes-net"
_DEFAULT_TTL_SECONDS = 900  # idle time after which a runtime is cleaned up
_DEFAULT_START_TIMEOUT = 45  # seconds to wait for a new runtime's CDP endpoint
_DEFAULT_ENABLE_UI = True

# Guards read-modify-write cycles on the runtime registry file.
_REGISTRY_LOCK = threading.Lock()

# Short-lived cache of the viewer base URL discovered from the tunnel logs.
_VIEW_URL_CACHE_LOCK = threading.Lock()
_VIEW_URL_CACHE: dict[str, Any] = {"value": "", "expires_at": 0.0}
@dataclass(frozen=True)
class BrowserUseIsolationConfig:
    """Immutable snapshot of the browser-use isolation settings."""

    # "shared", "docker-per-principal" or "docker-per-task".
    mode: str
    # Docker image used for per-owner runtime containers.
    runtime_image: str
    # Docker network the runtime containers are attached to.
    runtime_network: str
    # Idle seconds after which a runtime is eligible for cleanup.
    runtime_ttl_seconds: int
    # Seconds to wait for a freshly started runtime to answer on CDP.
    runtime_start_timeout: int
    # CDP endpoint returned when mode is "shared".
    shared_cdp_url: str
    # Passed to the runtime as BROWSER_ENABLE_UI.
    enable_ui: bool
def _hermes_home() -> Path:
    """Return the Hermes home directory: ``$HERMES_HOME`` or ``~/.hermes``."""
    fallback = Path.home() / ".hermes"
    configured = os.environ.get("HERMES_HOME", fallback)
    return Path(configured)
def _registry_path() -> Path:
    """Return the JSON file that tracks provisioned Docker runtimes."""
    registry_dir = _hermes_home() / "browser_use"
    return registry_dir / "docker_runtimes.json"
def _load_runtime_config_file() -> Dict[str, Any]:
    """Return the ``browser`` mapping from ``config.yaml`` in the Hermes home.

    A missing file, an unreadable or invalid YAML document, or a ``browser``
    entry that is not a mapping all yield an empty dict; load errors are only
    logged at debug level.
    """
    yaml_file = _hermes_home() / "config.yaml"
    if yaml_file.exists():
        try:
            # Imported lazily so PyYAML is only needed when a config file exists.
            import yaml

            with open(yaml_file, "r", encoding="utf-8") as handle:
                parsed = yaml.safe_load(handle) or {}
            if isinstance(parsed, dict):
                section = parsed.get("browser", {})
                if isinstance(section, dict):
                    return section
                return {}
        except Exception as exc:
            logger.debug("Failed to load browser config for browser-use isolation: %s", exc)
    return {}
def _env_or_config(env_name: str, config_key: str, default: Any) -> Any:
    """Look a setting up in the environment first, then in the config file.

    An unset or empty environment variable defers to ``config_key`` in the
    browser config section, and finally to ``default``.
    """
    from_env = os.getenv(env_name)
    if from_env is None or from_env == "":
        return _load_runtime_config_file().get(config_key, default)
    return from_env
def _as_int(value: Any, default: int) -> int:
    """Coerce ``value`` to an int of at least 1, or return ``default`` if it cannot be parsed."""
    try:
        parsed = int(value)
    except (TypeError, ValueError):
        return default
    # Values below 1 are clamped up to 1.
    return max(1, parsed)
def _as_bool(value: Any, default: bool) -> bool:
    """Interpret ``value`` as a boolean flag.

    Real booleans pass through, ``None`` yields ``default``, and anything else
    is true only if its stripped, lower-cased string form is one of
    ``1``, ``true``, ``yes`` or ``on``.
    """
    if isinstance(value, bool):
        return value
    if value is None:
        return default
    normalized = str(value).strip().lower()
    return normalized in {"1", "true", "yes", "on"}
def get_browser_use_isolation_config() -> BrowserUseIsolationConfig:
    """Build the isolation settings from environment variables and config file.

    Each setting is read from its environment variable first and from the
    browser config section otherwise. An unrecognised mode is logged and
    replaced by ``"shared"``.
    """
    raw_mode = _env_or_config(
        "BROWSER_USE_ISOLATION_MODE",
        "browser_use_isolation_mode",
        "shared",
    )
    mode = str(raw_mode).strip().lower()
    if mode not in {"shared", "docker-per-principal", "docker-per-task"}:
        logger.warning("Unknown browser-use isolation mode %r; falling back to shared", mode)
        mode = "shared"

    image = str(
        _env_or_config("BROWSER_RUNTIME_IMAGE", "browser_use_runtime_image", _DEFAULT_RUNTIME_IMAGE)
    ).strip()
    if not image:
        image = _DEFAULT_RUNTIME_IMAGE

    network = str(
        _env_or_config("BROWSER_RUNTIME_NETWORK", "browser_use_runtime_network", _DEFAULT_RUNTIME_NETWORK)
    ).strip()

    ttl_seconds = _as_int(
        _env_or_config("BROWSER_RUNTIME_TTL_SECONDS", "browser_use_runtime_ttl_seconds", _DEFAULT_TTL_SECONDS),
        _DEFAULT_TTL_SECONDS,
    )
    start_timeout = _as_int(
        _env_or_config("BROWSER_RUNTIME_START_TIMEOUT", "browser_use_runtime_start_timeout", _DEFAULT_START_TIMEOUT),
        _DEFAULT_START_TIMEOUT,
    )

    # The shared CDP URL is environment-only; it has no config-file key.
    shared_cdp = str(os.getenv("BROWSER_URL", _DEFAULT_SHARED_CDP_URL)).strip()
    if not shared_cdp:
        shared_cdp = _DEFAULT_SHARED_CDP_URL

    ui_enabled = _as_bool(
        _env_or_config("BROWSER_RUNTIME_ENABLE_UI", "browser_use_runtime_enable_ui", _DEFAULT_ENABLE_UI),
        _DEFAULT_ENABLE_UI,
    )

    return BrowserUseIsolationConfig(
        mode=mode,
        runtime_image=image,
        runtime_network=network,
        runtime_ttl_seconds=ttl_seconds,
        runtime_start_timeout=start_timeout,
        shared_cdp_url=shared_cdp,
        enable_ui=ui_enabled,
    )
def resolve_isolation_owner(mode: str, task_id: Optional[str], honcho_session_key: Optional[str]) -> str:
    """Return the identity that owns a browser runtime.

    Args:
        mode: Isolation mode; ``"docker-per-task"`` always keys on the task.
        task_id: Current task identifier, if any.
        honcho_session_key: Session/principal key, if any.

    Returns:
        The stripped task id in per-task mode; otherwise the stripped session
        key, falling back to the stripped task id. ``"default"`` is returned
        when nothing non-blank is available.
    """
    # Normalise once so every branch agrees: the owner is hashed into the
    # container name, so stray whitespace would select a different runtime.
    task_owner = (task_id or "").strip() or "default"
    if mode == "docker-per-task":
        return task_owner
    session_owner = (honcho_session_key or "").strip()
    return session_owner or task_owner
def hash_runtime_owner(owner: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of ``owner`` (UTF-8)."""
    digest = hashlib.sha256()
    digest.update(owner.encode("utf-8"))
    return digest.hexdigest()[:16]
def _normalize_browser_view_base_url(raw_url: str) -> str:
    """Reduce a viewer URL to its base, without page name or trailing slash.

    Everything from the first ``/vnc.html`` (or, if that is absent, the first
    ``/index.html``) onwards is dropped. Blank or ``None`` input yields ``""``.
    """
    candidate = (raw_url or "").strip()
    if not candidate:
        return ""
    cut = candidate.find("/vnc.html")
    if cut == -1:
        cut = candidate.find("/index.html")
    if cut != -1:
        candidate = candidate[:cut]
    return candidate.rstrip("/")
def _discover_browser_view_base_url_from_tunnel() -> str:
    """Find the public viewer base URL in the ``hermes-tunnel`` container logs.

    The last ``https://`` URL in the final 200 log lines is normalised and
    returned. The outcome is cached: 60 seconds for a hit, 10 seconds for a
    miss. Any failure while querying Docker yields ``""``.
    """
    started = time.time()
    with _VIEW_URL_CACHE_LOCK:
        hit = str(_VIEW_URL_CACHE.get("value", "") or "")
        valid_until = float(_VIEW_URL_CACHE.get("expires_at", 0.0) or 0.0)
        if hit and started < valid_until:
            return hit

    discovered = ""
    try:
        proc = _run_docker(["logs", "--tail", "200", "hermes-tunnel"], check=False)
        # Search both output streams for the URL.
        streams = [text for text in (proc.stdout or "", proc.stderr or "") if text]
        urls = re.findall(r"https://[^\s\"'<>]+", "\n".join(streams))
        if urls:
            discovered = _normalize_browser_view_base_url(urls[-1])
    except Exception as exc:
        logger.debug("Failed to discover browser view URL from hermes-tunnel logs: %s", exc)
        discovered = ""

    lifetime = 60 if discovered else 10
    with _VIEW_URL_CACHE_LOCK:
        _VIEW_URL_CACHE["value"] = discovered
        _VIEW_URL_CACHE["expires_at"] = started + lifetime
    return discovered
def get_browser_use_view_url(
    task_id: Optional[str] = None,
    honcho_session_key: Optional[str] = None,
) -> str:
    """Return the noVNC viewer URL for the caller's browser, or ``""`` if unknown.

    The base URL comes from ``BROWSER_VIEW_BASE_URL``, then ``BROWSER_VIEW_URL``,
    then the tunnel logs. Shared mode links to the single viewer; isolated
    modes link to ``/view/<owner-hash>/``.
    """
    from_env = os.getenv("BROWSER_VIEW_BASE_URL", "") or os.getenv("BROWSER_VIEW_URL", "")
    base_url = _normalize_browser_view_base_url(from_env)
    if not base_url:
        base_url = _discover_browser_view_base_url_from_tunnel()
        if not base_url:
            return ""

    config = get_browser_use_isolation_config()
    owner_hash = hash_runtime_owner(
        resolve_isolation_owner(config.mode, task_id, honcho_session_key)
    )
    if config.mode != "shared":
        return f"{base_url}/view/{owner_hash}/vnc.html?path=view/{owner_hash}/websockify"
    return f"{base_url}/vnc.html?path=websockify"
def _shared_rpc_url() -> str:
    """Return the shared browser-use RPC URL from ``BROWSER_USE_RPC_URL`` or the default."""
    configured = str(os.getenv("BROWSER_USE_RPC_URL", _DEFAULT_SHARED_RPC_URL)).strip()
    if configured:
        return configured
    return _DEFAULT_SHARED_RPC_URL
def _runtime_rpc_url(container_name: str) -> str:
    """Return the browser-use RPC endpoint (port 8787) of a runtime container."""
    return "http://{}:8787/run".format(container_name)
def _load_registry() -> Dict[str, Any]:
    """Read the runtime registry from disk.

    Returns the stored document when it is a dict with a dict under
    ``"runtimes"``; otherwise (missing file, read or parse error, wrong
    shape) returns a fresh ``{"runtimes": {}}``.
    """
    registry_file = _registry_path()
    if registry_file.exists():
        try:
            with open(registry_file, "r", encoding="utf-8") as handle:
                loaded = json.load(handle) or {}
            if isinstance(loaded, dict) and isinstance(loaded.get("runtimes"), dict):
                return loaded
        except Exception as exc:
            logger.warning("Failed to read browser-use runtime registry %s: %s", registry_file, exc)
    return {"runtimes": {}}
def _save_registry(payload: Dict[str, Any]) -> None:
    """Persist the runtime registry by writing a temp file and renaming it.

    The temp file is created next to the target, flushed and fsynced, then
    moved over the registry path. On any failure the temp file is removed
    (best effort) and the original exception is re-raised.
    """
    target = _registry_path()
    folder = target.parent
    folder.mkdir(parents=True, exist_ok=True)
    handle_fd, scratch = tempfile.mkstemp(dir=str(folder), prefix=".browser_use_", suffix=".tmp")
    try:
        with os.fdopen(handle_fd, "w", encoding="utf-8") as out:
            json.dump(payload, out, indent=2, sort_keys=True)
            out.flush()
            os.fsync(out.fileno())
        os.replace(scratch, target)
    except Exception:
        try:
            os.unlink(scratch)
        except OSError:
            # The temp file may already be gone (e.g. after a successful replace).
            pass
        raise
def _run_docker(args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run ``docker <args>`` and capture its text output.

    Args:
        args: Arguments placed after the ``docker`` executable.
        check: When true, any failure raises ``RuntimeError``. When false, the
            caller gets a ``CompletedProcess`` and inspects ``returncode``.

    Returns:
        The completed process. If the command could not be launched or timed
        out and ``check`` is false, a synthetic result with a non-zero return
        code (127 for launch failure, 124 for timeout) and the error text in
        ``stderr`` is returned instead.

    Raises:
        RuntimeError: ``check`` is true and the command exited non-zero, could
            not be started, or exceeded the 120 second timeout.
    """
    cmd = ["docker", *args]
    rendered = " ".join(cmd)
    logger.debug("browser-use docker cmd: %s", rendered)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # A missing docker binary or a hung daemon must honour check=False the
        # same way a non-zero exit does, so probe helpers report "absent"
        # instead of crashing.
        if check:
            raise RuntimeError(f"Docker command failed ({rendered}): {exc}") from exc
        logger.warning("Docker command could not complete (%s): %s", rendered, exc)
        code = 124 if isinstance(exc, subprocess.TimeoutExpired) else 127
        return subprocess.CompletedProcess(cmd, code, stdout="", stderr=str(exc))
    if check and result.returncode != 0:
        stderr = (result.stderr or result.stdout or "").strip()
        raise RuntimeError(f"Docker command failed ({rendered}): {stderr}")
    return result
def _ensure_docker_access() -> None:
    """Verify Docker is usable by running ``docker version``; failures propagate."""
    version_args = ["version"]
    _run_docker(version_args, check=True)
def _container_exists(container_name: str) -> bool:
    """Return True if ``docker inspect`` succeeds for ``container_name``."""
    probe = _run_docker(["inspect", container_name], check=False)
    return not probe.returncode != 0
def _container_running(container_name: str) -> bool:
    """Return True if Docker reports the container's ``State.Running`` as true."""
    probe = _run_docker(
        ["inspect", "-f", "{{.State.Running}}", container_name],
        check=False,
    )
    if probe.returncode != 0:
        return False
    return probe.stdout.strip().lower() == "true"
def _remove_container(container_name: str) -> None:
    """Force-remove a container, ignoring failures; a blank name is a no-op."""
    if container_name:
        _run_docker(["rm", "-f", container_name], check=False)
def _volume_exists(volume_name: str) -> bool:
    """Return True if ``docker volume inspect`` succeeds for ``volume_name``."""
    probe = _run_docker(["volume", "inspect", volume_name], check=False)
    return not probe.returncode != 0
def _ensure_volume(volume_name: str, owner_hash: str) -> None:
    """Create the labelled profile volume unless it already exists.

    Raises whatever ``_run_docker`` raises when volume creation fails.
    """
    if not _volume_exists(volume_name):
        create_args = ["volume", "create"]
        for label in ("hermes.browser_use=true", f"hermes.owner_hash={owner_hash}"):
            create_args.extend(["--label", label])
        create_args.append(volume_name)
        _run_docker(create_args, check=True)
def _remove_volume(volume_name: str) -> None:
    """Force-remove a volume, ignoring failures; a blank name is a no-op."""
    if volume_name:
        _run_docker(["volume", "rm", "-f", volume_name], check=False)
def _start_runtime_container(
    container_name: str,
    volume_name: str,
    owner_hash: str,
    config: BrowserUseIsolationConfig,
) -> None:
    """Start a detached runtime container with the owner's profile volume at ``/data``.

    The volume is created first if needed. The container joins the configured
    network (or the default when blank), gets a 2g shared-memory segment, and
    is labelled so it can be recognised as managed by this module.
    """
    _ensure_volume(volume_name, owner_hash)

    network = config.runtime_network or _DEFAULT_RUNTIME_NETWORK
    ui_flag = "true" if config.enable_ui else "false"
    labels = (
        "hermes.browser_use=true",
        f"hermes.owner_hash={owner_hash}",
        "hermes.managed_by=browser_use_manager",
    )
    env_vars = (
        f"BROWSER_ENABLE_UI={ui_flag}",
        "BROWSER_DATA_DIR=/data",
    )

    command = ["run", "-d", "--name", container_name, "--network", network, "--shm-size", "2g"]
    for label in labels:
        command += ["--label", label]
    for assignment in env_vars:
        command += ["-e", assignment]
    command += ["-v", f"{volume_name}:/data", config.runtime_image]

    _run_docker(command, check=True)
def _wait_for_cdp(container_name: str, timeout_seconds: int) -> None:
    """Block until the runtime's CDP endpoint answers, or raise on timeout.

    Polls ``http://<container_name>:9222/json/version`` about once per second.

    Args:
        container_name: Hostname of the runtime container.
        timeout_seconds: Maximum time to keep polling.

    Raises:
        RuntimeError: The endpoint did not return a successful response before
            the deadline; the message carries the last observed error.
    """
    # Monotonic clock: the deadline must not move when the wall clock is adjusted.
    deadline = time.monotonic() + timeout_seconds
    cdp_url = f"http://{container_name}:9222/json/version"
    last_error = ""
    while True:
        # Probe at least once, even with a zero or negative timeout.
        try:
            response = requests.get(cdp_url, timeout=2)
            if response.ok:
                return
            last_error = f"HTTP {response.status_code}"
        except Exception as exc:
            # Connection errors are expected while the container boots; keep polling.
            last_error = str(exc)
        if time.monotonic() >= deadline:
            break
        time.sleep(1)
    raise RuntimeError(f"Browser runtime {container_name} did not become ready: {last_error}")
def _cleanup_expired_runtimes_locked(registry: Dict[str, Any], config: BrowserUseIsolationConfig) -> None:
    """Remove runtimes whose last use is older than the configured TTL.

    Mutates ``registry`` in place. Entries without a ``last_used`` timestamp
    are never expired. Containers are always removed; the profile volume is
    removed only for ``docker-per-task`` entries. The ``_locked`` suffix
    suggests the caller is expected to hold the registry lock.
    """
    checked_at = time.time()
    tracked = registry.setdefault("runtimes", {})
    stale = []
    for key, record in list(tracked.items()):
        stamp = float(record.get("last_used", 0) or 0)
        if not stamp or checked_at - stamp < config.runtime_ttl_seconds:
            continue
        name = record.get("container_name", "")
        volume = record.get("volume_name", "")
        record_mode = record.get("mode", "")
        logger.info("Cleaning expired browser-use runtime %s (%s)", key, name)
        _remove_container(name)
        if record_mode == "docker-per-task":
            _remove_volume(volume)
        stale.append(key)
    # Registry entries are dropped only after every removal above has run.
    for key in stale:
        tracked.pop(key, None)
def ensure_isolated_browser_runtime(
    task_id: Optional[str] = None,
    honcho_session_key: Optional[str] = None,
) -> Dict[str, str]:
    """Return connection details for the browser runtime serving this caller.

    In ``shared`` mode no container is managed: the shared CDP/RPC URLs are
    returned with empty ``owner`` / ``owner_hash``.

    In every other mode the owner is resolved from ``task_id`` /
    ``honcho_session_key`` and mapped to a container named
    ``hermes-browser-<owner_hash>`` with a profile volume named
    ``hermes-browser-profile-<owner_hash>``. While holding ``_REGISTRY_LOCK``:

      1. expired runtimes are cleaned up;
      2. a running container is reused as-is;
      3. otherwise any stale container of that name is removed, a fresh one
         is started, and the call waits for its CDP endpoint;
      4. the registry entry's ``last_used`` is refreshed and saved.

    If start-up fails, the half-started container is removed again (plus its
    volume in ``docker-per-task`` mode) before the error propagates.

    Args:
        task_id: Identifier of the current task, if any.
        honcho_session_key: Session key of the caller, if any.

    Returns:
        A dict with the keys ``cdp_url``, ``rpc_url``, ``browser_view``,
        ``isolation_mode``, ``owner`` and ``owner_hash``.

    Raises:
        RuntimeError: When the new runtime's CDP endpoint does not become
            ready in ``config.runtime_start_timeout`` seconds. Errors from
            the helper calls propagate unchanged.
    """
    config = get_browser_use_isolation_config()
    if config.mode == "shared":
        return {
            "cdp_url": config.shared_cdp_url,
            "rpc_url": _shared_rpc_url(),
            "browser_view": get_browser_use_view_url(
                task_id=task_id,
                honcho_session_key=honcho_session_key,
            ),
            "isolation_mode": "shared",
            "owner": "",
            "owner_hash": "",
        }

    _ensure_docker_access()
    owner = resolve_isolation_owner(config.mode, task_id, honcho_session_key)
    owner_hash = hash_runtime_owner(owner)
    runtime_key = f"{config.mode}:{owner_hash}"
    container_name = f"hermes-browser-{owner_hash}"
    volume_name = f"hermes-browser-profile-{owner_hash}"

    with _REGISTRY_LOCK:
        registry = _load_registry()
        _cleanup_expired_runtimes_locked(registry, config)

        if not _container_running(container_name):
            # A stopped/exited container with the same name would make the
            # new `run --name` collide, so clear it first.
            if _container_exists(container_name):
                _remove_container(container_name)
            try:
                _start_runtime_container(container_name, volume_name, owner_hash, config)
                _wait_for_cdp(container_name, config.runtime_start_timeout)
            except Exception:
                # The runtime never made it into the registry, so TTL cleanup
                # would not find it; and because it may still be "running",
                # the next call would hand it out without a readiness check.
                # Tear it down before propagating the original error.
                try:
                    _remove_container(container_name)
                    if config.mode == "docker-per-task":
                        _remove_volume(volume_name)
                except Exception:
                    logger.exception(
                        "Failed to clean up browser-use runtime %s after startup error",
                        container_name,
                    )
                raise

        # Reused and freshly started runtimes are registered identically;
        # refreshing last_used keeps the entry from expiring.
        registry.setdefault("runtimes", {})[runtime_key] = {
            "container_name": container_name,
            "volume_name": volume_name,
            "last_used": time.time(),
            "mode": config.mode,
            "owner_hash": owner_hash,
        }
        _save_registry(registry)

        return {
            "cdp_url": f"http://{container_name}:9222",
            "rpc_url": _runtime_rpc_url(container_name),
            "browser_view": get_browser_use_view_url(
                task_id=task_id,
                honcho_session_key=honcho_session_key,
            ),
            "isolation_mode": config.mode,
            "owner": owner,
            "owner_hash": owner_hash,
        }
def cleanup_browser_use_runtime(
    task_id: Optional[str] = None,
    honcho_session_key: Optional[str] = None,
) -> None:
    """Dispose of the per-task browser runtime belonging to this caller.

    Does nothing unless the configured isolation mode is ``docker-per-task``.
    Otherwise, while holding ``_REGISTRY_LOCK``, it removes the owner's
    container and profile volume, drops the matching registry entry (if any)
    and saves the registry.

    Args:
        task_id: Identifier of the task whose runtime should be removed.
        honcho_session_key: Session key passed through to owner resolution.
    """
    config = get_browser_use_isolation_config()
    is_per_task = config.mode == "docker-per-task"
    if not is_per_task:
        return

    owner_hash = hash_runtime_owner(
        resolve_isolation_owner(config.mode, task_id, honcho_session_key)
    )
    registry_key = f"{config.mode}:{owner_hash}"
    container = f"hermes-browser-{owner_hash}"
    profile_volume = f"hermes-browser-profile-{owner_hash}"

    with _REGISTRY_LOCK:
        registry = _load_registry()
        _remove_container(container)
        _remove_volume(profile_volume)
        known_runtimes = registry.setdefault("runtimes", {})
        known_runtimes.pop(registry_key, None)
        _save_registry(registry)

View file

@ -1,14 +1,23 @@
import json
import os
from urllib import error, request
from tools.browser_use_manager import (
cleanup_browser_use_runtime,
ensure_isolated_browser_runtime,
)
from tools.registry import registry
def run_browser_task(task):
def run_browser_task(task, task_id=None, honcho_session_key=None):
if not task or not str(task).strip():
return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
runtime = ensure_isolated_browser_runtime(
task_id=task_id,
honcho_session_key=honcho_session_key,
)
rpc_url = runtime.get("rpc_url") or os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
payload = json.dumps({"task": task}).encode("utf-8")
req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
@ -16,7 +25,20 @@ def run_browser_task(task):
try:
with request.urlopen(req, timeout=timeout_sec) as resp:
body = resp.read().decode("utf-8")
try:
data = json.loads(body)
except json.JSONDecodeError:
return body
if isinstance(data, dict):
browser_view = runtime.get("browser_view")
if browser_view and not data.get("browser_view"):
data["browser_view"] = browser_view
data["isolation_mode"] = runtime.get("isolation_mode", "shared")
return json.dumps(data, ensure_ascii=False)
return body
except error.HTTPError as http_err:
body = http_err.read().decode("utf-8", errors="replace")
return json.dumps(
@ -35,6 +57,14 @@ def run_browser_task(task):
},
ensure_ascii=False,
)
finally:
try:
cleanup_browser_use_runtime(
task_id=task_id,
honcho_session_key=honcho_session_key,
)
except Exception:
pass
registry.register(
@ -59,7 +89,10 @@ registry.register(
"required": ["task"]
}
},
handler=lambda args, **kw: run_browser_task(args.get("task")),
handler=lambda args, **kw: run_browser_task(
args.get("task"),
task_id=kw.get("task_id"),
honcho_session_key=kw.get("honcho_session_key"),
),
emoji="🌐",
)