diff --git a/.env.example b/.env.example index 16fbeacf..70be968b 100644 --- a/.env.example +++ b/.env.example @@ -13,10 +13,11 @@ TELEGRAM_ALLOWED_USERS= TELEGRAM_HOME_CHANNEL= BROWSER_URL=http://browser:9222 -BROWSER_VIEW_URL= +BROWSER_VIEW_URL=http://localhost:6080 +BROWSER_VIEW_BASE_URL= BROWSER_API_HOST=0.0.0.0 BROWSER_API_PORT=8088 BROWSER_USE_RPC_URL=http://browser:8787/run BROWSER_USE_RPC_TIMEOUT=900 -BROWSER_API_MAX_CONCURRENCY=2 \ No newline at end of file +BROWSER_API_MAX_CONCURRENCY=2 diff --git a/.gitignore b/.gitignore index 56299679..63bd7658 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ hermes_code/test_browser.py .git .github .idea +.DS_Store hermes_data workspace diff --git a/browser_env/entrypoint.sh b/browser_env/entrypoint.sh index 052ca6c5..f00cb273 100644 --- a/browser_env/entrypoint.sh +++ b/browser_env/entrypoint.sh @@ -7,10 +7,11 @@ XVFB_LOG="/tmp/xvfb.log" VNC_PORT="${VNC_PORT:-5900}" NOVNC_PORT="${NOVNC_PORT:-6080}" -CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-9223}" -CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-9222}" +CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-${BROWSER_CHROME_DEBUG_PORT:-9223}}" +CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-${BROWSER_CDP_PROXY_PORT:-9222}}" BROWSER_USE_RPC_PORT="${BROWSER_USE_RPC_PORT:-8787}" -CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-/src/browser_data}" +CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-${BROWSER_DATA_DIR:-/src/browser_data}}" +BROWSER_ENABLE_UI="${BROWSER_ENABLE_UI:-true}" MAX_RESTARTS="${MAX_RESTARTS:-10}" RESTART_WINDOW_SEC="${RESTART_WINDOW_SEC:-60}" @@ -98,7 +99,6 @@ if [ ! -f /var/lib/dbus/machine-id ]; then dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true fi -# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY. rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true log "starting X stack on DISPLAY=${DISPLAY}" @@ -116,19 +116,23 @@ if ! 
wait_for_x_display 15; then exit 1 fi -start_bg fluxbox -start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared -start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}" +if [ "$BROWSER_ENABLE_UI" != "false" ]; then + start_bg fluxbox + start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared + start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}" +fi start_bg socat "TCP-LISTEN:${CHROME_PUBLIC_DEBUG_PORT},fork,reuseaddr" "TCP:127.0.0.1:${CHROME_LOCAL_DEBUG_PORT}" start_bg python3 -u /src/browser_use_runner.py -if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then - log "fatal: x11vnc did not open port ${VNC_PORT}" - exit 1 -fi -if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then - log "fatal: websockify did not open port ${NOVNC_PORT}" - exit 1 +if [ "$BROWSER_ENABLE_UI" != "false" ]; then + if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then + log "fatal: x11vnc did not open port ${VNC_PORT}" + exit 1 + fi + if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then + log "fatal: websockify did not open port ${NOVNC_PORT}" + exit 1 + fi fi if ! 
wait_for_port 127.0.0.1 "$BROWSER_USE_RPC_PORT" 20; then log "fatal: browser-use RPC did not open port ${BROWSER_USE_RPC_PORT}" @@ -194,4 +198,3 @@ while true; do unset CHROME_EXIT unset CHROME_PID done - diff --git a/browser_env/nginx.browser-view.conf b/browser_env/nginx.browser-view.conf new file mode 100644 index 00000000..0d950456 --- /dev/null +++ b/browser_env/nginx.browser-view.conf @@ -0,0 +1,35 @@ +events {} + +http { + resolver 127.0.0.11 ipv6=off; + + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + listen 8080; + server_name _; + + location = / { + add_header Content-Type text/plain; + return 200 "Browser view proxy is running.\n"; + } + + location ~ "^/view/(?<owner>[a-f0-9]{16})$" { + return 302 /view/$owner/vnc.html?path=view/$owner/websockify; + } + + location ~ "^/view/(?<owner>[a-f0-9]{16})/(?<rest>.*)$" { + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_buffering off; + proxy_pass http://hermes-browser-$owner:6080/$rest$is_args$args; + } + } +} diff --git a/docker-compose.yml b/docker-compose.yml index 5ec5a166..c9e7b754 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,12 +9,18 @@ services: environment: - BROWSER_URL=http://browser:9222 - BROWSER_USE_RPC_URL=http://browser:8787/run + - BROWSER_VIEW_BASE_URL=${BROWSER_VIEW_BASE_URL:-} + - BROWSER_USE_ISOLATION_MODE=docker-per-principal + - BROWSER_RUNTIME_IMAGE=hermes-browser-runtime:latest + - BROWSER_RUNTIME_NETWORK=hermes-net + - BROWSER_RUNTIME_ENABLE_UI=true - HERMES_HOME=/app/hermes_data volumes: - ./hermes_code:/app/hermes_code:ro - ./hermes_data:/app/hermes_data:rw - ./workspace:/app/workspace:rw - ./config.example.yaml:/app/config.example.yaml:ro + - /var/run/docker.sock:/var/run/docker.sock depends_on: browser: condition: service_healthy @@ 
-35,10 +41,12 @@ services: fi; exec python -m gateway.run " + browser: build: context: ./browser_env dockerfile: Dockerfile.browser + image: hermes-browser-runtime:latest container_name: hermes-browser env_file: - .env @@ -79,13 +87,25 @@ services: networks: - hermes-net + browser-view-proxy: + image: nginx:alpine + container_name: hermes-browser-view-proxy + volumes: + - ./browser_env/nginx.browser-view.conf:/etc/nginx/nginx.conf:ro + depends_on: + browser: + condition: service_healthy + restart: always + networks: + - hermes-net + tunnel: image: cloudflare/cloudflared:latest profiles: - remote container_name: hermes-tunnel restart: always - command: tunnel --protocol http2 --url http://browser:6080 --no-tls-verify + command: tunnel --protocol http2 --url http://browser-view-proxy:8080 --no-tls-verify networks: - hermes-net @@ -94,4 +114,5 @@ volumes: networks: hermes-net: - driver: bridge \ No newline at end of file + name: hermes-net + driver: bridge diff --git a/hermes_code/cli.py b/hermes_code/cli.py index c15bd87b..5b1247d1 100644 --- a/hermes_code/cli.py +++ b/hermes_code/cli.py @@ -173,6 +173,12 @@ def load_cli_config() -> Dict[str, Any]: "browser": { "inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min "record_sessions": False, # Auto-record browser sessions as WebM videos + "browser_use_isolation_mode": "shared", + "browser_use_runtime_image": "hermes-browser-runtime:latest", + "browser_use_runtime_network": "hermes-net", + "browser_use_runtime_ttl_seconds": 900, + "browser_use_runtime_start_timeout": 45, + "browser_use_runtime_enable_ui": True, }, "compression": { "enabled": True, # Auto-compress when approaching context limit @@ -4652,6 +4658,15 @@ class HermesCLI: label = label[:47] + "..." 
self._spinner_text = f"{emoji} {label}" self._invalidate() + if function_name == "internet_browser": + browser_view_url = "" + try: + from tools.browser_use_manager import get_browser_use_view_url + browser_view_url = get_browser_use_view_url(task_id=self.session_id) + except Exception: + browser_view_url = os.getenv("BROWSER_VIEW_URL", "").strip() + if browser_view_url: + _cprint(f" ┊ {emoji} Viewer: {browser_view_url}") if not self._voice_mode: return diff --git a/hermes_code/gateway/run.py b/hermes_code/gateway/run.py index c8cfae5d..a663dca9 100644 --- a/hermes_code/gateway/run.py +++ b/hermes_code/gateway/run.py @@ -4931,6 +4931,7 @@ class GatewayRunner: last_tool = [None] # Mutable container for tracking in closure last_progress_msg = [None] # Track last message for dedup repeat_count = [0] # How many times the same message repeated + browser_view_sent = [False] # Only announce browser viewer once per run def progress_callback(tool_name: str, preview: str = None, args: dict = None): """Callback invoked by agent when a tool is called.""" @@ -4963,6 +4964,23 @@ class GatewayRunner: msg = f"{emoji} {tool_name}: \"{preview}\"" else: msg = f"{emoji} {tool_name}..." + + if tool_name == "internet_browser" and not browser_view_sent[0]: + browser_view_url = "" + try: + from tools.browser_use_manager import get_browser_use_view_url + browser_view_url = get_browser_use_view_url( + task_id=session_id, + honcho_session_key=session_key, + ) + except Exception: + browser_view_url = os.getenv("BROWSER_VIEW_URL", "").strip() + if browser_view_url: + msg = ( + f"{msg}\n" + f"Browser view: {browser_view_url}" + ) + browser_view_sent[0] = True # Dedup: collapse consecutive identical progress messages. 
# Common with execute_code where models iterate with the same diff --git a/hermes_code/hermes_cli/config.py b/hermes_code/hermes_cli/config.py index 857b784d..fb841366 100644 --- a/hermes_code/hermes_cli/config.py +++ b/hermes_code/hermes_cli/config.py @@ -151,6 +151,12 @@ DEFAULT_CONFIG = { "inactivity_timeout": 120, "command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.) "record_sessions": False, # Auto-record browser sessions as WebM videos + "browser_use_isolation_mode": "shared", + "browser_use_runtime_image": "hermes-browser-runtime:latest", + "browser_use_runtime_network": "hermes-net", + "browser_use_runtime_ttl_seconds": 900, + "browser_use_runtime_start_timeout": 45, + "browser_use_runtime_enable_ui": True, }, # Filesystem checkpoints — automatic snapshots before destructive file ops. diff --git a/hermes_code/skills/autonomous-ai-agents/opencode/SKILL.md b/hermes_code/skills/autonomous-ai-agents/opencode/SKILL.md deleted file mode 100644 index 37707dbc..00000000 --- a/hermes_code/skills/autonomous-ai-agents/opencode/SKILL.md +++ /dev/null @@ -1,218 +0,0 @@ ---- -name: opencode -description: Delegate coding tasks to OpenCode CLI agent for feature implementation, refactoring, PR review, and long-running autonomous sessions. Requires the opencode CLI installed and authenticated. -version: 1.2.0 -author: Hermes Agent -license: MIT -metadata: - hermes: - tags: [Coding-Agent, OpenCode, Autonomous, Refactoring, Code-Review] - related_skills: [claude-code, codex, hermes-agent] ---- - -# OpenCode CLI - -Use [OpenCode](https://opencode.ai) as an autonomous coding worker orchestrated by Hermes terminal/process tools. OpenCode is a provider-agnostic, open-source AI coding agent with a TUI and CLI. 
- -## When to Use - -- User explicitly asks to use OpenCode -- You want an external coding agent to implement/refactor/review code -- You need long-running coding sessions with progress checks -- You want parallel task execution in isolated workdirs/worktrees - -## Prerequisites - -- OpenCode installed: `npm i -g opencode-ai@latest` or `brew install anomalyco/tap/opencode` -- Auth configured: `opencode auth login` or set provider env vars (OPENROUTER_API_KEY, etc.) -- Verify: `opencode auth list` should show at least one provider -- Git repository for code tasks (recommended) -- `pty=true` for interactive TUI sessions - -## Binary Resolution (Important) - -Shell environments may resolve different OpenCode binaries. If behavior differs between your terminal and Hermes, check: - -``` -terminal(command="which -a opencode") -terminal(command="opencode --version") -``` - -If needed, pin an explicit binary path: - -``` -terminal(command="$HOME/.opencode/bin/opencode run '...'", workdir="~/project", pty=true) -``` - -## One-Shot Tasks - -Use `opencode run` for bounded, non-interactive tasks: - -``` -terminal(command="opencode run 'Add retry logic to API calls and update tests'", workdir="~/project") -``` - -Attach context files with `-f`: - -``` -terminal(command="opencode run 'Review this config for security issues' -f config.yaml -f .env.example", workdir="~/project") -``` - -Show model thinking with `--thinking`: - -``` -terminal(command="opencode run 'Debug why tests fail in CI' --thinking", workdir="~/project") -``` - -Force a specific model: - -``` -terminal(command="opencode run 'Refactor auth module' --model openrouter/anthropic/claude-sonnet-4", workdir="~/project") -``` - -## Interactive Sessions (Background) - -For iterative work requiring multiple exchanges, start the TUI in background: - -``` -terminal(command="opencode", workdir="~/project", background=true, pty=true) -# Returns session_id - -# Send a prompt -process(action="submit", session_id="", 
data="Implement OAuth refresh flow and add tests") - -# Monitor progress -process(action="poll", session_id="") -process(action="log", session_id="") - -# Send follow-up input -process(action="submit", session_id="", data="Now add error handling for token expiry") - -# Exit cleanly — Ctrl+C -process(action="write", session_id="", data="\x03") -# Or just kill the process -process(action="kill", session_id="") -``` - -**Important:** Do NOT use `/exit` — it is not a valid OpenCode command and will open an agent selector dialog instead. Use Ctrl+C (`\x03`) or `process(action="kill")` to exit. - -### TUI Keybindings - -| Key | Action | -|-----|--------| -| `Enter` | Submit message (press twice if needed) | -| `Tab` | Switch between agents (build/plan) | -| `Ctrl+P` | Open command palette | -| `Ctrl+X L` | Switch session | -| `Ctrl+X M` | Switch model | -| `Ctrl+X N` | New session | -| `Ctrl+X E` | Open editor | -| `Ctrl+C` | Exit OpenCode | - -### Resuming Sessions - -After exiting, OpenCode prints a session ID. Resume with: - -``` -terminal(command="opencode -c", workdir="~/project", background=true, pty=true) # Continue last session -terminal(command="opencode -s ses_abc123", workdir="~/project", background=true, pty=true) # Specific session -``` - -## Common Flags - -| Flag | Use | -|------|-----| -| `run 'prompt'` | One-shot execution and exit | -| `--continue` / `-c` | Continue the last OpenCode session | -| `--session ` / `-s` | Continue a specific session | -| `--agent ` | Choose OpenCode agent (build or plan) | -| `--model provider/model` | Force specific model | -| `--format json` | Machine-readable output/events | -| `--file ` / `-f` | Attach file(s) to the message | -| `--thinking` | Show model thinking blocks | -| `--variant ` | Reasoning effort (high, max, minimal) | -| `--title ` | Name the session | -| `--attach ` | Connect to a running opencode server | - -## Procedure - -1. 
Verify tool readiness: - - `terminal(command="opencode --version")` - - `terminal(command="opencode auth list")` -2. For bounded tasks, use `opencode run '...'` (no pty needed). -3. For iterative tasks, start `opencode` with `background=true, pty=true`. -4. Monitor long tasks with `process(action="poll"|"log")`. -5. If OpenCode asks for input, respond via `process(action="submit", ...)`. -6. Exit with `process(action="write", data="\x03")` or `process(action="kill")`. -7. Summarize file changes, test results, and next steps back to user. - -## PR Review Workflow - -OpenCode has a built-in PR command: - -``` -terminal(command="opencode pr 42", workdir="~/project", pty=true) -``` - -Or review in a temporary clone for isolation: - -``` -terminal(command="REVIEW=$(mktemp -d) && git clone https://github.com/user/repo.git $REVIEW && cd $REVIEW && opencode run 'Review this PR vs main. Report bugs, security risks, test gaps, and style issues.' -f $(git diff origin/main --name-only | head -20 | tr '\n' ' ')", pty=true) -``` - -## Parallel Work Pattern - -Use separate workdirs/worktrees to avoid collisions: - -``` -terminal(command="opencode run 'Fix issue #101 and commit'", workdir="/tmp/issue-101", background=true, pty=true) -terminal(command="opencode run 'Add parser regression tests and commit'", workdir="/tmp/issue-102", background=true, pty=true) -process(action="list") -``` - -## Session & Cost Management - -List past sessions: - -``` -terminal(command="opencode session list") -``` - -Check token usage and costs: - -``` -terminal(command="opencode stats") -terminal(command="opencode stats --days 7 --models anthropic/claude-sonnet-4") -``` - -## Pitfalls - -- Interactive `opencode` (TUI) sessions require `pty=true`. The `opencode run` command does NOT need pty. -- `/exit` is NOT a valid command — it opens an agent selector. Use Ctrl+C to exit the TUI. -- PATH mismatch can select the wrong OpenCode binary/model config. 
-- If OpenCode appears stuck, inspect logs before killing: - - `process(action="log", session_id="")` -- Avoid sharing one working directory across parallel OpenCode sessions. -- Enter may need to be pressed twice to submit in the TUI (once to finalize text, once to send). - -## Verification - -Smoke test: - -``` -terminal(command="opencode run 'Respond with exactly: OPENCODE_SMOKE_OK'") -``` - -Success criteria: -- Output includes `OPENCODE_SMOKE_OK` -- Command exits without provider/model errors -- For code tasks: expected files changed and tests pass - -## Rules - -1. Prefer `opencode run` for one-shot automation — it's simpler and doesn't need pty. -2. Use interactive background mode only when iteration is needed. -3. Always scope OpenCode sessions to a single repo/workdir. -4. For long tasks, provide progress updates from `process` logs. -5. Report concrete outcomes (files changed, tests, remaining risks). -6. Exit interactive sessions with Ctrl+C or kill, never `/exit`. diff --git a/hermes_code/tests/tools/test_browser_use_isolation.py b/hermes_code/tests/tools/test_browser_use_isolation.py new file mode 100644 index 00000000..25001ad0 --- /dev/null +++ b/hermes_code/tests/tools/test_browser_use_isolation.py @@ -0,0 +1,140 @@ +"""Tests for browser-use Docker isolation manager.""" + +import json +from unittest.mock import MagicMock, patch + + +def test_resolve_isolation_owner_prefers_honcho_session_key(): + from tools.browser_use_manager import resolve_isolation_owner + + owner = resolve_isolation_owner( + "docker-per-principal", + task_id="task-1", + honcho_session_key="telegram:chat:user", + ) + + assert owner == "telegram:chat:user" + + +def test_resolve_isolation_owner_uses_task_for_per_task_mode(): + from tools.browser_use_manager import resolve_isolation_owner + + owner = resolve_isolation_owner( + "docker-per-task", + task_id="task-42", + honcho_session_key="telegram:chat:user", + ) + + assert owner == "task-42" + + +def 
test_hash_runtime_owner_is_stable(): + from tools.browser_use_manager import hash_runtime_owner + + assert hash_runtime_owner("owner-1") == hash_runtime_owner("owner-1") + assert hash_runtime_owner("owner-1") != hash_runtime_owner("owner-2") + + +def test_shared_mode_returns_shared_cdp_url(monkeypatch): + from tools import browser_use_manager + + monkeypatch.setenv("BROWSER_USE_ISOLATION_MODE", "shared") + monkeypatch.setenv("BROWSER_URL", "http://shared-browser:9333") + monkeypatch.setenv("BROWSER_USE_RPC_URL", "http://shared-browser:8787/run") + monkeypatch.setenv("BROWSER_VIEW_BASE_URL", "https://viewer.example.com") + + runtime = browser_use_manager.ensure_isolated_browser_runtime( + task_id="task-1", + honcho_session_key="session-key", + ) + + assert runtime["cdp_url"] == "http://shared-browser:9333" + assert runtime["rpc_url"] == "http://shared-browser:8787/run" + assert runtime["browser_view"] == "https://viewer.example.com/vnc.html?path=websockify" + assert runtime["isolation_mode"] == "shared" + + +def test_isolated_mode_starts_container_and_waits_for_cdp(monkeypatch): + from tools import browser_use_manager + + monkeypatch.setenv("BROWSER_USE_ISOLATION_MODE", "docker-per-principal") + monkeypatch.setenv("BROWSER_RUNTIME_IMAGE", "hermes-browser-runtime:test") + monkeypatch.setenv("BROWSER_RUNTIME_NETWORK", "hermes-net") + monkeypatch.setenv("BROWSER_VIEW_BASE_URL", "https://viewer.example.com") + + saved_registry = {} + docker_calls = [] + + def fake_run_docker(args, check=True): + docker_calls.append(args) + if args[:2] == ["inspect", "-f"]: + return MagicMock(returncode=1, stdout="", stderr="") + if args[:1] == ["inspect"]: + return MagicMock(returncode=1, stdout="", stderr="") + return MagicMock(returncode=0, stdout="ok", stderr="") + + with ( + patch.object(browser_use_manager, "_load_registry", return_value={"runtimes": {}}), + patch.object(browser_use_manager, "_save_registry", side_effect=lambda payload: saved_registry.update(payload)), + 
patch.object(browser_use_manager, "_run_docker", side_effect=fake_run_docker), + patch.object(browser_use_manager, "_wait_for_cdp") as mock_wait, + ): + runtime = browser_use_manager.ensure_isolated_browser_runtime( + task_id="task-1", + honcho_session_key="telegram:chat:user", + ) + + assert runtime["isolation_mode"] == "docker-per-principal" + assert runtime["cdp_url"].startswith("http://hermes-browser-") + assert runtime["rpc_url"].startswith("http://hermes-browser-") + assert runtime["rpc_url"].endswith(":8787/run") + assert "/view/" in runtime["browser_view"] + assert saved_registry["runtimes"] + run_commands = [call for call in docker_calls if call[:2] == ["run", "-d"]] + assert run_commands, "expected docker run to be invoked" + assert "hermes-browser-runtime:test" in run_commands[0] + mock_wait.assert_called_once() + + +def test_browser_use_tool_routes_via_runtime_rpc_and_cleans_up(): + from tools import browser_use_tool + + rpc_response = json.dumps({"success": True, "result": "done"}).encode("utf-8") + fake_http_response = MagicMock() + fake_http_response.read.return_value = rpc_response + fake_http_response.__enter__.return_value = fake_http_response + fake_http_response.__exit__.return_value = False + + with ( + patch.object( + browser_use_tool, + "ensure_isolated_browser_runtime", + return_value={ + "cdp_url": "http://isolated:9222", + "rpc_url": "http://isolated:8787/run", + "browser_view": "https://viewer.example.com/view/abc", + "isolation_mode": "docker-per-principal", + }, + ) as mock_runtime, + patch.object(browser_use_tool.request, "urlopen", return_value=fake_http_response) as mock_urlopen, + patch.object(browser_use_tool, "cleanup_browser_use_runtime") as mock_cleanup, + ): + result = browser_use_tool.registry.dispatch( + "internet_browser", + {"task": "open example.com"}, + task_id="task-7", + honcho_session_key="telegram:123", + ) + + payload = json.loads(result) + assert payload["success"] is True + assert payload["result"] == "done" + 
assert payload["browser_view"] == "https://viewer.example.com/view/abc" + assert payload["isolation_mode"] == "docker-per-principal" + mock_runtime.assert_called_once() + request_obj = mock_urlopen.call_args.args[0] + assert request_obj.full_url == "http://isolated:8787/run" + call = mock_runtime.call_args + assert call.kwargs["task_id"] == "task-7" + assert call.kwargs["honcho_session_key"] == "telegram:123" + mock_cleanup.assert_called_once_with(task_id="task-7", honcho_session_key="telegram:123") diff --git a/hermes_code/tools/__init__.py b/hermes_code/tools/__init__.py index 975e9cb4..8ef9d76d 100644 --- a/hermes_code/tools/__init__.py +++ b/hermes_code/tools/__init__.py @@ -15,13 +15,24 @@ The tools are imported into model_tools.py which provides a unified interface for the AI agent to access all capabilities. """ +import logging + +logger = logging.getLogger(__name__) + # Export all tools for easy importing -from .web_tools import ( - web_search_tool, - web_extract_tool, - web_crawl_tool, - check_firecrawl_api_key -) +try: + from .web_tools import ( + web_search_tool, + web_extract_tool, + web_crawl_tool, + check_firecrawl_api_key + ) +except ModuleNotFoundError as exc: + logger.debug("Skipping web_tools import in tools package init: %s", exc) + web_search_tool = None + web_extract_tool = None + web_crawl_tool = None + check_firecrawl_api_key = None # Primary terminal tool (local/docker/singularity/modal/daytona/ssh) from .terminal_tool import ( @@ -263,4 +274,3 @@ __all__ = [ 'check_delegate_requirements', 'DELEGATE_TASK_SCHEMA', ] - diff --git a/hermes_code/tools/browser_use_manager.py b/hermes_code/tools/browser_use_manager.py new file mode 100644 index 00000000..b157665e --- /dev/null +++ b/hermes_code/tools/browser_use_manager.py @@ -0,0 +1,496 @@ +"""Provision isolated browser-use Docker runtimes with per-principal profiles.""" + +from __future__ import annotations + +import hashlib +import json +import logging +import os +import re +import subprocess 
logger = logging.getLogger(__name__)

# Fallback endpoints used in "shared" mode when the environment does not
# override them.
_DEFAULT_SHARED_CDP_URL = "http://browser:9222"
_DEFAULT_SHARED_RPC_URL = "http://browser:8787/run"
# Fallbacks for per-owner runtime containers.
_DEFAULT_RUNTIME_IMAGE = "hermes-browser-runtime:latest"
_DEFAULT_RUNTIME_NETWORK = "hermes-net"
_DEFAULT_TTL_SECONDS = 900
_DEFAULT_START_TIMEOUT = 45
_DEFAULT_ENABLE_UI = True
_REGISTRY_LOCK = threading.Lock()
_VIEW_URL_CACHE_LOCK = threading.Lock()
_VIEW_URL_CACHE: dict[str, Any] = {"value": "", "expires_at": 0.0}

# Public hostname of a Cloudflare quick tunnel. The negative lookahead skips
# api.trycloudflare.com (presumably the provisioning endpoint, which is not a
# viewer URL -- verify against real cloudflared logs).
_QUICK_TUNNEL_URL_RE = re.compile(
    r"https://(?!api\.)[a-z0-9-]+\.trycloudflare\.com", re.IGNORECASE
)
# Any other https URL, excluding links hosted on cloudflare.com domains
# (terms-of-use / documentation links are never the viewer address).
_GENERIC_HTTPS_URL_RE = re.compile(r"https://(?![^/\s]*cloudflare\.com)[^\s\"'<>]+")


@dataclass(frozen=True)
class BrowserUseIsolationConfig:
    """Resolved browser-use isolation settings.

    Instances are built by ``get_browser_use_isolation_config``.
    """

    mode: str  # "shared", "docker-per-principal" or "docker-per-task"
    runtime_image: str  # image used for per-owner runtime containers
    runtime_network: str  # docker network the runtime containers join
    runtime_ttl_seconds: int
    runtime_start_timeout: int
    shared_cdp_url: str  # CDP endpoint of the shared browser
    enable_ui: bool  # forwarded to the runtime as BROWSER_ENABLE_UI


def _hermes_home() -> Path:
    """Return the Hermes home directory (``$HERMES_HOME`` or ``~/.hermes``)."""
    return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))


def _registry_path() -> Path:
    """Return the path of the JSON registry of docker runtimes."""
    return _hermes_home() / "browser_use" / "docker_runtimes.json"


def _load_runtime_config_file() -> Dict[str, Any]:
    """Return the ``browser`` mapping from ``config.yaml``.

    Returns an empty dict when the file is missing, unreadable, or does not
    contain a ``browser`` mapping. Failures are logged at debug level only,
    because the config file is optional.
    """
    config_path = _hermes_home() / "config.yaml"
    if not config_path.exists():
        return {}

    try:
        import yaml

        with open(config_path, "r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh) or {}
        if isinstance(data, dict):
            browser_cfg = data.get("browser", {})
            return browser_cfg if isinstance(browser_cfg, dict) else {}
    except Exception as exc:
        logger.debug("Failed to load browser config for browser-use isolation: %s", exc)
    return {}


def _env_or_config(
    env_name: str,
    config_key: str,
    default: Any,
    file_config: Optional[Dict[str, Any]] = None,
) -> Any:
    """Resolve one setting: non-empty env var, then config file, then default.

    Args:
        env_name: Environment variable consulted first.
        config_key: Key looked up in the ``browser`` config mapping.
        default: Value returned when neither source provides the setting.
        file_config: Optional pre-loaded ``browser`` mapping. When omitted the
            config file is read on demand; pass it when resolving several
            settings so the file is parsed only once.
    """
    value = os.getenv(env_name)
    if value not in (None, ""):
        return value
    if file_config is None:
        file_config = _load_runtime_config_file()
    return file_config.get(config_key, default)


def _as_int(value: Any, default: int) -> int:
    """Coerce *value* to an int of at least 1; return *default* if not numeric."""
    try:
        return max(1, int(value))
    except (TypeError, ValueError):
        return default


def _as_bool(value: Any, default: bool) -> bool:
    """Coerce *value* to bool.

    ``None`` yields *default*; real bools pass through; anything else is true
    only when its stripped, lower-cased text is "1", "true", "yes" or "on".
    """
    if value is None:
        return default
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in {"1", "true", "yes", "on"}


def get_browser_use_isolation_config() -> BrowserUseIsolationConfig:
    """Build the isolation config from environment variables and config.yaml.

    Unknown modes fall back to "shared" with a warning. Blank image or
    network values fall back to the module defaults.
    """
    # Parse config.yaml once instead of once per setting.
    file_config = _load_runtime_config_file()

    def setting(env_name: str, config_key: str, default: Any) -> Any:
        return _env_or_config(env_name, config_key, default, file_config)

    mode = str(
        setting("BROWSER_USE_ISOLATION_MODE", "browser_use_isolation_mode", "shared")
    ).strip().lower()
    if mode not in {"shared", "docker-per-principal", "docker-per-task"}:
        logger.warning("Unknown browser-use isolation mode %r; falling back to shared", mode)
        mode = "shared"

    # ``or ""`` keeps an explicit YAML null from turning into the text "None".
    runtime_image = str(
        setting("BROWSER_RUNTIME_IMAGE", "browser_use_runtime_image", _DEFAULT_RUNTIME_IMAGE)
        or ""
    ).strip()
    runtime_network = str(
        setting("BROWSER_RUNTIME_NETWORK", "browser_use_runtime_network", _DEFAULT_RUNTIME_NETWORK)
        or ""
    ).strip()

    return BrowserUseIsolationConfig(
        mode=mode,
        runtime_image=runtime_image or _DEFAULT_RUNTIME_IMAGE,
        runtime_network=runtime_network or _DEFAULT_RUNTIME_NETWORK,
        runtime_ttl_seconds=_as_int(
            setting(
                "BROWSER_RUNTIME_TTL_SECONDS",
                "browser_use_runtime_ttl_seconds",
                _DEFAULT_TTL_SECONDS,
            ),
            _DEFAULT_TTL_SECONDS,
        ),
        runtime_start_timeout=_as_int(
            setting(
                "BROWSER_RUNTIME_START_TIMEOUT",
                "browser_use_runtime_start_timeout",
                _DEFAULT_START_TIMEOUT,
            ),
            _DEFAULT_START_TIMEOUT,
        ),
        shared_cdp_url=str(os.getenv("BROWSER_URL", _DEFAULT_SHARED_CDP_URL)).strip()
        or _DEFAULT_SHARED_CDP_URL,
        enable_ui=_as_bool(
            setting(
                "BROWSER_RUNTIME_ENABLE_UI",
                "browser_use_runtime_enable_ui",
                _DEFAULT_ENABLE_UI,
            ),
            _DEFAULT_ENABLE_UI,
        ),
    )


def resolve_isolation_owner(mode: str, task_id: Optional[str], honcho_session_key: Optional[str]) -> str:
    """Return the identity that owns a browser runtime.

    "docker-per-task" always keys on the task id. Every other mode prefers the
    session key and falls back to the task id. All candidates are stripped,
    and blank values fall through to the next candidate and finally to
    "default", so the result is never empty or whitespace-only.
    """
    task_owner = (task_id or "").strip() or "default"
    if mode == "docker-per-task":
        return task_owner
    return (honcho_session_key or "").strip() or task_owner


def hash_runtime_owner(owner: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *owner*."""
    return hashlib.sha256(owner.encode("utf-8")).hexdigest()[:16]


def _normalize_browser_view_base_url(raw_url: str) -> str:
    """Reduce a viewer URL to its base.

    Everything from the first "/vnc.html" or "/index.html" onwards is cut,
    then trailing slashes are removed. Blank input yields "".
    """
    url = (raw_url or "").strip()
    if not url:
        return ""
    for marker in ("/vnc.html", "/index.html"):
        idx = url.find(marker)
        if idx != -1:
            url = url[:idx]
            break
    return url.rstrip("/")


def _extract_tunnel_base_url(log_text: str) -> str:
    """Pick the viewer base URL out of tunnel log text, or return ""."""
    quick = _QUICK_TUNNEL_URL_RE.findall(log_text)
    if quick:
        return _normalize_browser_view_base_url(quick[-1])
    # Fallback for tunnels that are not Cloudflare quick tunnels: last https
    # URL in the logs, minus sentence punctuation that may trail it.
    generic = _GENERIC_HTTPS_URL_RE.findall(log_text)
    if not generic:
        return ""
    return _normalize_browser_view_base_url(generic[-1].rstrip(".,;:)]}"))


def _discover_browser_view_base_url_from_tunnel() -> str:
    """Discover the public viewer base URL from the ``hermes-tunnel`` logs.

    The result is cached for 60 seconds when a URL was found and for 10
    seconds when none was, so repeated calls do not spawn ``docker logs``
    each time. Returns "" when nothing could be discovered.
    """
    now = time.time()
    with _VIEW_URL_CACHE_LOCK:
        cached_value = str(_VIEW_URL_CACHE.get("value", "") or "")
        expires_at = float(_VIEW_URL_CACHE.get("expires_at", 0.0) or 0.0)
        # Honour the cache for empty results too; otherwise the short
        # negative TTL written below would never take effect.
        if now < expires_at:
            return cached_value

    try:
        result = _run_docker(["logs", "--tail", "200", "hermes-tunnel"], check=False)
        combined = "\n".join(
            part for part in [result.stdout or "", result.stderr or ""] if part
        )
        base_url = _extract_tunnel_base_url(combined)
    except Exception as exc:
        logger.debug("Failed to discover browser view URL from hermes-tunnel logs: %s", exc)
        base_url = ""

    with _VIEW_URL_CACHE_LOCK:
        _VIEW_URL_CACHE["value"] = base_url
        _VIEW_URL_CACHE["expires_at"] = now + (60 if base_url else 10)

    return base_url


def get_browser_use_view_url(
    task_id: Optional[str] = None,
    honcho_session_key: Optional[str] = None,
) -> str:
    """Return the noVNC viewer URL for the caller's browser, or "" if unknown.

    The base URL comes from ``BROWSER_VIEW_BASE_URL``, then
    ``BROWSER_VIEW_URL``, then tunnel-log discovery. In "shared" mode the URL
    points at the shared viewer; otherwise it points at
    ``/view/<owner-hash>/`` under the base URL.
    """
    base_url = _normalize_browser_view_base_url(
        os.getenv("BROWSER_VIEW_BASE_URL", "") or os.getenv("BROWSER_VIEW_URL", "")
    )
    if not base_url:
        base_url = _discover_browser_view_base_url_from_tunnel()
    if not base_url:
        return ""

    config = get_browser_use_isolation_config()
    if config.mode == "shared":
        return f"{base_url}/vnc.html?path=websockify"

    owner = resolve_isolation_owner(config.mode, task_id, honcho_session_key)
    owner_hash = hash_runtime_owner(owner)
    return f"{base_url}/view/{owner_hash}/vnc.html?path=view/{owner_hash}/websockify"


def _shared_rpc_url() -> str:
    """Return the shared browser-use RPC URL (``BROWSER_USE_RPC_URL`` or default)."""
    return str(os.getenv("BROWSER_USE_RPC_URL", _DEFAULT_SHARED_RPC_URL)).strip() or _DEFAULT_SHARED_RPC_URL
def _runtime_rpc_url(container_name: str) -> str:
    """Return the browser-use RPC URL served by *container_name* on port 8787."""
    return f"http://{container_name}:8787/run"


def _load_registry() -> Dict[str, Any]:
    """Load the runtime registry from disk.

    Returns ``{"runtimes": {}}`` when the file is missing, unreadable, or
    lacks a ``runtimes`` mapping. Read failures are logged as warnings.
    """
    path = _registry_path()
    if not path.exists():
        return {"runtimes": {}}
    try:
        with open(path, "r", encoding="utf-8") as fh:
            data = json.load(fh) or {}
        if isinstance(data, dict) and isinstance(data.get("runtimes"), dict):
            return data
    except Exception as exc:
        logger.warning("Failed to read browser-use runtime registry %s: %s", path, exc)
    return {"runtimes": {}}


def _save_registry(payload: Dict[str, Any]) -> None:
    """Write *payload* to the registry file via a temp file and ``os.replace``.

    The temp file is created in the registry's directory, flushed and
    fsynced, then moved over the target. On any failure the temp file is
    removed and the exception is re-raised.
    """
    path = _registry_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), prefix=".browser_use_", suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2, sort_keys=True)
            fh.flush()
            os.fsync(fh.fileno())
        os.replace(tmp_path, path)
    except Exception:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def _run_docker(args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run ``docker <args>`` with captured text output and a 120 s timeout.

    Args:
        args: Arguments placed after the ``docker`` executable.
        check: When true, any failure raises ``RuntimeError``. When false,
            failures are reported through the returned ``returncode``.

    Returns:
        The completed process. With ``check=False``, a docker binary that
        cannot be launched yields return code 127 and a timeout yields 124,
        with the reason in ``stderr``.

    Raises:
        RuntimeError: With ``check=True``, when docker exits non-zero, cannot
            be launched, or times out.
    """
    cmd = ["docker", *args]
    printable = " ".join(cmd)
    logger.debug("browser-use docker cmd: %s", printable)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # Missing binary / hung daemon: keep the "check=False never raises"
        # contract that the best-effort probes below rely on.
        if check:
            raise RuntimeError(f"Docker command failed ({printable}): {exc}") from exc
        returncode = 124 if isinstance(exc, subprocess.TimeoutExpired) else 127
        return subprocess.CompletedProcess(cmd, returncode, stdout="", stderr=str(exc))
    if check and result.returncode != 0:
        stderr = (result.stderr or result.stdout or "").strip()
        raise RuntimeError(f"Docker command failed ({printable}): {stderr}")
    return result


def _ensure_docker_access() -> None:
    """Raise ``RuntimeError`` unless ``docker version`` succeeds."""
    _run_docker(["version"], check=True)


def _container_exists(container_name: str) -> bool:
    """Return True when ``docker inspect`` succeeds for *container_name*."""
    result = _run_docker(["inspect", container_name], check=False)
    return result.returncode == 0


def _container_running(container_name: str) -> bool:
    """Return True when docker reports ``State.Running`` as true."""
    result = _run_docker(
        ["inspect", "-f", "{{.State.Running}}", container_name],
        check=False,
    )
    return result.returncode == 0 and result.stdout.strip().lower() == "true"


def _remove_container(container_name: str) -> None:
    """Force-remove *container_name*; a blank name is a no-op. Failures are ignored."""
    if not container_name:
        return
    _run_docker(["rm", "-f", container_name], check=False)


def _volume_exists(volume_name: str) -> bool:
    """Return True when ``docker volume inspect`` succeeds for *volume_name*."""
    result = _run_docker(["volume", "inspect", volume_name], check=False)
    return result.returncode == 0


def _ensure_volume(volume_name: str, owner_hash: str) -> None:
    """Create the labelled volume *volume_name* unless it already exists.

    Raises:
        RuntimeError: When ``docker volume create`` fails.
    """
    if _volume_exists(volume_name):
        return
    _run_docker(
        [
            "volume",
            "create",
            "--label",
            "hermes.browser_use=true",
            "--label",
            f"hermes.owner_hash={owner_hash}",
            volume_name,
        ],
        check=True,
    )


def _remove_volume(volume_name: str) -> None:
    """Force-remove *volume_name*; a blank name is a no-op. Failures are ignored."""
    if not volume_name:
        return
    _run_docker(["volume", "rm", "-f", volume_name], check=False)


def _start_runtime_container(
    container_name: str,
    volume_name: str,
    owner_hash: str,
    config: BrowserUseIsolationConfig,
) -> None:
    """Start a detached runtime container for one owner.

    The owner's volume is ensured first and mounted at ``/data``, which is
    also passed to the container as ``BROWSER_DATA_DIR``. The container joins
    the configured network (or the default one when blank) and receives
    ``BROWSER_ENABLE_UI`` from *config*.

    Raises:
        RuntimeError: When volume creation or ``docker run`` fails.
    """
    _ensure_volume(volume_name, owner_hash)
    ui_flag = "true" if config.enable_ui else "false"
    labels = [
        "hermes.browser_use=true",
        f"hermes.owner_hash={owner_hash}",
        "hermes.managed_by=browser_use_manager",
    ]
    run_args = [
        "run",
        "-d",
        "--name",
        container_name,
        "--network",
        config.runtime_network or _DEFAULT_RUNTIME_NETWORK,
        "--shm-size",
        "2g",
    ]
    for label in labels:
        run_args += ["--label", label]
    run_args += [
        "-e",
        f"BROWSER_ENABLE_UI={ui_flag}",
        "-e",
        "BROWSER_DATA_DIR=/data",
        "-v",
        f"{volume_name}:/data",
        config.runtime_image,
    ]
    _run_docker(run_args, check=True)


def _wait_for_cdp(container_name: str, timeout_seconds: int) -> None:
    """Poll the container's CDP endpoint until it answers or the timeout ends.

    ``http://<container>:9222/json/version`` is requested about once per
    second. At least one attempt is always made.

    Raises:
        RuntimeError: When no successful response arrived in time; the
            message carries the last observed error.
    """
    # NOTE(review): Chrome's DevTools HTTP server may reject Host headers that
    # are neither an IP nor localhost -- confirm the runtime's port proxy
    # makes a hostname-based request acceptable.
    cdp_url = f"http://{container_name}:9222/json/version"
    # Monotonic clock: wall-clock adjustments must not stretch or cut the wait.
    deadline = time.monotonic() + timeout_seconds
    last_error = "no response before the deadline"
    while True:
        try:
            response = requests.get(cdp_url, timeout=2)
            if response.ok:
                return
            last_error = f"HTTP {response.status_code}"
        except requests.RequestException as exc:
            last_error = str(exc)
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(1.0, remaining))
    raise RuntimeError(f"Browser runtime {container_name} did not become ready: {last_error}")
= time.time() + runtimes = registry.setdefault("runtimes", {}) + expired_keys = [] + for runtime_key, entry in list(runtimes.items()): + last_used = float(entry.get("last_used", 0) or 0) + if not last_used or now - last_used < config.runtime_ttl_seconds: + continue + + container_name = entry.get("container_name", "") + volume_name = entry.get("volume_name", "") + mode = entry.get("mode", "") + logger.info("Cleaning expired browser-use runtime %s (%s)", runtime_key, container_name) + _remove_container(container_name) + if mode == "docker-per-task": + _remove_volume(volume_name) + expired_keys.append(runtime_key) + + for runtime_key in expired_keys: + runtimes.pop(runtime_key, None) + + +def ensure_isolated_browser_runtime( + task_id: Optional[str] = None, + honcho_session_key: Optional[str] = None, +) -> Dict[str, str]: + config = get_browser_use_isolation_config() + if config.mode == "shared": + return { + "cdp_url": config.shared_cdp_url, + "rpc_url": _shared_rpc_url(), + "browser_view": get_browser_use_view_url( + task_id=task_id, + honcho_session_key=honcho_session_key, + ), + "isolation_mode": "shared", + "owner": "", + "owner_hash": "", + } + + _ensure_docker_access() + owner = resolve_isolation_owner(config.mode, task_id, honcho_session_key) + owner_hash = hash_runtime_owner(owner) + runtime_key = f"{config.mode}:{owner_hash}" + container_name = f"hermes-browser-{owner_hash}" + volume_name = f"hermes-browser-profile-{owner_hash}" + + with _REGISTRY_LOCK: + registry = _load_registry() + _cleanup_expired_runtimes_locked(registry, config) + + if _container_running(container_name): + registry.setdefault("runtimes", {})[runtime_key] = { + "container_name": container_name, + "volume_name": volume_name, + "last_used": time.time(), + "mode": config.mode, + "owner_hash": owner_hash, + } + _save_registry(registry) + return { + "cdp_url": f"http://{container_name}:9222", + "rpc_url": _runtime_rpc_url(container_name), + "browser_view": get_browser_use_view_url( + 
task_id=task_id, + honcho_session_key=honcho_session_key, + ), + "isolation_mode": config.mode, + "owner": owner, + "owner_hash": owner_hash, + } + + if _container_exists(container_name): + _remove_container(container_name) + + _start_runtime_container(container_name, volume_name, owner_hash, config) + _wait_for_cdp(container_name, config.runtime_start_timeout) + + registry.setdefault("runtimes", {})[runtime_key] = { + "container_name": container_name, + "volume_name": volume_name, + "last_used": time.time(), + "mode": config.mode, + "owner_hash": owner_hash, + } + _save_registry(registry) + + return { + "cdp_url": f"http://{container_name}:9222", + "rpc_url": _runtime_rpc_url(container_name), + "browser_view": get_browser_use_view_url( + task_id=task_id, + honcho_session_key=honcho_session_key, + ), + "isolation_mode": config.mode, + "owner": owner, + "owner_hash": owner_hash, + } + + +def cleanup_browser_use_runtime( + task_id: Optional[str] = None, + honcho_session_key: Optional[str] = None, +) -> None: + config = get_browser_use_isolation_config() + if config.mode != "docker-per-task": + return + + owner = resolve_isolation_owner(config.mode, task_id, honcho_session_key) + owner_hash = hash_runtime_owner(owner) + runtime_key = f"{config.mode}:{owner_hash}" + container_name = f"hermes-browser-{owner_hash}" + volume_name = f"hermes-browser-profile-{owner_hash}" + + with _REGISTRY_LOCK: + registry = _load_registry() + _remove_container(container_name) + _remove_volume(volume_name) + registry.setdefault("runtimes", {}).pop(runtime_key, None) + _save_registry(registry) diff --git a/hermes_code/tools/browser_use_tool.py b/hermes_code/tools/browser_use_tool.py index 167b61b3..d318cd0a 100644 --- a/hermes_code/tools/browser_use_tool.py +++ b/hermes_code/tools/browser_use_tool.py @@ -1,14 +1,23 @@ import json import os from urllib import error, request + +from tools.browser_use_manager import ( + cleanup_browser_use_runtime, + ensure_isolated_browser_runtime, +) from 
tools.registry import registry -def run_browser_task(task): +def run_browser_task(task, task_id=None, honcho_session_key=None): if not task or not str(task).strip(): return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False) - rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run") + runtime = ensure_isolated_browser_runtime( + task_id=task_id, + honcho_session_key=honcho_session_key, + ) + rpc_url = runtime.get("rpc_url") or os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run") timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900")) payload = json.dumps({"task": task}).encode("utf-8") req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST") @@ -16,7 +25,20 @@ def run_browser_task(task): try: with request.urlopen(req, timeout=timeout_sec) as resp: body = resp.read().decode("utf-8") + + try: + data = json.loads(body) + except json.JSONDecodeError: return body + + if isinstance(data, dict): + browser_view = runtime.get("browser_view") + if browser_view and not data.get("browser_view"): + data["browser_view"] = browser_view + data["isolation_mode"] = runtime.get("isolation_mode", "shared") + return json.dumps(data, ensure_ascii=False) + + return body except error.HTTPError as http_err: body = http_err.read().decode("utf-8", errors="replace") return json.dumps( @@ -35,11 +57,19 @@ def run_browser_task(task): }, ensure_ascii=False, ) + finally: + try: + cleanup_browser_use_runtime( + task_id=task_id, + honcho_session_key=honcho_session_key, + ) + except Exception: + pass registry.register( name="internet_browser", - toolset="browse_cmd", + toolset="browse_cmd", schema={ "name": "internet_browser", "description": ( @@ -52,14 +82,17 @@ registry.register( "type": "object", "properties": { "task": { - "type": "string", + "type": "string", "description": "Подробная задача на естественном языке. Например: 'Зайди на wildberries.ru, найди черную футболку и верни цену'." 
} }, "required": ["task"] } }, - - handler=lambda args, **kw: run_browser_task(args.get("task")), + handler=lambda args, **kw: run_browser_task( + args.get("task"), + task_id=kw.get("task_id"), + honcho_session_key=kw.get("honcho_session_key"), + ), emoji="🌐", -) \ No newline at end of file +)