diff --git a/browser-use/SKILL.md b/browser-use/SKILL.md deleted file mode 100644 index 02bbb467..00000000 --- a/browser-use/SKILL.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -name: browser-use -version: "1.0.0" -description: Use browser-use with a Chromium CDP endpoint to perform web tasks from Hermes. -triggers: - - "browser-use" - - "open website and extract" - - "automate browser task" - - "run browser task" -allowed-tools: - - terminal - - file - - memory ---- - -# Browser Use (Chromium) - -This skill runs browser tasks via `browser-use` and connects to Chromium through CDP. - -## Prerequisites - -- `hermes-agent` container is running -- `chromium` service is running in `docker-compose` -- `OPENAI_API_KEY` is present in container env (via `docker-compose` `env_file`) -- If running outside container, set `OPENAI_API_KEY` in your shell or `.env` - -## Troubleshooting Environment Setup - -If you get `{"success": false, "error": "OPENAI_API_KEY is not set"}`: - -```bash -docker compose exec -T hermes-agent python - <<'PY' -import os -print('OPENAI_API_KEY', '' if os.getenv('OPENAI_API_KEY') else '') -print('OPENAI_BASE_URL', '' if os.getenv('OPENAI_BASE_URL') else '') -PY -``` - -If `OPENAI_API_KEY` is missing, ensure key exists in one of env files used by compose: -- `workspace/.env` -- `hermes_data/.env` - -Then recreate container: - -```bash -docker compose up -d hermes-agent -``` - -```bash -# Optional overrides when running outside Docker -export OPENAI_API_KEY="your-api-key-here" -export BROWSER_USE_CDP_URL="ws://chromium:3000/chromium?token=hermes-local" -``` - -**Common failure:** `{"success": false, "error": "OPENAI_API_KEY is not set"}` -- Cause: key is absent in container env -- Fix: add key to `workspace/.env` or `hermes_data/.env`, then `docker compose up -d hermes-agent` - -**Common failure:** 401 `key_model_access_denied` -- Cause: API key cannot access configured model (for example `gpt-4o-mini`) -- Fix: set allowed model via `BROWSER_USE_MODEL` (or `OPENAI_MODEL`) to a model your provider key can use - -**Common failure:** Connection refused to `chromium` -- Cause: Browser not running or CDP endpoint wrong -- Fix: Check `docker-compose ps` and verify `chromium` service is up - -## Quick start - -```bash -python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ - --task "Open example.com and return page title" \ - --max-steps 8 -``` - -## How to use in Hermes - -When user asks for website automation: - -```bash -python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ - --task "" \ - --max-steps 20 -``` - -If user gives a start URL, pass `--start-url`. - -## Notes - -- Default CDP URL: `ws://chromium:3000/chromium?token=hermes-local` -- Override by setting `BROWSER_USE_CDP_URL` -- Runtime Python: `BROWSER_USE_PYTHON` (defaults to `python-browser-use`) -- The script outputs JSON for easy parsing - - diff --git a/browser-use/assets/config.example.json b/browser-use/assets/config.example.json deleted file mode 100644 index e8e3e146..00000000 --- a/browser-use/assets/config.example.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "browser": { - "cdp_url": "ws://chromium:3000/chromium?token=hermes-local", - "headless": true, - "timeout": 120000 - }, - "agent": { - "model_env": "BROWSER_USE_MODEL", - "max_steps": 20, - "use_vision": false - }, - "logging": { - "level": "info" - } -} - diff --git a/browser-use/assets/config.json b/browser-use/assets/config.json deleted file mode 100644 index 8f355553..00000000 --- a/browser-use/assets/config.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "browser": { - "cdp_url": "ws://chromium:3000/playwright?token=hermes-local", - "headless": true, - "timeout": 120000 - }, - "agent": { - "model_env": "BROWSER_USE_MODEL", - "max_steps": 20, - "use_vision": false - }, - "logging": { - "level": "info" - } -} - diff --git a/browser-use/scripts/requirements.txt b/browser-use/scripts/requirements.txt deleted file mode 100644 index 33650044..00000000 --- a/browser-use/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -browser-use==0.12.5 - diff --git a/browser-use/scripts/run_browser_use.py b/browser-use/scripts/run_browser_use.py deleted file mode 100644 index 401e52fc..00000000 --- a/browser-use/scripts/run_browser_use.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -"""Run browser-use task against a Chromium CDP endpoint.""" - -import argparse -import asyncio -import json -import os -import socket -from pathlib import Path -from typing import Any -from urllib.parse import urlparse, urlunparse -from urllib.request import urlopen - -from browser_use import Agent, BrowserSession -from browser_use.llm import ChatOpenAI - - -ENV_FALLBACK_PATHS = ( - Path("/workspace/.env"), - Path("/workspace/workspace/.env"), - Path("/root/.hermes/.env"), -) - - -def _read_env_from_files(name: str) -> str | None: - for env_path in ENV_FALLBACK_PATHS: - if not env_path.exists(): - continue - try: - for raw_line in env_path.read_text(encoding="utf-8").splitlines(): - line = raw_line.strip() - if not line or line.startswith("#") or "=" not in line: - continue - key, value = line.split("=", 1) - if key.strip() == name: - cleaned = value.strip().strip('"').strip("'") - return cleaned or None - except OSError: - continue - return None - - -def _get_env(name: str, default: str | None = None) -> str | None: - value = os.getenv(name) - if value: - return value - from_file = _read_env_from_files(name) - if from_file: - return from_file - return default if default else None - - -def _build_task(task: str, start_url: str | None) -> str: - if not start_url: - return task - return f"Start from {start_url}. Task: {task}" - - -def _serialize_history(history: Any) -> dict[str, Any]: - result = "" - errors: list[str] = [] - if hasattr(history, "final_result"): - try: - result = history.final_result() or "" - except Exception: - result = "" - if hasattr(history, "errors"): - try: - raw_errors = list(history.errors()) - errors = [str(e) for e in raw_errors if e] - except Exception: - errors = [] - return { - "final_result": result, - "errors": errors, - "has_errors": bool(errors), - } - - -def _resolve_cdp_url(cdp_url: str) -> str: - if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"): - return cdp_url - if cdp_url.startswith("http://") or cdp_url.startswith("https://"): - parsed = urlparse(cdp_url) - host = parsed.hostname or "" - port = parsed.port - - # Chrome DevTools rejects non-IP/non-localhost Host headers in some setups. - # For docker service names, resolve to IP and query via numeric host. - if host not in {"localhost", "127.0.0.1", "0.0.0.0"}: - try: - resolved_host = socket.gethostbyname(host) - netloc = resolved_host if not port else f"{resolved_host}:{port}" - parsed = parsed._replace(netloc=netloc) - except OSError: - pass - - version_url = urlunparse(parsed).rstrip("/") - if not version_url.endswith("/json/version"): - version_url = f"{version_url}/json/version" - with urlopen(version_url, timeout=10) as response: # nosec B310 - payload = json.loads(response.read().decode("utf-8")) - ws_url = payload.get("webSocketDebuggerUrl") - if not ws_url: - raise RuntimeError(f"CDP endpoint did not return webSocketDebuggerUrl: {version_url}") - - # Keep a reachable host for ws:// URL when input used docker DNS alias. - if host and host not in {"localhost", "127.0.0.1", "0.0.0.0"}: - ws_parsed = urlparse(str(ws_url)) - ws_netloc = ws_parsed.netloc - ws_port = ws_parsed.port - if ws_port is None: - ws_port = 443 if ws_parsed.scheme == "wss" else 80 - try: - resolved_host = socket.gethostbyname(host) - ws_netloc = f"{resolved_host}:{ws_port}" - ws_url = urlunparse(ws_parsed._replace(netloc=ws_netloc)) - except OSError: - pass - - return str(ws_url) - raise RuntimeError(f"Unsupported CDP URL scheme: {cdp_url}") - - -async def _run(args: argparse.Namespace) -> int: - api_key = _get_env("OPENAI_API_KEY") - if not api_key: - print(json.dumps({"success": False, "error": "OPENAI_API_KEY is not set"})) - return 2 - - model = _get_env("BROWSER_USE_MODEL", _get_env("OPENAI_MODEL", "gpt-4o-mini")) - base_url = _get_env("OPENAI_BASE_URL") - raw_cdp_url = args.cdp_url or _get_env("BROWSER_USE_CDP_URL", "ws://chromium:3000/chromium?token=hermes-local") - cdp_url = _resolve_cdp_url(raw_cdp_url) - - llm = ChatOpenAI( - model=model, - api_key=api_key, - base_url=base_url, - temperature=0.0, - ) - - browser_session = BrowserSession(cdp_url=cdp_url) - agent = Agent( - task=_build_task(args.task, args.start_url), - llm=llm, - browser_session=browser_session, - use_vision=False, - ) - - history = await agent.run(max_steps=args.max_steps) - payload = _serialize_history(history) - - print( - json.dumps( - { - "success": not payload["has_errors"], - "model": model, - "cdp_url": cdp_url, - "task": args.task, - "result": payload, - }, - ensure_ascii=True, - ) - ) - return 0 if not payload["has_errors"] else 1 - - -def main() -> int: - parser = argparse.ArgumentParser(description="Run browser-use task") - parser.add_argument("--task", required=True, help="Natural language task for browser-use") - parser.add_argument("--start-url", default=None, help="Optional URL to open first") - parser.add_argument("--max-steps", type=int, default=20, help="Max agent steps") - parser.add_argument("--cdp-url", default=None, help="CDP URL (ws://... or http://.../json/version host)") - args = parser.parse_args() - return asyncio.run(_run(args)) - - -if __name__ == "__main__": - raise SystemExit(main()) - diff --git a/browser-use/scripts/setup.sh b/browser-use/scripts/setup.sh deleted file mode 100644 index 6be17146..00000000 --- a/browser-use/scripts/setup.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -VENV_DIR="${SCRIPT_DIR}/.venv" - -python3 -m venv "${VENV_DIR}" -"${VENV_DIR}/bin/pip" install --upgrade pip -"${VENV_DIR}/bin/pip" install -r "${SCRIPT_DIR}/requirements.txt" - -echo "browser-use skill environment is ready: ${VENV_DIR}" - diff --git a/docker-compose.yml b/docker-compose.yml index 61496bbc..8bd5f39b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,7 +40,7 @@ services: - ./workspace/.env - ./hermes_data/.env environment: - - BROWSER_USE_CDP_URL=${BROWSER_USE_CDP_URL:-http://chromium-gui:9223} + - BROWSER_USE_CDP_URL=${BROWSER_USE_CDP_URL:-http://172.25.0.3:9223} - BROWSER_USE_PYTHON=/opt/browser-use-venv/bin/python volumes: - ./workspace:/workspace diff --git a/docker/chromium-gui/start.sh b/docker/chromium-gui/start.sh index 007789e2..6cdc373d 100644 --- a/docker/chromium-gui/start.sh +++ b/docker/chromium-gui/start.sh @@ -13,15 +13,18 @@ fluxbox >/tmp/fluxbox.log 2>&1 & x11vnc -display :99 -forever -shared -rfbport 5900 -nopw >/tmp/x11vnc.log 2>&1 & websockify --web=/usr/share/novnc/ 6080 localhost:5900 >/tmp/novnc.log 2>&1 & -socat TCP-LISTEN:9223,fork,bind=0.0.0.0 TCP:127.0.0.1:9222 >/tmp/socat.log 2>&1 & + +# Проксирование CDP на все адреса используя socat +# Chromium слушает на ::1:9223 (IPv6 localhost) +socat TCP-LISTEN:9223,reuseaddr,fork TCP6:[::1]:9223 >/tmp/socat.log 2>&1 & exec chromium \ --no-sandbox \ --disable-dev-shm-usage \ --disable-gpu \ --disable-setuid-sandbox \ - --remote-debugging-address=0.0.0.0 \ - --remote-debugging-port=9222 \ + --remote-debugging-address=127.0.0.1 \ + --remote-debugging-port=9223 \ --user-data-dir=/tmp/chromium-profile \ --window-size=1920,1080 \ --no-first-run \