update skill
This commit is contained in:
parent
7832c30cc0
commit
aa7927a316
8 changed files with 7 additions and 325 deletions
|
|
@ -1,94 +0,0 @@
|
||||||
---
|
|
||||||
name: browser-use
|
|
||||||
version: "1.0.0"
|
|
||||||
description: Use browser-use with a Chromium CDP endpoint to perform web tasks from Hermes.
|
|
||||||
triggers:
|
|
||||||
- "browser-use"
|
|
||||||
- "open website and extract"
|
|
||||||
- "automate browser task"
|
|
||||||
- "run browser task"
|
|
||||||
allowed-tools:
|
|
||||||
- terminal
|
|
||||||
- file
|
|
||||||
- memory
|
|
||||||
---
|
|
||||||
|
|
||||||
# Browser Use (Chromium)
|
|
||||||
|
|
||||||
This skill runs browser tasks via `browser-use` and connects to Chromium through CDP.
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
- `hermes-agent` container is running
|
|
||||||
- `chromium` service is running in `docker-compose`
|
|
||||||
- `OPENAI_API_KEY` is present in container env (via `docker-compose` `env_file`)
|
|
||||||
- If running outside container, set `OPENAI_API_KEY` in your shell or `.env`
|
|
||||||
|
|
||||||
## Troubleshooting Environment Setup
|
|
||||||
|
|
||||||
If you get `{"success": false, "error": "OPENAI_API_KEY is not set"}`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose exec -T hermes-agent python - <<'PY'
|
|
||||||
import os
|
|
||||||
print('OPENAI_API_KEY', '<set>' if os.getenv('OPENAI_API_KEY') else '<missing>')
|
|
||||||
print('OPENAI_BASE_URL', '<set>' if os.getenv('OPENAI_BASE_URL') else '<missing>')
|
|
||||||
PY
|
|
||||||
```
|
|
||||||
|
|
||||||
If `OPENAI_API_KEY` is missing, ensure the key exists in one of the env files used by Compose:
|
|
||||||
- `workspace/.env`
|
|
||||||
- `hermes_data/.env`
|
|
||||||
|
|
||||||
Then recreate container:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose up -d hermes-agent
|
|
||||||
```
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Optional overrides when running outside Docker
|
|
||||||
export OPENAI_API_KEY="your-api-key-here"
|
|
||||||
export BROWSER_USE_CDP_URL="ws://chromium:3000/chromium?token=hermes-local"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Common failure:** `{"success": false, "error": "OPENAI_API_KEY is not set"}`
|
|
||||||
- Cause: key is absent in container env
|
|
||||||
- Fix: add key to `workspace/.env` or `hermes_data/.env`, then `docker compose up -d hermes-agent`
|
|
||||||
|
|
||||||
**Common failure:** 401 `key_model_access_denied`
|
|
||||||
- Cause: API key cannot access configured model (for example `gpt-4o-mini`)
|
|
||||||
- Fix: set allowed model via `BROWSER_USE_MODEL` (or `OPENAI_MODEL`) to a model your provider key can use
|
|
||||||
|
|
||||||
**Common failure:** Connection refused to `chromium`
|
|
||||||
- Cause: Browser not running or CDP endpoint wrong
|
|
||||||
- Fix: Run `docker compose ps` and verify that the `chromium` service is up
|
|
||||||
|
|
||||||
## Quick start
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \
|
|
||||||
--task "Open example.com and return page title" \
|
|
||||||
--max-steps 8
|
|
||||||
```
|
|
||||||
|
|
||||||
## How to use in Hermes
|
|
||||||
|
|
||||||
When user asks for website automation:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \
|
|
||||||
--task "<user task in plain language>" \
|
|
||||||
--max-steps 20
|
|
||||||
```
|
|
||||||
|
|
||||||
If user gives a start URL, pass `--start-url`.
|
|
||||||
|
|
||||||
## Notes
|
|
||||||
|
|
||||||
- Default CDP URL: `ws://chromium:3000/chromium?token=hermes-local`
|
|
||||||
- Override by setting `BROWSER_USE_CDP_URL`
|
|
||||||
- Runtime Python: `BROWSER_USE_PYTHON` (defaults to `python-browser-use`)
|
|
||||||
- The script outputs JSON for easy parsing
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,16 +0,0 @@
|
||||||
{
|
|
||||||
"browser": {
|
|
||||||
"cdp_url": "ws://chromium:3000/chromium?token=hermes-local",
|
|
||||||
"headless": true,
|
|
||||||
"timeout": 120000
|
|
||||||
},
|
|
||||||
"agent": {
|
|
||||||
"model_env": "BROWSER_USE_MODEL",
|
|
||||||
"max_steps": 20,
|
|
||||||
"use_vision": false
|
|
||||||
},
|
|
||||||
"logging": {
|
|
||||||
"level": "info"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,16 +0,0 @@
|
||||||
{
|
|
||||||
"browser": {
|
|
||||||
"cdp_url": "ws://chromium:3000/playwright?token=hermes-local",
|
|
||||||
"headless": true,
|
|
||||||
"timeout": 120000
|
|
||||||
},
|
|
||||||
"agent": {
|
|
||||||
"model_env": "BROWSER_USE_MODEL",
|
|
||||||
"max_steps": 20,
|
|
||||||
"use_vision": false
|
|
||||||
},
|
|
||||||
"logging": {
|
|
||||||
"level": "info"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
browser-use==0.12.5
|
|
||||||
|
|
||||||
|
|
@ -1,181 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""Run browser-use task against a Chromium CDP endpoint."""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import socket
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any
|
|
||||||
from urllib.parse import urlparse, urlunparse
|
|
||||||
from urllib.request import urlopen
|
|
||||||
|
|
||||||
from browser_use import Agent, BrowserSession
|
|
||||||
from browser_use.llm import ChatOpenAI
|
|
||||||
|
|
||||||
|
|
||||||
ENV_FALLBACK_PATHS = (
|
|
||||||
Path("/workspace/.env"),
|
|
||||||
Path("/workspace/workspace/.env"),
|
|
||||||
Path("/root/.hermes/.env"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _read_env_from_files(name: str) -> str | None:
|
|
||||||
for env_path in ENV_FALLBACK_PATHS:
|
|
||||||
if not env_path.exists():
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
for raw_line in env_path.read_text(encoding="utf-8").splitlines():
|
|
||||||
line = raw_line.strip()
|
|
||||||
if not line or line.startswith("#") or "=" not in line:
|
|
||||||
continue
|
|
||||||
key, value = line.split("=", 1)
|
|
||||||
if key.strip() == name:
|
|
||||||
cleaned = value.strip().strip('"').strip("'")
|
|
||||||
return cleaned or None
|
|
||||||
except OSError:
|
|
||||||
continue
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _get_env(name: str, default: str | None = None) -> str | None:
    """Return the value of *name*, preferring the process environment.

    Lookup order: ``os.getenv``, then the fallback ``.env`` files via
    ``_read_env_from_files``, then *default*. Empty values are treated as
    missing at every step, so an empty *default* yields ``None``.

    Args:
        name: Environment variable name.
        default: Value to use when the variable is not found anywhere.

    Returns:
        The first non-empty value found, or ``None``.
    """
    # ``or`` short-circuits, so the files are only read when the process
    # environment has no non-empty value.
    return os.getenv(name) or _read_env_from_files(name) or default or None
|
|
||||||
|
|
||||||
|
|
||||||
def _build_task(task: str, start_url: str | None) -> str:
|
|
||||||
if not start_url:
|
|
||||||
return task
|
|
||||||
return f"Start from {start_url}. Task: {task}"
|
|
||||||
|
|
||||||
|
|
||||||
def _serialize_history(history: Any) -> dict[str, Any]:
    """Reduce an agent run history to a JSON-serializable summary.

    The history object is probed rather than type-checked: ``final_result()``
    and ``errors()`` are called only if the attributes exist, and any exception
    they raise is deliberately swallowed so that a summary is always produced.

    Args:
        history: Object returned by the agent run.

    Returns:
        A dict with ``final_result`` (the result, or ``""`` when unavailable or
        falsy), ``errors`` (string forms of the truthy entries yielded by
        ``errors()``), and ``has_errors`` (whether that list is non-empty).
    """
    result = ""
    errors: list[str] = []

    if hasattr(history, "final_result"):
        try:
            result = history.final_result() or ""
        except Exception:
            # Best-effort: a failing accessor must not lose the whole summary.
            result = ""

    if hasattr(history, "errors"):
        try:
            # Falsy entries (e.g. None) are dropped; the rest are stringified.
            errors = [str(entry) for entry in list(history.errors()) if entry]
        except Exception:
            errors = []

    return {
        "final_result": result,
        "errors": errors,
        "has_errors": bool(errors),
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_cdp_url(cdp_url: str) -> str:
    """Turn a CDP endpoint into a WebSocket debugger URL.

    ``ws://`` and ``wss://`` URLs are returned unchanged. ``http://`` and
    ``https://`` URLs are treated as a DevTools HTTP endpoint: its
    ``/json/version`` document is fetched and the ``webSocketDebuggerUrl``
    field is returned. When the input host is not a local address, it is
    resolved to an IP for both the HTTP request and the returned WebSocket URL.

    Args:
        cdp_url: WebSocket URL, or HTTP(S) base URL of the DevTools endpoint
            (with or without a trailing ``/json/version``).

    Returns:
        The WebSocket URL to connect to.

    Raises:
        RuntimeError: If the scheme is unsupported or the endpoint response
            has no ``webSocketDebuggerUrl``.
        OSError: If the HTTP request fails (raised by ``urlopen``).
        ValueError: If the response is not valid JSON or a port is malformed.
    """
    if cdp_url.startswith(("ws://", "wss://")):
        return cdp_url
    if not cdp_url.startswith(("http://", "https://")):
        raise RuntimeError(f"Unsupported CDP URL scheme: {cdp_url}")

    parsed = urlparse(cdp_url)
    host = parsed.hostname or ""
    port = parsed.port
    local_hosts = {"localhost", "127.0.0.1", "0.0.0.0"}

    # Chrome DevTools rejects non-IP/non-localhost Host headers in some setups.
    # For docker service names, resolve to IP and query via numeric host.
    resolved_host = None
    if host and host not in local_hosts:
        try:
            resolved_host = socket.gethostbyname(host)
        except OSError:
            # Resolution failed: fall back to the host exactly as given.
            resolved_host = None
    if resolved_host:
        netloc = resolved_host if not port else f"{resolved_host}:{port}"
        parsed = parsed._replace(netloc=netloc)

    # Append to the path component only, so a query string such as
    # "?token=..." stays after the path instead of ending up in the middle.
    path = parsed.path.rstrip("/")
    if not path.endswith("/json/version"):
        path = f"{path}/json/version"
    version_url = urlunparse(parsed._replace(path=path))

    # Scheme is restricted to http/https by the guard above.
    with urlopen(version_url, timeout=10) as response:  # nosec B310
        payload = json.loads(response.read().decode("utf-8"))

    ws_url = payload.get("webSocketDebuggerUrl")
    if not ws_url:
        raise RuntimeError(f"CDP endpoint did not return webSocketDebuggerUrl: {version_url}")

    # Keep a reachable host for ws:// URL when input used docker DNS alias.
    if resolved_host:
        ws_parsed = urlparse(str(ws_url))
        ws_port = ws_parsed.port
        if ws_port is None:
            ws_port = 443 if ws_parsed.scheme == "wss" else 80
        ws_url = urlunparse(ws_parsed._replace(netloc=f"{resolved_host}:{ws_port}"))

    return str(ws_url)
|
|
||||||
|
|
||||||
|
|
||||||
async def _run(args: argparse.Namespace) -> int:
    """Run one browser-use task and print a JSON report to stdout.

    Configuration is read through ``_get_env``: ``OPENAI_API_KEY`` (required),
    ``BROWSER_USE_MODEL`` falling back to ``OPENAI_MODEL`` and then
    ``gpt-4o-mini``, ``OPENAI_BASE_URL``, and ``BROWSER_USE_CDP_URL`` unless
    ``args.cdp_url`` is given.

    Args:
        args: Parsed CLI arguments providing ``task``, ``start_url``,
            ``max_steps`` and ``cdp_url``.

    Returns:
        ``0`` when the run reported no errors, ``1`` when it did, and ``2``
        when the API key is missing or the CDP URL cannot be resolved.
    """
    api_key = _get_env("OPENAI_API_KEY")
    if not api_key:
        print(json.dumps({"success": False, "error": "OPENAI_API_KEY is not set"}))
        return 2

    model = _get_env("BROWSER_USE_MODEL", _get_env("OPENAI_MODEL", "gpt-4o-mini"))
    base_url = _get_env("OPENAI_BASE_URL")
    raw_cdp_url = args.cdp_url or _get_env(
        "BROWSER_USE_CDP_URL", "ws://chromium:3000/chromium?token=hermes-local"
    )

    try:
        cdp_url = _resolve_cdp_url(raw_cdp_url)
    except (RuntimeError, OSError, ValueError) as exc:
        # Keep stdout machine-readable: report an unreachable or invalid
        # endpoint as JSON, like the missing-key case, not as a traceback.
        print(
            json.dumps(
                {
                    "success": False,
                    "error": f"Cannot resolve CDP URL {raw_cdp_url}: {exc}",
                }
            )
        )
        return 2

    llm = ChatOpenAI(
        model=model,
        api_key=api_key,
        base_url=base_url,
        temperature=0.0,
    )

    browser_session = BrowserSession(cdp_url=cdp_url)
    agent = Agent(
        task=_build_task(args.task, args.start_url),
        llm=llm,
        browser_session=browser_session,
        use_vision=False,
    )

    history = await agent.run(max_steps=args.max_steps)
    payload = _serialize_history(history)
    failed = payload["has_errors"]

    print(
        json.dumps(
            {
                "success": not failed,
                "model": model,
                "cdp_url": cdp_url,
                "task": args.task,
                "result": payload,
            },
            ensure_ascii=True,
        )
    )
    return 1 if failed else 0
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
|
||||||
parser = argparse.ArgumentParser(description="Run browser-use task")
|
|
||||||
parser.add_argument("--task", required=True, help="Natural language task for browser-use")
|
|
||||||
parser.add_argument("--start-url", default=None, help="Optional URL to open first")
|
|
||||||
parser.add_argument("--max-steps", type=int, default=20, help="Max agent steps")
|
|
||||||
parser.add_argument("--cdp-url", default=None, help="CDP URL (ws://... or http://.../json/version host)")
|
|
||||||
args = parser.parse_args()
|
|
||||||
return asyncio.run(_run(args))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
raise SystemExit(main())
|
|
||||||
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
#!/usr/bin/env bash
# Create a virtual environment next to this script and install the pinned
# requirements into it.
set -euo pipefail

# Resolve the directory containing this script so it works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="${SCRIPT_DIR}/.venv"

python3 -m venv "${VENV_DIR}"

# Run pip through the venv interpreter: this is the form pip recommends for
# upgrading itself and does not depend on the wrapper script being usable.
"${VENV_DIR}/bin/python" -m pip install --upgrade pip
"${VENV_DIR}/bin/python" -m pip install -r "${SCRIPT_DIR}/requirements.txt"

echo "browser-use skill environment is ready: ${VENV_DIR}"
|
|
||||||
|
|
||||||
|
|
@ -40,7 +40,7 @@ services:
|
||||||
- ./workspace/.env
|
- ./workspace/.env
|
||||||
- ./hermes_data/.env
|
- ./hermes_data/.env
|
||||||
environment:
|
environment:
|
||||||
- BROWSER_USE_CDP_URL=${BROWSER_USE_CDP_URL:-http://chromium-gui:9223}
|
- BROWSER_USE_CDP_URL=${BROWSER_USE_CDP_URL:-http://172.25.0.3:9223}
|
||||||
- BROWSER_USE_PYTHON=/opt/browser-use-venv/bin/python
|
- BROWSER_USE_PYTHON=/opt/browser-use-venv/bin/python
|
||||||
volumes:
|
volumes:
|
||||||
- ./workspace:/workspace
|
- ./workspace:/workspace
|
||||||
|
|
|
||||||
|
|
@ -13,15 +13,18 @@ fluxbox >/tmp/fluxbox.log 2>&1 &
|
||||||
|
|
||||||
x11vnc -display :99 -forever -shared -rfbport 5900 -nopw >/tmp/x11vnc.log 2>&1 &
|
x11vnc -display :99 -forever -shared -rfbport 5900 -nopw >/tmp/x11vnc.log 2>&1 &
|
||||||
websockify --web=/usr/share/novnc/ 6080 localhost:5900 >/tmp/novnc.log 2>&1 &
|
websockify --web=/usr/share/novnc/ 6080 localhost:5900 >/tmp/novnc.log 2>&1 &
|
||||||
socat TCP-LISTEN:9223,fork,bind=0.0.0.0 TCP:127.0.0.1:9222 >/tmp/socat.log 2>&1 &
|
|
||||||
|
# Proxy CDP to all addresses using socat
|
||||||
|
# Chromium listens on ::1:9223 (IPv6 localhost)
|
||||||
|
socat TCP-LISTEN:9223,reuseaddr,fork TCP6:[::1]:9223 >/tmp/socat.log 2>&1 &
|
||||||
|
|
||||||
exec chromium \
|
exec chromium \
|
||||||
--no-sandbox \
|
--no-sandbox \
|
||||||
--disable-dev-shm-usage \
|
--disable-dev-shm-usage \
|
||||||
--disable-gpu \
|
--disable-gpu \
|
||||||
--disable-setuid-sandbox \
|
--disable-setuid-sandbox \
|
||||||
--remote-debugging-address=0.0.0.0 \
|
--remote-debugging-address=127.0.0.1 \
|
||||||
--remote-debugging-port=9222 \
|
--remote-debugging-port=9223 \
|
||||||
--user-data-dir=/tmp/chromium-profile \
|
--user-data-dir=/tmp/chromium-profile \
|
||||||
--window-size=1920,1080 \
|
--window-size=1920,1080 \
|
||||||
--no-first-run \
|
--no-first-run \
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue