181 lines
5.9 KiB
Python
181 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Run browser-use task against a Chromium CDP endpoint."""
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import socket
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from urllib.parse import urlparse, urlunparse
|
|
from urllib.request import urlopen
|
|
|
|
from browser_use import Agent, BrowserSession
|
|
from browser_use.llm import ChatOpenAI
|
|
|
|
|
|
# Candidate .env files, scanned in this order by _read_env_from_files().
ENV_FALLBACK_PATHS = tuple(
    Path(location)
    for location in (
        "/workspace/.env",
        "/workspace/workspace/.env",
        "/root/.hermes/.env",
    )
)
|
|
|
|
|
|
def _read_env_from_files(name: str) -> str | None:
|
|
for env_path in ENV_FALLBACK_PATHS:
|
|
if not env_path.exists():
|
|
continue
|
|
try:
|
|
for raw_line in env_path.read_text(encoding="utf-8").splitlines():
|
|
line = raw_line.strip()
|
|
if not line or line.startswith("#") or "=" not in line:
|
|
continue
|
|
key, value = line.split("=", 1)
|
|
if key.strip() == name:
|
|
cleaned = value.strip().strip('"').strip("'")
|
|
return cleaned or None
|
|
except OSError:
|
|
continue
|
|
return None
|
|
|
|
|
|
def _get_env(name: str, default: str | None = None) -> str | None:
|
|
value = os.getenv(name)
|
|
if value:
|
|
return value
|
|
from_file = _read_env_from_files(name)
|
|
if from_file:
|
|
return from_file
|
|
return default if default else None
|
|
|
|
|
|
def _build_task(task: str, start_url: str | None) -> str:
|
|
if not start_url:
|
|
return task
|
|
return f"Start from {start_url}. Task: {task}"
|
|
|
|
|
|
def _serialize_history(history: Any) -> dict[str, Any]:
    """Reduce an agent history object to a small summary dict.

    The object is duck-typed: ``final_result()`` and ``errors()`` are used
    only when the attributes exist.  If calling either of them raises, the
    failure is recorded in the ``errors`` list rather than silently dropped,
    so a run whose outcome could not be read is not reported as clean.

    Args:
        history: Object returned by the agent run.

    Returns:
        A dict with ``final_result`` (the result, or ``""`` when falsy or
        unavailable), ``errors`` (stringified truthy entries plus any
        extraction failures) and ``has_errors`` (``bool(errors)``).
    """
    result: Any = ""
    errors: list[str] = []
    extraction_failures: list[str] = []

    if hasattr(history, "final_result"):
        try:
            result = history.final_result() or ""
        except Exception as exc:  # boundary: must not crash the report
            result = ""
            extraction_failures.append(
                f"final_result() raised {type(exc).__name__}: {exc}"
            )

    if hasattr(history, "errors"):
        try:
            # Falsy entries (None, "") are dropped.
            errors = [str(entry) for entry in history.errors() if entry]
        except Exception as exc:  # boundary: must not crash the report
            errors = []
            extraction_failures.append(
                f"errors() raised {type(exc).__name__}: {exc}"
            )

    errors.extend(extraction_failures)
    return {
        "final_result": result,
        "errors": errors,
        "has_errors": bool(errors),
    }
|
|
|
|
|
|
def _resolve_cdp_url(cdp_url: str) -> str:
    """Turn a CDP endpoint into a WebSocket debugger URL.

    ``ws://`` / ``wss://`` URLs are returned unchanged.  For ``http://`` /
    ``https://`` URLs the ``/json/version`` document is fetched and its
    ``webSocketDebuggerUrl`` field is returned.

    Args:
        cdp_url: WebSocket URL, or HTTP(S) base URL of the DevTools endpoint
            (with or without a trailing ``/json/version``).

    Returns:
        The WebSocket URL to connect to.

    Raises:
        RuntimeError: Unsupported scheme, or the endpoint did not return a
            ``webSocketDebuggerUrl``.
        OSError: The HTTP request failed (``urllib.error.URLError`` is a
            subclass).
        ValueError: The response body was not valid UTF-8 JSON.
    """
    if cdp_url.startswith(("ws://", "wss://")):
        return cdp_url
    if not cdp_url.startswith(("http://", "https://")):
        raise RuntimeError(f"Unsupported CDP URL scheme: {cdp_url}")

    parsed = urlparse(cdp_url)
    host = parsed.hostname or ""
    port = parsed.port

    # Chrome DevTools rejects non-IP/non-localhost Host headers in some setups.
    # For docker service names, resolve to IP and query via numeric host.
    # The lookup is done once so the HTTP and WebSocket URLs always agree.
    resolved_host: str | None = None
    if host and host not in {"localhost", "127.0.0.1", "0.0.0.0"}:
        try:
            resolved_host = socket.gethostbyname(host)
        except OSError:
            resolved_host = None  # best effort: keep the original host name

    if resolved_host:
        netloc = resolved_host if not port else f"{resolved_host}:{port}"
        parsed = parsed._replace(netloc=netloc)

    # Append to the *path* component so a query string (e.g. "?token=...")
    # stays a query string instead of swallowing the suffix.
    path = parsed.path.rstrip("/")
    if not path.endswith("/json/version"):
        path = f"{path}/json/version"
    version_url = urlunparse(parsed._replace(path=path))

    with urlopen(version_url, timeout=10) as response:  # nosec B310
        payload = json.loads(response.read().decode("utf-8"))

    ws_url = payload.get("webSocketDebuggerUrl") if isinstance(payload, dict) else None
    if not ws_url:
        raise RuntimeError(f"CDP endpoint did not return webSocketDebuggerUrl: {version_url}")

    # Keep a reachable host for ws:// URL when input used docker DNS alias.
    if resolved_host:
        ws_parsed = urlparse(str(ws_url))
        ws_port = ws_parsed.port
        if ws_port is None:
            ws_port = 443 if ws_parsed.scheme == "wss" else 80
        ws_url = urlunparse(ws_parsed._replace(netloc=f"{resolved_host}:{ws_port}"))

    return str(ws_url)
|
|
|
|
|
|
async def _run(args: argparse.Namespace) -> int:
    """Run one browser-use task and print a JSON report to stdout.

    Configuration is read through ``_get_env`` (``OPENAI_API_KEY``,
    ``BROWSER_USE_MODEL`` / ``OPENAI_MODEL``, ``OPENAI_BASE_URL``,
    ``BROWSER_USE_CDP_URL``); ``args.cdp_url`` takes precedence over the
    environment for the CDP endpoint.

    Args:
        args: Parsed CLI options; ``task``, ``start_url``, ``max_steps`` and
            ``cdp_url`` are read.

    Returns:
        ``2`` when the API key is missing or the CDP endpoint cannot be
        resolved, ``0`` when the serialized history has no errors, ``1``
        otherwise.
    """
    api_key = _get_env("OPENAI_API_KEY")
    if not api_key:
        print(json.dumps({"success": False, "error": "OPENAI_API_KEY is not set"}))
        return 2

    model = _get_env("BROWSER_USE_MODEL", _get_env("OPENAI_MODEL", "gpt-4o-mini"))
    base_url = _get_env("OPENAI_BASE_URL")
    raw_cdp_url = args.cdp_url or _get_env("BROWSER_USE_CDP_URL", "ws://chromium:3000/chromium?token=hermes-local")

    try:
        cdp_url = _resolve_cdp_url(raw_cdp_url)
    except (RuntimeError, OSError, ValueError) as exc:
        # Setup failures (unreachable endpoint, bad scheme, malformed reply)
        # are reported in the same JSON shape as the missing-key case so
        # stdout consumers never have to parse a traceback.
        print(json.dumps({"success": False, "error": f"Failed to resolve CDP URL: {exc}"}))
        return 2

    llm = ChatOpenAI(
        model=model,
        api_key=api_key,
        base_url=base_url,
        temperature=0.0,
    )

    browser_session = BrowserSession(cdp_url=cdp_url)
    agent = Agent(
        task=_build_task(args.task, args.start_url),
        llm=llm,
        browser_session=browser_session,
        use_vision=False,
    )

    history = await agent.run(max_steps=args.max_steps)
    payload = _serialize_history(history)

    print(
        json.dumps(
            {
                "success": not payload["has_errors"],
                "model": model,
                "cdp_url": cdp_url,
                "task": args.task,
                "result": payload,
            },
            ensure_ascii=True,
        )
    )
    return 0 if not payload["has_errors"] else 1
|
|
|
|
|
|
def main() -> int:
    """Parse the command line, run the task and return the exit code.

    Options: ``--task`` (required), ``--start-url``, ``--max-steps``
    (integer, default 20) and ``--cdp-url``.
    """
    cli = argparse.ArgumentParser(description="Run browser-use task")
    cli.add_argument("--task", required=True, help="Natural language task for browser-use")
    cli.add_argument("--start-url", default=None, help="Optional URL to open first")
    cli.add_argument("--max-steps", type=int, default=20, help="Max agent steps")
    cli.add_argument("--cdp-url", default=None, help="CDP URL (ws://... or http://.../json/version host)")
    options = cli.parse_args()

    # Drive the coroutine to completion on a fresh event loop.
    exit_code = asyncio.run(_run(options))
    return exit_code
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
|
|