#!/usr/bin/env python3 """Run browser-use task against a Chromium CDP endpoint.""" import argparse import asyncio import json import os import socket from pathlib import Path from typing import Any from urllib.parse import urlparse, urlunparse from urllib.request import urlopen from browser_use import Agent, BrowserSession from browser_use.llm import ChatOpenAI ENV_FALLBACK_PATHS = ( Path("/workspace/.env"), Path("/workspace/workspace/.env"), Path("/root/.hermes/.env"), ) def _read_env_from_files(name: str) -> str | None: for env_path in ENV_FALLBACK_PATHS: if not env_path.exists(): continue try: for raw_line in env_path.read_text(encoding="utf-8").splitlines(): line = raw_line.strip() if not line or line.startswith("#") or "=" not in line: continue key, value = line.split("=", 1) if key.strip() == name: cleaned = value.strip().strip('"').strip("'") return cleaned or None except OSError: continue return None def _get_env(name: str, default: str | None = None) -> str | None: value = os.getenv(name) if value: return value from_file = _read_env_from_files(name) if from_file: return from_file return default if default else None def _build_task(task: str, start_url: str | None) -> str: if not start_url: return task return f"Start from {start_url}. Task: {task}" def _serialize_history(history: Any) -> dict[str, Any]: result = "" errors: list[str] = [] if hasattr(history, "final_result"): try: result = history.final_result() or "" except Exception: result = "" if hasattr(history, "errors"): try: raw_errors = list(history.errors()) errors = [str(e) for e in raw_errors if e] except Exception: errors = [] return { "final_result": result, "errors": errors, "has_errors": bool(errors), } def _resolve_cdp_url(cdp_url: str) -> str: if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"): return cdp_url if cdp_url.startswith("http://") or cdp_url.startswith("https://"): parsed = urlparse(cdp_url) host = parsed.hostname or "" port = parsed.port # Chrome DevTools rejects non-IP/non-localhost Host headers in some setups. # For docker service names, resolve to IP and query via numeric host. if host not in {"localhost", "127.0.0.1", "0.0.0.0"}: try: resolved_host = socket.gethostbyname(host) netloc = resolved_host if not port else f"{resolved_host}:{port}" parsed = parsed._replace(netloc=netloc) except OSError: pass version_url = urlunparse(parsed).rstrip("/") if not version_url.endswith("/json/version"): version_url = f"{version_url}/json/version" with urlopen(version_url, timeout=10) as response: # nosec B310 payload = json.loads(response.read().decode("utf-8")) ws_url = payload.get("webSocketDebuggerUrl") if not ws_url: raise RuntimeError(f"CDP endpoint did not return webSocketDebuggerUrl: {version_url}") # Keep a reachable host for ws:// URL when input used docker DNS alias. if host and host not in {"localhost", "127.0.0.1", "0.0.0.0"}: ws_parsed = urlparse(str(ws_url)) ws_netloc = ws_parsed.netloc ws_port = ws_parsed.port if ws_port is None: ws_port = 443 if ws_parsed.scheme == "wss" else 80 try: resolved_host = socket.gethostbyname(host) ws_netloc = f"{resolved_host}:{ws_port}" ws_url = urlunparse(ws_parsed._replace(netloc=ws_netloc)) except OSError: pass return str(ws_url) raise RuntimeError(f"Unsupported CDP URL scheme: {cdp_url}") async def _run(args: argparse.Namespace) -> int: api_key = _get_env("OPENAI_API_KEY") if not api_key: print(json.dumps({"success": False, "error": "OPENAI_API_KEY is not set"})) return 2 model = _get_env("BROWSER_USE_MODEL", _get_env("OPENAI_MODEL", "gpt-4o-mini")) base_url = _get_env("OPENAI_BASE_URL") raw_cdp_url = args.cdp_url or _get_env("BROWSER_USE_CDP_URL", "ws://chromium:3000/chromium?token=hermes-local") cdp_url = _resolve_cdp_url(raw_cdp_url) llm = ChatOpenAI( model=model, api_key=api_key, base_url=base_url, temperature=0.0, ) browser_session = BrowserSession(cdp_url=cdp_url) agent = Agent( task=_build_task(args.task, args.start_url), llm=llm, browser_session=browser_session, use_vision=False, ) history = await agent.run(max_steps=args.max_steps) payload = _serialize_history(history) print( json.dumps( { "success": not payload["has_errors"], "model": model, "cdp_url": cdp_url, "task": args.task, "result": payload, }, ensure_ascii=True, ) ) return 0 if not payload["has_errors"] else 1 def main() -> int: parser = argparse.ArgumentParser(description="Run browser-use task") parser.add_argument("--task", required=True, help="Natural language task for browser-use") parser.add_argument("--start-url", default=None, help="Optional URL to open first") parser.add_argument("--max-steps", type=int, default=20, help="Max agent steps") parser.add_argument("--cdp-url", default=None, help="CDP URL (ws://... or http://.../json/version host)") args = parser.parse_args() return asyncio.run(_run(args)) if __name__ == "__main__": raise SystemExit(main())