BrowserUse_and_ComputerUse_.../browser-use/scripts/run_browser_use.py
Кобылкевич Фёдор 74cb5455ca update skill
2026-03-26 23:28:59 +03:00

181 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""Run browser-use task against a Chromium CDP endpoint."""
import argparse
import asyncio
import json
import os
import socket
from pathlib import Path
from typing import Any
from urllib.parse import urlparse, urlunparse
from urllib.request import urlopen
from browser_use import Agent, BrowserSession
from browser_use.llm import ChatOpenAI
# Dotenv files that are scanned, in this order, when a setting is not present
# in the process environment.
ENV_FALLBACK_PATHS = tuple(
    Path(location)
    for location in (
        "/workspace/.env",
        "/workspace/workspace/.env",
        "/root/.hermes/.env",
    )
)
def _read_env_from_files(name: str) -> str | None:
for env_path in ENV_FALLBACK_PATHS:
if not env_path.exists():
continue
try:
for raw_line in env_path.read_text(encoding="utf-8").splitlines():
line = raw_line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
if key.strip() == name:
cleaned = value.strip().strip('"').strip("'")
return cleaned or None
except OSError:
continue
return None
def _get_env(name: str, default: str | None = None) -> str | None:
value = os.getenv(name)
if value:
return value
from_file = _read_env_from_files(name)
if from_file:
return from_file
return default if default else None
def _build_task(task: str, start_url: str | None) -> str:
if not start_url:
return task
return f"Start from {start_url}. Task: {task}"
def _serialize_history(history: Any) -> dict[str, Any]:
result = ""
errors: list[str] = []
if hasattr(history, "final_result"):
try:
result = history.final_result() or ""
except Exception:
result = ""
if hasattr(history, "errors"):
try:
raw_errors = list(history.errors())
errors = [str(e) for e in raw_errors if e]
except Exception:
errors = []
return {
"final_result": result,
"errors": errors,
"has_errors": bool(errors),
}
def _resolve_cdp_url(cdp_url: str) -> str:
if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"):
return cdp_url
if cdp_url.startswith("http://") or cdp_url.startswith("https://"):
parsed = urlparse(cdp_url)
host = parsed.hostname or ""
port = parsed.port
# Chrome DevTools rejects non-IP/non-localhost Host headers in some setups.
# For docker service names, resolve to IP and query via numeric host.
if host not in {"localhost", "127.0.0.1", "0.0.0.0"}:
try:
resolved_host = socket.gethostbyname(host)
netloc = resolved_host if not port else f"{resolved_host}:{port}"
parsed = parsed._replace(netloc=netloc)
except OSError:
pass
version_url = urlunparse(parsed).rstrip("/")
if not version_url.endswith("/json/version"):
version_url = f"{version_url}/json/version"
with urlopen(version_url, timeout=10) as response: # nosec B310
payload = json.loads(response.read().decode("utf-8"))
ws_url = payload.get("webSocketDebuggerUrl")
if not ws_url:
raise RuntimeError(f"CDP endpoint did not return webSocketDebuggerUrl: {version_url}")
# Keep a reachable host for ws:// URL when input used docker DNS alias.
if host and host not in {"localhost", "127.0.0.1", "0.0.0.0"}:
ws_parsed = urlparse(str(ws_url))
ws_netloc = ws_parsed.netloc
ws_port = ws_parsed.port
if ws_port is None:
ws_port = 443 if ws_parsed.scheme == "wss" else 80
try:
resolved_host = socket.gethostbyname(host)
ws_netloc = f"{resolved_host}:{ws_port}"
ws_url = urlunparse(ws_parsed._replace(netloc=ws_netloc))
except OSError:
pass
return str(ws_url)
raise RuntimeError(f"Unsupported CDP URL scheme: {cdp_url}")
async def _run(args: argparse.Namespace) -> int:
    """Run one browser-use task and print a JSON report to stdout.

    Settings are read through ``_get_env``: ``OPENAI_API_KEY`` (required),
    ``BROWSER_USE_MODEL`` falling back to ``OPENAI_MODEL`` and then
    ``gpt-4o-mini``, ``OPENAI_BASE_URL``, and ``BROWSER_USE_CDP_URL`` (used
    only when ``--cdp-url`` was not given).

    Args:
        args: Parsed CLI options; reads ``task``, ``start_url``, ``max_steps``
            and ``cdp_url``.

    Returns:
        ``0`` when the agent reported no errors, ``1`` when it did, and ``2``
        when the run could not start (missing API key or the CDP endpoint
        could not be resolved).
    """
    api_key = _get_env("OPENAI_API_KEY")
    if not api_key:
        print(json.dumps({"success": False, "error": "OPENAI_API_KEY is not set"}))
        return 2

    model = _get_env("BROWSER_USE_MODEL", _get_env("OPENAI_MODEL", "gpt-4o-mini"))
    base_url = _get_env("OPENAI_BASE_URL")
    raw_cdp_url = args.cdp_url or _get_env("BROWSER_USE_CDP_URL", "ws://chromium:3000/chromium?token=hermes-local")

    # Report an unreachable or invalid CDP endpoint in the same JSON shape as
    # the missing-key case, so stdout stays machine-readable. URLError is an
    # OSError subclass and JSON decoding errors are ValueError subclasses.
    try:
        cdp_url = _resolve_cdp_url(raw_cdp_url)
    except (OSError, RuntimeError, ValueError) as exc:
        print(json.dumps({"success": False, "error": f"Failed to resolve CDP URL: {exc}"}))
        return 2

    llm = ChatOpenAI(
        model=model,
        api_key=api_key,
        base_url=base_url,
        temperature=0.0,
    )
    browser_session = BrowserSession(cdp_url=cdp_url)
    agent = Agent(
        task=_build_task(args.task, args.start_url),
        llm=llm,
        browser_session=browser_session,
        use_vision=False,
    )
    history = await agent.run(max_steps=args.max_steps)
    payload = _serialize_history(history)
    succeeded = not payload["has_errors"]
    print(
        json.dumps(
            {
                "success": succeeded,
                "model": model,
                "cdp_url": cdp_url,
                "task": args.task,
                "result": payload,
            },
            ensure_ascii=True,
        )
    )
    return 0 if succeeded else 1
def main() -> int:
    """Parse the command-line options and run the task.

    Returns:
        The exit code produced by ``_run``.
    """
    cli = argparse.ArgumentParser(description="Run browser-use task")
    option_specs = (
        ("--task", {"required": True, "help": "Natural language task for browser-use"}),
        ("--start-url", {"default": None, "help": "Optional URL to open first"}),
        ("--max-steps", {"type": int, "default": 20, "help": "Max agent steps"}),
        ("--cdp-url", {"default": None, "help": "CDP URL (ws://... or http://.../json/version host)"}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    parsed_args = cli.parse_args()
    return asyncio.run(_run(parsed_args))
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)