# Source: BrowserUse_and_ComputerUse_.../browser_env/browser_use_runner.py
#
# Code-viewer metadata at capture time: 457 lines, 15 KiB, Python.

import asyncio
import json
import os
import threading
import time
import uuid
from dataclasses import dataclass, field
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any
from urllib import error, request
from browser_use import Agent, Browser, ChatOpenAI
# Maximum number of seconds a human is given to clear a challenge; read once
# at import time from BROWSER_CAPTCHA_MAX_WAIT_SECONDS (default 900).
CAPTCHA_WAIT_TIMEOUT = int(os.environ.get("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))

# Process-wide registry of runner tasks keyed by task id. Every access must
# hold _RUNNER_LOCK.
_RUNNER_TASKS: dict[str, "RunnerTask"] = {}
_RUNNER_LOCK = threading.Lock()

# The marker tuples below are matched as plain substrings against lower-cased
# page text, so every entry must itself be lower-case.

# Cloudflare interstitial / Turnstile markers.
_CF_STRONG = (
    "just a moment",
    "attention required",
    "checking your browser",
    "cf-challenge",
    "cdn-cgi/challenge-platform",
    "__cf_chl",
    "turnstile",
)

# Google reCAPTCHA markers.
_RECAPTCHA_STRONG = (
    "g-recaptcha",
    "recaptcha/api2",
    "www.google.com/recaptcha",
    "grecaptcha",
)

# hCaptcha markers.
_HCAPTCHA_STRONG = (
    "hcaptcha",
    "newassets.hcaptcha.com",
    "js.hcaptcha.com/1/api.js",
)

# Vendor-agnostic markers. Order matters: only the first few hits are
# reported as signals, in this order.
_GENERIC_CAPTCHA_STRONG = (
    "captcha",
    "are you human",
    "verify you are human",
    "human verification",
    "bot detection",
    "security check",
    "press and hold",
)
def _json_response(handler, status_code, payload):
    """Write *payload* to *handler* as a complete JSON HTTP response.

    Args:
        handler: Request handler exposing ``send_response``, ``send_header``,
            ``end_headers`` and a writable ``wfile``.
        status_code: HTTP status code to send.
        payload: JSON-serializable object used as the response body.
    """
    # ensure_ascii=False keeps non-ASCII text readable; the byte length (not
    # the character count) is what Content-Length must advertise.
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    headers = (
        ("Content-Type", "application/json; charset=utf-8"),
        ("Content-Length", str(len(body))),
    )
    handler.send_response(status_code)
    for header_name, header_value in headers:
        handler.send_header(header_name, header_value)
    handler.end_headers()
    handler.wfile.write(body)
@dataclass
class RunnerTask:
task_id: str
task: str
browser_view_url: str
resume_token: str = field(default_factory=lambda: uuid.uuid4().hex)
created_at: float = field(default_factory=time.time)
status: str = "starting"
payload: dict[str, Any] | None = None
error: str | None = None
agent: Any = None
browser: Any = None
loop: asyncio.AbstractEventLoop | None = None
thread: threading.Thread | None = None
settled_event: threading.Event = field(default_factory=threading.Event)
finished: bool = False
awaiting: bool = False
aborted: bool = False
transition_count: int = 0
lock: threading.Lock = field(default_factory=threading.Lock)
def set_payload(self, status: str, payload: dict[str, Any]) -> None:
with self.lock:
self.status = status
self.payload = payload
self.transition_count += 1
self.awaiting = status == "awaiting_user_captcha"
self.finished = status in {"succeeded", "failed"}
self.settled_event.set()
def _get_task(task_id: str) -> RunnerTask | None:
    """Look up a registered task by id while holding the registry lock.

    Returns:
        The matching ``RunnerTask``, or ``None`` when the id is not registered.
    """
    with _RUNNER_LOCK:
        found = _RUNNER_TASKS.get(task_id)
    return found
def _put_task(task: RunnerTask) -> RunnerTask:
    """Register *task* under its ``task_id``, replacing any existing entry.

    Returns:
        The same ``task`` object, so the call can be used inline.
    """
    key = task.task_id
    with _RUNNER_LOCK:
        _RUNNER_TASKS[key] = task
    return task
async def _get_page_html(agent: Agent) -> str:
    """Fetch the outer HTML of the current document through the agent's CDP session.

    Best effort: any exception raised along the way is swallowed and an empty
    string is returned instead.
    """
    try:
        session = await agent.browser_session.get_or_create_cdp_session()
        session_id = session.session_id
        document = await session.cdp_client.send.DOM.getDocument(session_id=session_id)
        root_node_id = document["root"]["nodeId"]
        outer = await session.cdp_client.send.DOM.getOuterHTML(
            params={"nodeId": root_node_id},
            session_id=session_id,
        )
        return str(outer.get("outerHTML", ""))
    except Exception:
        return ""
async def _capture_page_state(agent: Agent) -> dict[str, Any]:
    """Snapshot the current page as ``{"url", "title", "html"}`` strings.

    URL and title are taken from the browser state summary when it is
    available (either a dict or an attribute-style object). Any value still
    empty afterwards is requested from the session directly. Every lookup is
    best effort: failures produce empty strings rather than exceptions.
    """
    try:
        summary = await agent.browser_session.get_browser_state_summary()
    except Exception:
        summary = None

    def _from_summary(name: str) -> str:
        # The summary may be a plain dict or an object with attributes.
        if summary is None:
            return ""
        if isinstance(summary, dict):
            return str(summary.get(name) or "")
        return str(getattr(summary, name, "") or "")

    async def _from_session(method_name: str) -> str:
        try:
            getter = getattr(agent.browser_session, method_name)
            return str(await getter() or "")
        except Exception:
            return ""

    url = _from_summary("url")
    title = _from_summary("title")
    if not url:
        url = await _from_session("get_current_page_url")
    if not title:
        title = await _from_session("get_current_page_title")

    return {"url": url, "title": title, "html": await _get_page_html(agent)}
def _classify_captcha(haystack: str) -> str:
    """Name the CAPTCHA vendor whose markers appear in *haystack*.

    Vendors are checked in priority order: Cloudflare, reCAPTCHA, hCaptcha.
    The first vendor with any marker present wins; ``"unknown"`` is returned
    when none match. *haystack* is expected to be lower-cased already.
    """
    vendors = (
        ("cloudflare", _CF_STRONG),
        ("recaptcha", _RECAPTCHA_STRONG),
        ("hcaptcha", _HCAPTCHA_STRONG),
    )
    for vendor, markers in vendors:
        for marker in markers:
            if marker in haystack:
                return vendor
    return "unknown"
def _detect_captcha_from_state(page_state: dict[str, Any]) -> tuple[bool, str, list[str]]:
    """Decide whether a page snapshot looks like a CAPTCHA or bot challenge.

    Args:
        page_state: Mapping with optional ``url``, ``title`` and ``html`` entries.

    Returns:
        ``(blocked, captcha_type, signals)``. ``signals`` lists the vendor
        labels that matched followed by up to three ``generic:<token>``
        entries; ``blocked`` is true when ``signals`` is non-empty.
    """

    def _lowered(key: str) -> str:
        return str(page_state.get(key) or "").lower()

    # Only the first 150000 characters of HTML are scanned.
    haystack = "\n".join((_lowered("url"), _lowered("title"), _lowered("html")[:150000]))

    vendor_checks = (
        ("cloudflare_challenge", _CF_STRONG),
        ("recaptcha", _RECAPTCHA_STRONG),
        ("hcaptcha", _HCAPTCHA_STRONG),
    )
    signals: list[str] = [
        label
        for label, markers in vendor_checks
        if any(marker in haystack for marker in markers)
    ]

    generic_hits = [token for token in _GENERIC_CAPTCHA_STRONG if token in haystack]
    signals += [f"generic:{token}" for token in generic_hits[:3]]

    return bool(signals), _classify_captcha(haystack), signals
async def _build_captcha_payload(task: RunnerTask, agent: Agent) -> dict[str, Any]:
    """Build the ``awaiting_user_captcha`` payload for a blocked task.

    Takes a fresh page snapshot, re-runs detection, and describes the
    challenge together with the hints a client needs to verify it was solved.

    Args:
        task: Task the payload is issued for; supplies ids, resume token and
            the live browser view URL.
        agent: Agent whose current page is inspected.

    Returns:
        A payload dict whose top-level fields are mirrored under
        ``human_intervention``.

    Raises:
        RuntimeError: If the fresh snapshot no longer shows a challenge.
    """
    page_state = await _capture_page_state(agent)
    blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
    if not blocked:
        raise RuntimeError("Captcha payload requested without an active challenge")

    # One timestamp for the whole payload, so the top-level and nested
    # "detected_at" values always agree.
    detected_at = time.time()

    verification = {
        "mode": "dom_url_title",
        "selectors_absent": [
            "iframe[src*='recaptcha']",
            "[class*='hcaptcha']",
            "[id*='captcha']",
            "form[action*='challenge']",
            "input[name='cf-turnstile-response']",
        ],
        "challenge_signals_absent": signals,
        "max_wait_seconds": CAPTCHA_WAIT_TIMEOUT,
    }
    # An empty view URL is reported as None rather than "".
    browser_view_url = task.browser_view_url or None
    instructions = (
        "Open the live browser view, complete the verification challenge manually, "
        "then return and reply 'ready' or 'done'."
    )
    return {
        "success": False,
        "status": "awaiting_user_captcha",
        "task_id": task.task_id,
        "session_id": task.task_id,
        "resume_token": task.resume_token,
        "browser_view_url": browser_view_url,
        "captcha_type": captcha_type,
        "instructions": instructions,
        "detected_at": detected_at,
        "page_url": page_state.get("url"),
        "page_title": page_state.get("title"),
        "verification": verification,
        "human_intervention": {
            "status": "awaiting_user_captcha",
            "task_id": task.task_id,
            "session_id": task.task_id,
            "resume_token": task.resume_token,
            "browser_view_url": browser_view_url,
            "captcha_type": captcha_type,
            "instructions": instructions,
            "detected_at": detected_at,
            "verification": verification,
        },
    }
async def _verify_captcha_state(task: RunnerTask) -> dict[str, Any]:
    """Re-inspect the task's current page and report whether it is still blocked.

    Returns:
        A failure dict when the task has no agent attached. Otherwise a report
        in which ``verified`` is true when no challenge signals remain and
        ``captcha_type`` is filled in only while the page is still blocked.
    """
    agent = task.agent
    if not agent:
        return {"success": False, "verified": False, "error": "Task is not attached to an active agent"}

    snapshot = await _capture_page_state(agent)
    still_blocked, captcha_type, signals = _detect_captcha_from_state(snapshot)

    report: dict[str, Any] = {
        "success": True,
        "task_id": task.task_id,
        "verified": not still_blocked,
        "captcha_type": None,
        "page_url": snapshot.get("url"),
        "page_title": snapshot.get("title"),
        "signals": signals,
        "verification_mode": "dom_url_title",
    }
    if still_blocked:
        report["captcha_type"] = captcha_type
    return report
async def _run_browser_task(task: RunnerTask):
    """Drive one browser-use agent run and publish its outcome on *task*.

    After every agent step the current page is checked for a CAPTCHA. When
    one is found, an ``awaiting_user_captcha`` payload is published and the
    agent is paused. When the run ends, a ``succeeded`` or ``failed`` payload
    is published unless the task is still awaiting a human.

    Browser, LLM and agent are constructed inside the ``try`` block, so a
    setup failure is reported as a ``failed`` payload (waking any waiter)
    instead of escaping with no payload set.
    """
    browser = None

    async def on_step_end(current_agent: Agent):
        # Nothing to do while a human is already handling a challenge, or
        # once the task is over.
        if task.awaiting or task.finished or task.aborted:
            return
        page_state = await _capture_page_state(current_agent)
        blocked, _, _ = _detect_captcha_from_state(page_state)
        if not blocked:
            return
        try:
            payload = await _build_captcha_payload(task, current_agent)
        except RuntimeError:
            # The payload builder takes its own snapshot and raises when the
            # challenge vanished in between; treat that as "not blocked"
            # instead of failing the whole run.
            return
        task.set_payload("awaiting_user_captcha", payload)
        current_agent.pause()

    try:
        cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
        browser = Browser(cdp_url=cdp_url)
        llm = ChatOpenAI(
            model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_BASE_URL"),
            temperature=0.0,
        )
        agent = Agent(task=task.task, llm=llm, browser=browser)
        task.browser = browser
        task.agent = agent

        history = await agent.run(on_step_end=on_step_end)
        if task.aborted:
            task.set_payload(
                "failed",
                {"success": False, "status": "failed", "error": task.error or "Task aborted during CAPTCHA flow."},
            )
            return
        if task.awaiting:
            # Still waiting on a human; the awaiting payload stays current.
            return
        task.set_payload(
            "succeeded",
            {
                "success": True,
                "status": "succeeded",
                "result": history.final_result(),
                "browser_view_url": task.browser_view_url or None,
            },
        )
    except Exception as err:
        # Do not overwrite an awaiting or terminal payload already published.
        if not task.awaiting and not task.finished:
            task.set_payload(
                "failed",
                {"success": False, "status": "failed", "error": f"Browser automation failed: {err}"},
            )
    finally:
        # Keep the browser open while a human still needs it for the CAPTCHA.
        if browser is not None and not task.awaiting:
            try:
                await browser.close()
            except Exception:
                # Best-effort cleanup; the outcome has already been published.
                pass
def _runner_thread_main(task: RunnerTask) -> None:
    """Thread entry point: run the task's coroutine on a private event loop.

    The loop is stored on ``task.loop`` so other threads can schedule work
    onto it. On exit, leftover asyncio tasks are cancelled, the loop is
    closed, ``task.loop`` is cleared, and ``task.settled_event`` is set so no
    waiter stays blocked after the worker is gone.
    """
    loop = asyncio.new_event_loop()
    task.loop = loop
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(_run_browser_task(task))
    except Exception as err:
        # Record an escaped error as a failed outcome unless a more specific
        # payload has already been published.
        if not task.awaiting and not task.finished:
            task.set_payload(
                "failed",
                {"success": False, "status": "failed", "error": f"Browser automation failed: {err}"},
            )
    finally:
        try:
            pending = asyncio.all_tasks(loop=loop)
            for pending_task in pending:
                pending_task.cancel()
            if pending:
                # Let the cancelled tasks unwind before closing the loop.
                loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
        finally:
            loop.close()
            # A closed loop cannot accept new work; clearing the reference
            # lets callers that check ``task.loop`` see that it is gone.
            task.loop = None
            # Nothing further will be published from this thread.
            task.settled_event.set()
def _start_task(task_id: str, task_text: str) -> dict[str, Any]:
    """Start a task on a worker thread and block until it first settles.

    "Settles" means the worker published a payload: success, failure, or a
    request for human CAPTCHA help.

    Args:
        task_id: Caller-chosen identifier; starting an id that is already
            registered returns that task's current payload instead.
        task_text: Natural-language instruction handed to the agent.

    Returns:
        The task's payload, or a failure dict if the worker exited without
        publishing one.
    """
    candidate = RunnerTask(
        task_id=task_id,
        task=task_text,
        browser_view_url=os.getenv("BROWSER_VIEW_URL", ""),
    )
    # Check and insert under one lock acquisition so two concurrent requests
    # with the same id cannot both start a worker.
    with _RUNNER_LOCK:
        state = _RUNNER_TASKS.setdefault(task_id, candidate)
    if state is not candidate:
        return state.payload or {
            "success": False,
            "status": state.status,
            "error": "Task already exists",
            "task_id": task_id,
        }

    thread = threading.Thread(
        target=_runner_thread_main,
        args=(state,),
        daemon=True,
        name=f"browser-task-{task_id[:8]}",
    )
    state.thread = thread
    thread.start()

    # Wait in short slices so a worker that died without publishing anything
    # ends the wait instead of blocking this request forever.
    while not state.settled_event.wait(timeout=1.0):
        if not thread.is_alive():
            break

    return state.payload or {
        "success": False,
        "status": "failed",
        "error": "Task exited without payload",
        "task_id": task_id,
    }
def _resume_task(task_id: str) -> dict[str, Any]:
    """Resume a paused task and block until it settles again.

    Returns:
        The next payload the task publishes. A task that already finished
        returns its final payload immediately. A failure dict is returned
        when the task is unknown, cannot be resumed, or its worker exits
        without publishing a new payload.
    """
    state = _get_task(task_id)
    if not state:
        return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}

    no_payload = {
        "success": False,
        "status": "failed",
        "error": "Resume did not produce a payload",
        "task_id": task_id,
    }
    if state.finished:
        # Nothing is left to resume; clearing the event here would block
        # this call forever because no further payload will arrive.
        return state.payload or no_payload

    loop, agent = state.loop, state.agent
    if not loop or not agent:
        return {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}

    was_awaiting = state.awaiting
    state.awaiting = False
    # Clear before scheduling so the payload published after the resume is
    # the one this call wakes up on.
    state.settled_event.clear()
    try:
        loop.call_soon_threadsafe(agent.resume)
    except RuntimeError:
        # The worker's loop is already closed; undo the state changes.
        state.awaiting = was_awaiting
        state.settled_event.set()
        return {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}

    while not state.settled_event.wait(timeout=1.0):
        worker = state.thread
        if worker is None or not worker.is_alive():
            if state.settled_event.is_set():
                break  # settled just as the worker exited
            # The worker is gone and never published; do not hand back the
            # stale pre-resume payload.
            return no_payload

    return state.payload or no_payload
def _verify_task(task_id: str) -> dict[str, Any]:
    """Check, on the task's own event loop, whether its CAPTCHA has cleared.

    Returns:
        The verification report, or a failure dict (``verified`` false) when
        the task is unknown, its loop is unavailable, or the check does not
        complete within 20 seconds.
    """
    # Local import: concurrent.futures exceptions are not builtins on every
    # Python version this file's syntax supports.
    from concurrent.futures import CancelledError as FutureCancelledError
    from concurrent.futures import TimeoutError as FutureTimeoutError

    state = _get_task(task_id)
    if not state:
        return {"success": False, "verified": False, "error": "Task not found", "task_id": task_id}
    loop = state.loop
    if not loop:
        return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}

    coro = _verify_captcha_state(state)
    try:
        future = asyncio.run_coroutine_threadsafe(coro, loop)
    except RuntimeError as err:
        # Scheduling onto a closed loop raises; close the coroutine so it
        # does not trigger a "never awaited" warning.
        coro.close()
        return {
            "success": False,
            "verified": False,
            "error": f"Task event loop is unavailable: {err}",
            "task_id": task_id,
        }

    try:
        return future.result(timeout=20)
    except FutureTimeoutError:
        # Do not leave the check running after the caller has given up.
        future.cancel()
        return {
            "success": False,
            "verified": False,
            "error": "Timed out waiting for CAPTCHA verification",
            "task_id": task_id,
        }
    except FutureCancelledError:
        return {
            "success": False,
            "verified": False,
            "error": "CAPTCHA verification was cancelled",
            "task_id": task_id,
        }
def _abort_task(task_id: str, reason: str | None = None) -> dict[str, Any]:
    """Abort a task, typically one parked on a CAPTCHA.

    Args:
        task_id: Identifier of the task to abort.
        reason: Optional message stored as the task's error; a default
            message is used when omitted or empty.

    Returns:
        The ``failed`` payload with ``error_code`` ``"captcha_aborted"``. A
        task that already finished returns its existing final payload
        unchanged. An unknown id returns a "Task not found" failure dict.
    """
    state = _get_task(task_id)
    if not state:
        return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}

    if state.finished:
        # The outcome is already final; aborting must not turn a succeeded
        # task into a failed one.
        return state.payload or {
            "success": False,
            "status": state.status,
            "error": state.error or "Task already finished",
            "task_id": task_id,
        }

    state.aborted = True
    state.error = reason or "CAPTCHA flow aborted by user."

    loop, agent = state.loop, state.agent
    if loop and agent:
        try:
            # Ask the agent to stop so it does not keep working through the
            # task once it is un-paused.
            # NOTE(review): assumes Agent.stop() is a synchronous method like
            # pause()/resume() — confirm against the installed browser_use.
            stop = getattr(agent, "stop", None)
            if callable(stop):
                loop.call_soon_threadsafe(stop)
            # Un-pause so the run loop can observe the abort and return.
            loop.call_soon_threadsafe(agent.resume)
        except RuntimeError:
            # The worker's loop is already closed; nothing left to wake.
            pass

    state.set_payload(
        "failed",
        {"success": False, "status": "failed", "task_id": task_id, "error": state.error, "error_code": "captcha_aborted"},
    )
    return state.payload
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
    """JSON-over-HTTP front end for the browser task runner.

    Routes:
        GET  /health  - 200 when the browser debug endpoint answers, else 503.
        POST /run     - start a task (``task`` required, ``task_id`` optional).
        POST /verify  - check whether a task's CAPTCHA has cleared.
        POST /resume  - resume a paused task.
        POST /abort   - abort a task (optional ``reason``).

    Every response body is a JSON object.
    """

    def do_GET(self):
        """Serve the health probe; any other path is a 404."""
        if self.path != "/health":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return
        debug_url = os.getenv("BROWSER_HEALTH_URL", "http://127.0.0.1:9222/json/version")
        try:
            with request.urlopen(debug_url, timeout=2):
                pass
        except Exception as err:
            _json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})
            return
        _json_response(self, 200, {"success": True})

    def do_POST(self):
        """Parse the JSON body, dispatch on the path, and write the JSON reply."""
        try:
            content_length = int(self.headers.get("Content-Length", "0"))
            # A non-positive length means "no body"; read(-1) would block
            # until the client closes the connection.
            raw = self.rfile.read(content_length) if content_length > 0 else b""
            payload = json.loads(raw.decode("utf-8")) if raw else {}
        except ValueError:
            # Covers a non-numeric Content-Length, invalid UTF-8 and
            # malformed JSON (all are ValueError subclasses).
            _json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
            return
        if not isinstance(payload, dict):
            _json_response(self, 400, {"success": False, "error": "JSON payload must be an object"})
            return

        try:
            status_code, body = self._dispatch(payload)
        except Exception as err:
            # Always answer with JSON rather than dropping the connection.
            status_code, body = 500, {"success": False, "error": f"Internal error: {err}"}
        _json_response(self, status_code, body)

    def _dispatch(self, payload):
        """Route a parsed POST body; return ``(http_status, response_dict)``."""
        if self.path == "/run":
            task = payload.get("task", "")
            if not isinstance(task, str) or not task.strip():
                return 400, {"success": False, "error": "Field 'task' is required"}
            task_id = str(payload.get("task_id") or uuid.uuid4().hex)
            return 200, _start_task(task_id=task_id, task_text=task.strip())

        if self.path in ("/verify", "/resume", "/abort"):
            task_id = str(payload.get("task_id") or "")
            if not task_id:
                return 400, {"success": False, "error": "Field 'task_id' is required"}
            if self.path == "/verify":
                return 200, _verify_task(task_id)
            if self.path == "/resume":
                return 200, _resume_task(task_id)
            return 200, _abort_task(task_id, reason=payload.get("reason"))

        return 404, {"success": False, "error": "Not found"}

    def log_message(self, format_str, *args):
        """Silence the base class's per-request logging."""
        return
def main():
    """Start the RPC server and serve requests until the process exits.

    The bind address comes from ``BROWSER_USE_RPC_HOST`` (default
    ``0.0.0.0``) and ``BROWSER_USE_RPC_PORT`` (default ``8787``).
    """
    bind_host = os.getenv("BROWSER_USE_RPC_HOST", "0.0.0.0")
    bind_port = int(os.getenv("BROWSER_USE_RPC_PORT", "8787"))
    address = (bind_host, bind_port)
    httpd = ThreadingHTTPServer(address, BrowserUseRPCHandler)
    print(f"browser-use RPC listening on {bind_host}:{bind_port}")
    httpd.serve_forever()


if __name__ == "__main__":
    main()