"""HTTP RPC front-end that runs browser-use agents with a manual CAPTCHA hand-off.

Each ``/run`` request starts an agent on a dedicated thread with its own
asyncio event loop. After every agent step the current page is scanned for
CAPTCHA / bot-challenge markers; when one is found the agent is paused and an
``awaiting_user_captcha`` payload is returned to the caller, who can then use
``/verify``, ``/resume`` or ``/abort`` with the same ``task_id``.
"""

import asyncio
import concurrent.futures
import json
import os
import threading
import time
import uuid
from dataclasses import dataclass, field
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any
from urllib import error, request

from browser_use import Agent, Browser, ChatOpenAI

# Advertised to clients in the verification block of the CAPTCHA payload.
CAPTCHA_WAIT_TIMEOUT = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))

# Registry of every task started by this process, keyed by task_id.
_RUNNER_TASKS: dict[str, "RunnerTask"] = {}
_RUNNER_LOCK = threading.Lock()

# Lower-case substrings searched for in "url\ntitle\nhtml" of the current page.
_CF_STRONG = (
    "just a moment",
    "attention required",
    "checking your browser",
    "cf-challenge",
    "cdn-cgi/challenge-platform",
    "__cf_chl",
    "turnstile",
)
_RECAPTCHA_STRONG = (
    "g-recaptcha",
    "recaptcha/api2",
    "www.google.com/recaptcha",
    "grecaptcha",
)
_HCAPTCHA_STRONG = (
    "hcaptcha",
    "newassets.hcaptcha.com",
    "js.hcaptcha.com/1/api.js",
)
_GENERIC_CAPTCHA_STRONG = (
    "captcha",
    "are you human",
    "verify you are human",
    "human verification",
    "bot detection",
    "security check",
    "press and hold",
)


def _json_response(handler, status_code, payload):
    """Serialize *payload* as UTF-8 JSON and send it as the full HTTP response."""
    data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    handler.send_response(status_code)
    handler.send_header("Content-Type", "application/json; charset=utf-8")
    handler.send_header("Content-Length", str(len(data)))
    handler.end_headers()
    handler.wfile.write(data)


@dataclass
class RunnerTask:
    """Mutable state shared between an HTTP handler thread and a runner thread."""

    task_id: str
    task: str
    browser_view_url: str
    resume_token: str = field(default_factory=lambda: uuid.uuid4().hex)
    created_at: float = field(default_factory=time.time)
    status: str = "starting"
    payload: dict[str, Any] | None = None
    error: str | None = None
    agent: Any = None
    browser: Any = None
    loop: asyncio.AbstractEventLoop | None = None
    thread: threading.Thread | None = None
    # Set whenever a new payload is published; HTTP threads block on it.
    settled_event: threading.Event = field(default_factory=threading.Event)
    finished: bool = False
    awaiting: bool = False
    aborted: bool = False
    transition_count: int = 0
    lock: threading.Lock = field(default_factory=threading.Lock)

    def set_payload(self, status: str, payload: dict[str, Any]) -> None:
        """Publish a new status/payload pair and wake any waiting HTTP thread."""
        with self.lock:
            self.status = status
            self.payload = payload
            self.transition_count += 1
            self.awaiting = status == "awaiting_user_captcha"
            self.finished = status in {"succeeded", "failed"}
            self.settled_event.set()


def _get_task(task_id: str) -> RunnerTask | None:
    """Return the registered task for *task_id*, or ``None`` if unknown."""
    with _RUNNER_LOCK:
        return _RUNNER_TASKS.get(task_id)


def _put_task(task: RunnerTask) -> RunnerTask:
    """Register *task* under its ``task_id`` (replacing any previous entry)."""
    with _RUNNER_LOCK:
        _RUNNER_TASKS[task.task_id] = task
    return task


async def _get_page_html(agent: Agent) -> str:
    """Return the outer HTML of the current document via CDP, or ``""`` on any error."""
    try:
        cdp_session = await agent.browser_session.get_or_create_cdp_session()
        doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id)
        html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML(
            params={"nodeId": doc["root"]["nodeId"]},
            session_id=cdp_session.session_id,
        )
        return str(html_result.get("outerHTML", ""))
    except Exception:
        # Best effort: detection falls back to URL/title when HTML is unavailable.
        return ""


async def _capture_page_state(agent: Agent) -> dict[str, Any]:
    """Collect ``url``, ``title`` and ``html`` of the agent's current page.

    Every lookup is best effort; a value that cannot be obtained is returned
    as an empty string.
    """
    url = ""
    title = ""
    summary = None
    try:
        summary = await agent.browser_session.get_browser_state_summary()
    except Exception:
        summary = None

    if summary is not None:
        # The summary is accepted either as a mapping or as an attribute object.
        if isinstance(summary, dict):
            url = str(summary.get("url") or "")
            title = str(summary.get("title") or "")
        else:
            url = str(getattr(summary, "url", "") or "")
            title = str(getattr(summary, "title", "") or "")

    if not url:
        try:
            url = str(await agent.browser_session.get_current_page_url() or "")
        except Exception:
            url = ""
    if not title:
        try:
            title = str(await agent.browser_session.get_current_page_title() or "")
        except Exception:
            title = ""

    html = await _get_page_html(agent)
    return {"url": url, "title": title, "html": html}


def _classify_captcha(haystack: str) -> str:
    """Name the challenge vendor found in *haystack* (first match wins), else ``"unknown"``."""
    if any(token in haystack for token in _CF_STRONG):
        return "cloudflare"
    if any(token in haystack for token in _RECAPTCHA_STRONG):
        return "recaptcha"
    if any(token in haystack for token in _HCAPTCHA_STRONG):
        return "hcaptcha"
    return "unknown"


def _detect_captcha_from_state(page_state: dict[str, Any]) -> tuple[bool, str, list[str]]:
    """Scan a page-state dict for challenge markers.

    Returns ``(blocked, captcha_type, signals)`` where ``blocked`` is true when
    at least one marker matched, and ``signals`` lists the matched categories
    (at most three ``generic:<token>`` entries are included).
    """
    url = str(page_state.get("url") or "").lower()
    title = str(page_state.get("title") or "").lower()
    html = str(page_state.get("html") or "").lower()
    # Only the first 150000 characters of HTML are scanned.
    haystack = "\n".join([url, title, html[:150000]])

    signals: list[str] = []
    if any(token in haystack for token in _CF_STRONG):
        signals.append("cloudflare_challenge")
    if any(token in haystack for token in _RECAPTCHA_STRONG):
        signals.append("recaptcha")
    if any(token in haystack for token in _HCAPTCHA_STRONG):
        signals.append("hcaptcha")
    generic_hits = [token for token in _GENERIC_CAPTCHA_STRONG if token in haystack]
    if generic_hits:
        signals.extend(f"generic:{token}" for token in generic_hits[:3])

    blocked = bool(signals)
    captcha_type = _classify_captcha(haystack)
    return blocked, captcha_type, signals


async def _build_captcha_payload(
    task: RunnerTask,
    agent: Agent,
    page_state: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the ``awaiting_user_captcha`` payload for *task*.

    Args:
        task: Task the payload is reported for.
        agent: Agent whose page is inspected when *page_state* is not given.
        page_state: Optional already-captured page state. Passing the state
            that triggered detection avoids a second capture and the race
            where the challenge vanishes between the two captures.

    Raises:
        RuntimeError: If the page state shows no active challenge.
    """
    if page_state is None:
        page_state = await _capture_page_state(agent)
    blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
    if not blocked:
        raise RuntimeError("Captcha payload requested without an active challenge")

    verification = {
        "mode": "dom_url_title",
        "selectors_absent": [
            "iframe[src*='recaptcha']",
            "[class*='hcaptcha']",
            "[id*='captcha']",
            "form[action*='challenge']",
            "input[name='cf-turnstile-response']",
        ],
        "challenge_signals_absent": signals,
        "max_wait_seconds": CAPTCHA_WAIT_TIMEOUT,
    }
    browser_view_url = task.browser_view_url or None
    instructions = (
        "Open the live browser view, complete the verification challenge manually, "
        "then return and reply 'ready' or 'done'."
    )
    # One timestamp so the top-level and nested copies always agree.
    detected_at = time.time()
    return {
        "success": False,
        "status": "awaiting_user_captcha",
        "task_id": task.task_id,
        "session_id": task.task_id,
        "resume_token": task.resume_token,
        "browser_view_url": browser_view_url,
        "captcha_type": captcha_type,
        "instructions": instructions,
        "detected_at": detected_at,
        "page_url": page_state.get("url"),
        "page_title": page_state.get("title"),
        "verification": verification,
        "human_intervention": {
            "status": "awaiting_user_captcha",
            "task_id": task.task_id,
            "session_id": task.task_id,
            "resume_token": task.resume_token,
            "browser_view_url": browser_view_url,
            "captcha_type": captcha_type,
            "instructions": instructions,
            "detected_at": detected_at,
            "verification": verification,
        },
    }


async def _verify_captcha_state(task: RunnerTask) -> dict[str, Any]:
    """Re-scan the task's current page and report whether the challenge is gone."""
    if not task.agent:
        return {"success": False, "verified": False, "error": "Task is not attached to an active agent"}
    page_state = await _capture_page_state(task.agent)
    blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
    return {
        "success": True,
        "task_id": task.task_id,
        "verified": not blocked,
        "captcha_type": captcha_type if blocked else None,
        "page_url": page_state.get("url"),
        "page_title": page_state.get("title"),
        "signals": signals,
        "verification_mode": "dom_url_title",
    }


async def _run_browser_task(task: RunnerTask):
    """Run the agent for *task* and publish its outcome through ``task.set_payload``.

    Browser/LLM/agent construction happens inside the ``try`` so that a setup
    failure is reported as a ``failed`` payload instead of leaving the caller
    of ``/run`` waiting forever.
    """
    browser = None
    try:
        cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
        browser = Browser(cdp_url=cdp_url)
        llm = ChatOpenAI(
            model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_BASE_URL"),
            temperature=0.0,
        )
        agent = Agent(task=task.task, llm=llm, browser=browser)
        task.browser = browser
        task.agent = agent

        async def on_step_end(current_agent: Agent):
            """After each step, pause the agent if the page shows a challenge."""
            if task.awaiting or task.finished or task.aborted:
                return
            page_state = await _capture_page_state(current_agent)
            blocked, _, _ = _detect_captcha_from_state(page_state)
            if not blocked:
                return
            # Reuse the state that triggered detection rather than re-capturing.
            payload = await _build_captcha_payload(task, current_agent, page_state=page_state)
            task.set_payload("awaiting_user_captcha", payload)
            current_agent.pause()

        history = await agent.run(on_step_end=on_step_end)
        if task.aborted:
            task.set_payload(
                "failed",
                {"success": False, "status": "failed", "error": task.error or "Task aborted during CAPTCHA flow."},
            )
            return
        if task.awaiting:
            # Still waiting on the user; keep the awaiting payload as is.
            return
        task.set_payload(
            "succeeded",
            {
                "success": True,
                "status": "succeeded",
                "result": history.final_result(),
                "browser_view_url": task.browser_view_url or None,
            },
        )
    except Exception as err:
        if not task.awaiting and not task.finished:
            task.set_payload(
                "failed",
                {"success": False, "status": "failed", "error": f"Browser automation failed: {err}"},
            )
    finally:
        # The browser is left open while the user is solving a challenge.
        if browser is not None and not task.awaiting:
            try:
                await browser.close()
            except Exception:
                pass  # best-effort cleanup


def _runner_thread_main(task: RunnerTask) -> None:
    """Thread target: own an event loop, run the task, then tear the loop down."""
    loop = asyncio.new_event_loop()
    task.loop = loop
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(_run_browser_task(task))
    finally:
        try:
            pending = asyncio.all_tasks(loop=loop)
            for pending_task in pending:
                pending_task.cancel()
            if pending:
                loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
            loop.close()
        finally:
            # Safety net: never exit without releasing threads blocked on
            # settled_event (e.g. when the coroutine was cancelled).
            if not task.finished and not task.awaiting:
                task.set_payload(
                    "failed",
                    {
                        "success": False,
                        "status": "failed",
                        "error": "Browser task runner exited unexpectedly.",
                        "task_id": task.task_id,
                    },
                )


def _start_task(task_id: str, task_text: str) -> dict[str, Any]:
    """Start a runner thread for a new task and block until its first payload.

    If *task_id* is already registered, the existing task's latest payload
    (or an "already exists" notice) is returned and nothing is started.
    """
    candidate = RunnerTask(
        task_id=task_id,
        task=task_text,
        browser_view_url=os.getenv("BROWSER_VIEW_URL", ""),
    )
    # Check-and-insert under one lock acquisition so two concurrent requests
    # with the same task_id cannot both start a runner.
    with _RUNNER_LOCK:
        existing = _RUNNER_TASKS.get(task_id)
        if existing is None:
            _RUNNER_TASKS[task_id] = candidate
    if existing is not None:
        return existing.payload or {
            "success": False,
            "status": existing.status,
            "error": "Task already exists",
            "task_id": task_id,
        }

    state = candidate
    thread = threading.Thread(
        target=_runner_thread_main,
        args=(state,),
        daemon=True,
        name=f"browser-task-{task_id[:8]}",
    )
    state.thread = thread
    thread.start()
    state.settled_event.wait()
    return state.payload or {
        "success": False,
        "status": "failed",
        "error": "Task exited without payload",
        "task_id": task_id,
    }


def _resume_task(task_id: str) -> dict[str, Any]:
    """Resume a paused agent and block until the task publishes its next payload.

    A task that has already finished returns its final payload immediately
    instead of clearing the event and waiting for a payload that never comes.
    """
    state = _get_task(task_id)
    if not state:
        return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}
    if not state.loop or not state.agent:
        return {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}

    # Same lock as set_payload, so a final payload published concurrently is
    # either seen here (finished) or published after the clear -- never lost.
    with state.lock:
        if state.finished:
            return state.payload or {
                "success": False,
                "status": "failed",
                "error": "Resume did not produce a payload",
                "task_id": task_id,
            }
        state.awaiting = False
        state.settled_event.clear()

    try:
        state.loop.call_soon_threadsafe(state.agent.resume)
    except RuntimeError:
        # The runner's event loop is already closed; nothing can be resumed.
        state.settled_event.set()
        return {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}

    state.settled_event.wait()
    return state.payload or {
        "success": False,
        "status": "failed",
        "error": "Resume did not produce a payload",
        "task_id": task_id,
    }


def _verify_task(task_id: str) -> dict[str, Any]:
    """Run the CAPTCHA re-check on the task's own event loop (20 s limit)."""
    state = _get_task(task_id)
    if not state:
        return {"success": False, "verified": False, "error": "Task not found", "task_id": task_id}
    if not state.loop or state.loop.is_closed():
        return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}

    coro = _verify_captcha_state(state)
    try:
        future = asyncio.run_coroutine_threadsafe(coro, state.loop)
    except RuntimeError:
        # Loop closed between the check above and scheduling.
        coro.close()
        return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}

    try:
        return future.result(timeout=20)
    except concurrent.futures.TimeoutError:
        future.cancel()
        return {"success": False, "verified": False, "error": "Verification timed out", "task_id": task_id}
    except concurrent.futures.CancelledError:
        return {"success": False, "verified": False, "error": "Verification was cancelled", "task_id": task_id}


def _abort_task(task_id: str, reason: str | None = None) -> dict[str, Any]:
    """Mark the task aborted, wake the agent, and publish a ``failed`` payload."""
    state = _get_task(task_id)
    if not state:
        return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}

    state.aborted = True
    state.error = reason or "CAPTCHA flow aborted by user."
    if state.loop and state.agent:
        # NOTE(review): this resumes the agent rather than stopping it, so the
        # agent appears to keep running after an abort -- confirm whether
        # agent.stop() is the intended call.
        try:
            state.loop.call_soon_threadsafe(state.agent.resume)
        except RuntimeError:
            pass  # loop already closed: the runner has exited, nothing to wake

    payload = {
        "success": False,
        "status": "failed",
        "task_id": task_id,
        "error": state.error,
        "error_code": "captcha_aborted",
    }
    state.set_payload("failed", payload)
    # Return the local dict: the runner thread may replace state.payload at any time.
    return payload


class BrowserUseRPCHandler(BaseHTTPRequestHandler):
    """JSON-over-HTTP endpoints: GET /health, POST /run, /verify, /resume, /abort."""

    def do_GET(self):
        """Report 200 when the browser debug endpoint answers, else 503."""
        if self.path != "/health":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return
        try:
            debug_url = os.getenv("BROWSER_HEALTH_URL", "http://127.0.0.1:9222/json/version")
            with request.urlopen(debug_url, timeout=2):
                pass
            _json_response(self, 200, {"success": True})
        except Exception as err:
            _json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})

    def _require_task_id(self, payload):
        """Return the request's task_id, or send a 400 and return ``None``."""
        task_id = str(payload.get("task_id") or "")
        if not task_id:
            _json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
            return None
        return task_id

    def do_POST(self):
        """Parse the JSON body and dispatch on the request path."""
        try:
            content_length = int(self.headers.get("Content-Length", "0"))
            if content_length < 0:
                raise ValueError("negative Content-Length")
            raw = self.rfile.read(content_length)
            payload = json.loads(raw.decode("utf-8") if raw else "{}")
        except ValueError:
            # Covers a malformed Content-Length, invalid UTF-8 and invalid JSON
            # (JSONDecodeError and UnicodeDecodeError both subclass ValueError).
            _json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
            return

        if self.path not in {"/run", "/verify", "/resume", "/abort"}:
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return
        if not isinstance(payload, dict):
            _json_response(self, 400, {"success": False, "error": "JSON payload must be an object"})
            return

        if self.path == "/run":
            task = payload.get("task", "")
            task_id = str(payload.get("task_id") or uuid.uuid4().hex)
            if not isinstance(task, str) or not task.strip():
                _json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
                return
            result = _start_task(task_id=task_id, task_text=task.strip())
            _json_response(self, 200, result)
            return

        task_id = self._require_task_id(payload)
        if task_id is None:
            return
        if self.path == "/verify":
            result = _verify_task(task_id)
        elif self.path == "/resume":
            result = _resume_task(task_id)
        else:
            result = _abort_task(task_id, reason=payload.get("reason"))
        _json_response(self, 200, result)

    def log_message(self, format_str, *args):
        """Silence the default per-request logging."""
        return


def main():
    """Start the threaded RPC server on the configured host and port."""
    host = os.getenv("BROWSER_USE_RPC_HOST", "0.0.0.0")
    port = int(os.getenv("BROWSER_USE_RPC_PORT", "8787"))
    # The context manager closes the listening socket on any exit path.
    with ThreadingHTTPServer((host, port), BrowserUseRPCHandler) as server:
        print(f"browser-use RPC listening on {host}:{port}")
        server.serve_forever()


if __name__ == "__main__":
    main()