457 lines
15 KiB
Python
457 lines
15 KiB
Python
import asyncio
|
|
import json
|
|
import os
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
|
from typing import Any
|
|
from urllib import error, request
|
|
|
|
from browser_use import Agent, Browser, ChatOpenAI
|
|
|
|
|
|
# Maximum number of seconds reported to clients (as "max_wait_seconds") for a
# human to clear a challenge. Overridable via BROWSER_CAPTCHA_MAX_WAIT_SECONDS.
CAPTCHA_WAIT_TIMEOUT = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))

# In-memory registry of tasks keyed by task id; access goes through
# _RUNNER_LOCK because HTTP requests are served from multiple threads.
_RUNNER_TASKS: dict[str, "RunnerTask"] = {}
_RUNNER_LOCK = threading.Lock()

# Lower-case substrings searched for in "url\ntitle\nhtml" of the current page.
# Tuple order is significant: generic hits are reported in declaration order.

# Markers treated as a Cloudflare challenge / Turnstile page.
_CF_STRONG = (
    "just a moment",
    "attention required",
    "checking your browser",
    "cf-challenge",
    "cdn-cgi/challenge-platform",
    "__cf_chl",
    "turnstile",
)

# Markers treated as Google reCAPTCHA.
_RECAPTCHA_STRONG = (
    "g-recaptcha",
    "recaptcha/api2",
    "www.google.com/recaptcha",
    "grecaptcha",
)

# Markers treated as hCaptcha.
_HCAPTCHA_STRONG = (
    "hcaptcha",
    "newassets.hcaptcha.com",
    "js.hcaptcha.com/1/api.js",
)

# Vendor-neutral phrases that still indicate a human-verification wall.
_GENERIC_CAPTCHA_STRONG = (
    "captcha",
    "are you human",
    "verify you are human",
    "human verification",
    "bot detection",
    "security check",
    "press and hold",
)
|
|
|
|
|
|
def _json_response(handler, status_code, payload):
    """Write *payload* to *handler* as a complete JSON HTTP response.

    The payload is serialized without ASCII escaping and encoded as UTF-8;
    Content-Length is the encoded byte length.
    """
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    headers = (
        ("Content-Type", "application/json; charset=utf-8"),
        ("Content-Length", str(len(body))),
    )

    handler.send_response(status_code)
    for header_name, header_value in headers:
        handler.send_header(header_name, header_value)
    handler.end_headers()
    handler.wfile.write(body)
|
|
|
|
|
|
@dataclass
|
|
class RunnerTask:
|
|
task_id: str
|
|
task: str
|
|
browser_view_url: str
|
|
resume_token: str = field(default_factory=lambda: uuid.uuid4().hex)
|
|
created_at: float = field(default_factory=time.time)
|
|
status: str = "starting"
|
|
payload: dict[str, Any] | None = None
|
|
error: str | None = None
|
|
agent: Any = None
|
|
browser: Any = None
|
|
loop: asyncio.AbstractEventLoop | None = None
|
|
thread: threading.Thread | None = None
|
|
settled_event: threading.Event = field(default_factory=threading.Event)
|
|
finished: bool = False
|
|
awaiting: bool = False
|
|
aborted: bool = False
|
|
transition_count: int = 0
|
|
lock: threading.Lock = field(default_factory=threading.Lock)
|
|
|
|
def set_payload(self, status: str, payload: dict[str, Any]) -> None:
|
|
with self.lock:
|
|
self.status = status
|
|
self.payload = payload
|
|
self.transition_count += 1
|
|
self.awaiting = status == "awaiting_user_captcha"
|
|
self.finished = status in {"succeeded", "failed"}
|
|
self.settled_event.set()
|
|
|
|
|
|
def _get_task(task_id: str) -> RunnerTask | None:
    """Return the registered task for *task_id*, or ``None`` if there is none."""
    with _RUNNER_LOCK:
        found = _RUNNER_TASKS.get(task_id)
    return found
|
|
|
|
|
|
def _put_task(task: RunnerTask) -> RunnerTask:
    """Register *task* under its ``task_id`` (replacing any previous entry) and return it."""
    key = task.task_id
    with _RUNNER_LOCK:
        _RUNNER_TASKS[key] = task
    return task
|
|
|
|
|
|
async def _get_page_html(agent: Agent) -> str:
    """Fetch the outer HTML of the current document over the agent's CDP session.

    Best effort: any failure while talking to the browser yields ``""``.
    """
    try:
        session = await agent.browser_session.get_or_create_cdp_session()
        dom = session.cdp_client.send.DOM
        document = await dom.getDocument(session_id=session.session_id)
        root_node_id = document["root"]["nodeId"]
        outer = await dom.getOuterHTML(
            params={"nodeId": root_node_id},
            session_id=session.session_id,
        )
        return str(outer.get("outerHTML", ""))
    except Exception:
        # Deliberate: callers treat missing HTML as "no signal" rather than an error.
        return ""
|
|
|
|
|
|
async def _capture_page_state(agent: Agent) -> dict[str, Any]:
    """Collect the current page's URL, title and HTML from the agent's browser.

    The browser state summary is consulted first (it may be a dict or an
    object with ``url`` / ``title`` attributes). Whatever is still empty is
    then requested from the session directly. Every browser call is best
    effort; a failed lookup leaves the corresponding value as ``""``.

    Returns:
        ``{"url": str, "title": str, "html": str}``.
    """
    try:
        summary = await agent.browser_session.get_browser_state_summary()
    except Exception:
        summary = None

    def _from_summary(name: str) -> str:
        # Supports both mapping-style and attribute-style summaries.
        if isinstance(summary, dict):
            raw = summary.get(name)
        else:
            raw = getattr(summary, name, "")
        return str(raw or "")

    if summary is None:
        url, title = "", ""
    else:
        url = _from_summary("url")
        title = _from_summary("title")

    if not url:
        try:
            url = str(await agent.browser_session.get_current_page_url() or "")
        except Exception:
            url = ""

    if not title:
        try:
            title = str(await agent.browser_session.get_current_page_title() or "")
        except Exception:
            title = ""

    page_html = await _get_page_html(agent)
    return {"url": url, "title": title, "html": page_html}
|
|
|
|
|
|
def _classify_captcha(haystack: str) -> str:
    """Name the challenge vendor whose markers appear in *haystack*.

    Vendors are tried in the order Cloudflare, reCAPTCHA, hCaptcha; the first
    one with any matching marker wins. Returns ``"unknown"`` when none match.
    """
    vendor_markers = (
        ("cloudflare", _CF_STRONG),
        ("recaptcha", _RECAPTCHA_STRONG),
        ("hcaptcha", _HCAPTCHA_STRONG),
    )
    for vendor, markers in vendor_markers:
        for marker in markers:
            if marker in haystack:
                return vendor
    return "unknown"
|
|
|
|
|
|
def _detect_captcha_from_state(page_state: dict[str, Any]) -> tuple[bool, str, list[str]]:
    """Decide whether a captured page looks like a CAPTCHA / bot challenge.

    The lower-cased URL, title and first 150000 characters of HTML are joined
    with newlines and searched for the module's marker substrings.

    Returns:
        ``(blocked, captcha_type, signals)`` where ``signals`` lists the
        vendor signals that matched followed by at most three
        ``"generic:<token>"`` entries, ``blocked`` is true when ``signals`` is
        non-empty, and ``captcha_type`` comes from ``_classify_captcha``.
    """

    def _lowered(key: str) -> str:
        return str(page_state.get(key) or "").lower()

    haystack = "\n".join((_lowered("url"), _lowered("title"), _lowered("html")[:150000]))

    def _mentions(markers) -> bool:
        return any(marker in haystack for marker in markers)

    vendor_signals = (
        ("cloudflare_challenge", _CF_STRONG),
        ("recaptcha", _RECAPTCHA_STRONG),
        ("hcaptcha", _HCAPTCHA_STRONG),
    )
    signals: list[str] = [name for name, markers in vendor_signals if _mentions(markers)]

    # Only the first three generic phrases are reported to keep payloads small.
    generic_hits = [token for token in _GENERIC_CAPTCHA_STRONG if token in haystack]
    signals += [f"generic:{token}" for token in generic_hits[:3]]

    return bool(signals), _classify_captcha(haystack), signals
|
|
|
|
|
|
async def _build_captcha_payload(task: RunnerTask, agent: Agent) -> dict[str, Any]:
    """Build the ``awaiting_user_captcha`` response for a task blocked by a challenge.

    The page is captured and classified afresh. The same hand-off details are
    exposed both at the top level of the payload and under
    ``"human_intervention"``.

    Args:
        task: Task whose id, resume token and browser view URL are reported.
        agent: Agent whose browser session is inspected.

    Returns:
        A JSON-serializable payload with ``"success": False`` and
        ``"status": "awaiting_user_captcha"``.

    Raises:
        RuntimeError: If the freshly captured page shows no challenge.
    """
    page_state = await _capture_page_state(agent)
    blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
    if not blocked:
        raise RuntimeError("Captcha payload requested without an active challenge")

    # One timestamp for the whole payload so the top-level and nested
    # "detected_at" values always agree.
    detected_at = time.time()

    verification = {
        "mode": "dom_url_title",
        "selectors_absent": [
            "iframe[src*='recaptcha']",
            "[class*='hcaptcha']",
            "[id*='captcha']",
            "form[action*='challenge']",
            "input[name='cf-turnstile-response']",
        ],
        "challenge_signals_absent": signals,
        "max_wait_seconds": CAPTCHA_WAIT_TIMEOUT,
    }
    # An empty configured view URL is reported as null rather than "".
    browser_view_url = task.browser_view_url or None
    instructions = (
        "Open the live browser view, complete the verification challenge manually, "
        "then return and reply 'ready' or 'done'."
    )

    human_intervention = {
        "status": "awaiting_user_captcha",
        "task_id": task.task_id,
        "session_id": task.task_id,
        "resume_token": task.resume_token,
        "browser_view_url": browser_view_url,
        "captcha_type": captcha_type,
        "instructions": instructions,
        "detected_at": detected_at,
        "verification": verification,
    }
    return {
        "success": False,
        "status": "awaiting_user_captcha",
        "task_id": task.task_id,
        "session_id": task.task_id,
        "resume_token": task.resume_token,
        "browser_view_url": browser_view_url,
        "captcha_type": captcha_type,
        "instructions": instructions,
        "detected_at": detected_at,
        "page_url": page_state.get("url"),
        "page_title": page_state.get("title"),
        "verification": verification,
        "human_intervention": human_intervention,
    }
|
|
|
|
|
|
async def _verify_captcha_state(task: RunnerTask) -> dict[str, Any]:
    """Re-inspect the task's current page and report whether the challenge is gone.

    Returns a failure dict when the task has no agent attached; otherwise a
    dict whose ``"verified"`` flag is true when no challenge signal is found.
    ``"captcha_type"`` is only populated while the page is still blocked.
    """
    agent = task.agent
    if not agent:
        return {"success": False, "verified": False, "error": "Task is not attached to an active agent"}

    page_state = await _capture_page_state(agent)
    blocked, captcha_type, signals = _detect_captcha_from_state(page_state)

    report: dict[str, Any] = {
        "success": True,
        "task_id": task.task_id,
        "verified": not blocked,
        "captcha_type": captcha_type if blocked else None,
        "page_url": page_state.get("url"),
        "page_title": page_state.get("title"),
        "signals": signals,
        "verification_mode": "dom_url_title",
    }
    return report
|
|
|
|
|
|
async def _run_browser_task(task: RunnerTask):
    """Run the agent for *task* and publish its outcome via ``task.set_payload``.

    After every agent step the page is checked for a CAPTCHA; when one is
    found the task is switched to ``awaiting_user_captcha`` and the agent is
    paused until another thread resumes it.

    Browser, LLM and agent construction happen inside the guarded section so
    that a setup failure is published as a ``failed`` payload. Waiters on
    ``task.settled_event`` would otherwise never be woken.
    """
    browser = None

    async def on_step_end(current_agent: Agent):
        # Nothing to do while a hand-off is pending or the task is over.
        if task.awaiting or task.finished or task.aborted:
            return
        page_state = await _capture_page_state(current_agent)
        blocked, _, _ = _detect_captcha_from_state(page_state)
        if not blocked:
            return
        try:
            payload = await _build_captcha_payload(task, current_agent)
        except RuntimeError:
            # The payload builder re-captures the page and raises when it no
            # longer sees a challenge (e.g. an interstitial that cleared on
            # its own). That is not a failure: keep the agent running.
            return
        task.set_payload("awaiting_user_captcha", payload)
        current_agent.pause()

    try:
        cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
        browser = Browser(cdp_url=cdp_url)
        llm = ChatOpenAI(
            model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_BASE_URL"),
            temperature=0.0,
        )
        agent = Agent(task=task.task, llm=llm, browser=browser)
        task.browser = browser
        task.agent = agent

        history = await agent.run(on_step_end=on_step_end)

        if task.aborted:
            # The abort path normally publishes its own (more detailed)
            # failure payload; only fill one in if that has not happened yet.
            if not task.finished:
                task.set_payload(
                    "failed",
                    {"success": False, "status": "failed", "error": task.error or "Task aborted during CAPTCHA flow."},
                )
            return
        if task.awaiting:
            return
        task.set_payload(
            "succeeded",
            {
                "success": True,
                "status": "succeeded",
                "result": history.final_result(),
                "browser_view_url": task.browser_view_url or None,
            },
        )
    except Exception as err:
        if not task.awaiting and not task.finished:
            task.set_payload(
                "failed",
                {"success": False, "status": "failed", "error": f"Browser automation failed: {err}"},
            )
    finally:
        # Keep the browser open while a human is expected to use it.
        if browser is not None and not task.awaiting:
            try:
                await browser.close()
            except Exception:
                # Best-effort cleanup; the task outcome is already published.
                pass
|
|
|
|
|
|
def _runner_thread_main(task: RunnerTask) -> None:
    """Thread entry point: run the task's coroutine on a private event loop.

    A new loop is created, stored on ``task.loop`` and installed as this
    thread's current loop. On exit, leftover asyncio tasks are cancelled and
    the loop is closed.

    If the thread is about to exit while ``task.settled_event`` is unset
    (the coroutine raised, or returned without publishing a payload), a
    ``failed`` payload is published so request threads blocked on the event
    are released instead of waiting forever. Exceptions still propagate out
    of the thread as before.
    """
    loop = asyncio.new_event_loop()
    task.loop = loop
    asyncio.set_event_loop(loop)

    failure: BaseException | None = None
    try:
        loop.run_until_complete(_run_browser_task(task))
    except BaseException as err:
        failure = err
        raise
    finally:
        try:
            pending = asyncio.all_tasks(loop=loop)
            for pending_task in pending:
                pending_task.cancel()
            if pending:
                loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
        finally:
            # Close the loop even if draining the leftover tasks failed.
            loop.close()
            if not task.settled_event.is_set():
                if failure is not None:
                    reason = f"Browser automation failed: {failure}"
                else:
                    reason = "Task exited without payload"
                task.set_payload(
                    "failed",
                    {"success": False, "status": "failed", "error": reason, "task_id": task.task_id},
                )
|
|
|
|
|
|
def _start_task(task_id: str, task_text: str) -> dict[str, Any]:
    """Start a browser task on a worker thread and block until it first settles.

    "Settles" means the worker published a payload: success, failure, or a
    CAPTCHA hand-off. If *task_id* is already registered, no new worker is
    started; the existing task's latest payload is returned (or an
    "already exists" error when it has none yet).

    The lookup and the registration happen under a single acquisition of the
    registry lock, so two concurrent requests with the same id cannot both
    create a worker.

    Args:
        task_id: Caller-chosen identifier of the task.
        task_text: Natural-language instruction handed to the agent.

    Returns:
        The payload published by the worker, or a failure dict.
    """
    with _RUNNER_LOCK:
        existing = _RUNNER_TASKS.get(task_id)
        if existing is None:
            state = RunnerTask(
                task_id=task_id,
                task=task_text,
                browser_view_url=os.getenv("BROWSER_VIEW_URL", ""),
            )
            _RUNNER_TASKS[task_id] = state

    if existing is not None:
        return existing.payload or {
            "success": False,
            "status": existing.status,
            "error": "Task already exists",
            "task_id": task_id,
        }

    thread = threading.Thread(
        target=_runner_thread_main,
        args=(state,),
        daemon=True,
        name=f"browser-task-{task_id[:8]}",
    )
    state.thread = thread
    thread.start()

    # Blocks without a timeout: a task may legitimately run for a long time
    # before it finishes or hits a challenge.
    state.settled_event.wait()
    return state.payload or {"success": False, "status": "failed", "error": "Task exited without payload", "task_id": task_id}
|
|
|
|
|
|
def _resume_task(task_id: str) -> dict[str, Any]:
    """Resume a paused task and block until it publishes its next payload.

    A task that has already finished is not resumed: its final payload is
    returned as-is. Without this guard the settled event would be cleared and
    the resume callback scheduled on the worker's event loop, which is closed
    once the worker exits and raises ``RuntimeError``.

    Args:
        task_id: Identifier of the task to resume.

    Returns:
        The next payload published by the worker, or a failure dict when the
        task is unknown or cannot be resumed.
    """
    state = _get_task(task_id)
    if not state:
        return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}

    not_resumable = {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}
    if not state.loop or not state.agent:
        return not_resumable
    if state.finished:
        return state.payload or not_resumable
    if state.loop.is_closed():
        return not_resumable

    state.awaiting = False
    # Clear before scheduling so a payload published right after the resume
    # cannot be missed by the wait below.
    state.settled_event.clear()
    try:
        state.loop.call_soon_threadsafe(state.agent.resume)
    except RuntimeError:
        # The loop was closed between the check above and the call; restore
        # the event so other waiters are not left blocked.
        state.settled_event.set()
        return not_resumable

    state.settled_event.wait()
    return state.payload or {"success": False, "status": "failed", "error": "Resume did not produce a payload", "task_id": task_id}
|
|
|
|
|
|
def _verify_task(task_id: str) -> dict[str, Any]:
    """Check, on the task's own event loop, whether its CAPTCHA has been cleared.

    The check is given 20 seconds. A timeout, a closed loop or an error in
    the check is reported as a ``{"success": False, "verified": False, ...}``
    dict rather than raised, so the HTTP layer can always send a response.

    Args:
        task_id: Identifier of the task to inspect.

    Returns:
        The verification report, or a failure dict.
    """
    state = _get_task(task_id)
    if not state:
        return {"success": False, "verified": False, "error": "Task not found", "task_id": task_id}
    if not state.loop or state.loop.is_closed():
        return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}

    check = _verify_captcha_state(state)
    try:
        future = asyncio.run_coroutine_threadsafe(check, state.loop)
    except RuntimeError:
        # Loop closed between the check above and the submission; close the
        # coroutine so it is not reported as "never awaited".
        check.close()
        return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}

    try:
        return future.result(timeout=20)
    except Exception as err:
        # Covers the timeout as well as errors raised by the check itself.
        # Cancelling is a no-op if the future already completed.
        future.cancel()
        return {
            "success": False,
            "verified": False,
            "error": f"Verification failed: {err!r}",
            "task_id": task_id,
        }
|
|
|
|
|
|
def _abort_task(task_id: str, reason: str | None = None) -> dict[str, Any]:
    """Mark a task as aborted and publish a ``captcha_aborted`` failure payload.

    A task that has already finished is left untouched and its final payload
    is returned. This avoids replacing a published result with a failure and
    avoids scheduling onto the worker's event loop after it has been closed.

    Args:
        task_id: Identifier of the task to abort.
        reason: Optional human-readable reason; a default message is used
            when omitted or empty.

    Returns:
        The failure payload that was published, the task's existing final
        payload, or a "not found" failure dict.
    """
    state = _get_task(task_id)
    if not state:
        return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}
    if state.finished and state.payload is not None:
        return state.payload

    state.aborted = True
    state.error = reason or "CAPTCHA flow aborted by user."

    if state.loop and state.agent and not state.loop.is_closed():
        # Wake a paused agent so the worker can observe the aborted flag.
        # NOTE(review): resume() only unblocks the agent; whether the agent
        # API offers a way to stop the run outright should be confirmed.
        try:
            state.loop.call_soon_threadsafe(state.agent.resume)
        except RuntimeError:
            # Loop closed in the meantime; there is nothing left to wake.
            pass

    payload = {
        "success": False,
        "status": "failed",
        "task_id": task_id,
        "error": state.error,
        "error_code": "captcha_aborted",
    }
    state.set_payload("failed", payload)
    return payload
|
|
|
|
|
|
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
    """JSON-over-HTTP front end for the browser task runner.

    Routes:
        GET  /health  - probes the browser's debug endpoint.
        POST /run     - starts a task (``task`` required, ``task_id`` optional).
        POST /verify  - checks whether a task's CAPTCHA is cleared.
        POST /resume  - resumes a paused task.
        POST /abort   - aborts a task (``reason`` optional).

    Anything else is answered with a JSON 404.
    """

    def do_GET(self):
        """Answer ``/health`` with 200 when the browser debug URL is reachable, else 503."""
        if self.path != "/health":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return

        try:
            debug_url = os.getenv("BROWSER_HEALTH_URL", "http://127.0.0.1:9222/json/version")
            with request.urlopen(debug_url, timeout=2):
                pass
            _json_response(self, 200, {"success": True})
        except Exception as err:
            _json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})

    def _read_json_body(self):
        """Read and decode the request body.

        Returns:
            ``(payload, None)`` with ``payload`` a dict (``{}`` for an empty
            body), or ``(None, message)`` describing why the body is unusable.
        """
        try:
            content_length = int(self.headers.get("Content-Length", "0") or "0")
        except (TypeError, ValueError):
            return None, "Invalid Content-Length header"
        if content_length < 0:
            # A negative length would turn the read below into read-until-EOF.
            return None, "Invalid Content-Length header"

        raw = self.rfile.read(content_length) if content_length else b""
        try:
            payload = json.loads(raw.decode("utf-8")) if raw else {}
        except ValueError:
            # Both JSONDecodeError and UnicodeDecodeError derive from ValueError.
            return None, "Invalid JSON payload"
        if not isinstance(payload, dict):
            return None, "JSON payload must be an object"
        return payload, None

    def _respond_with(self, operation, *args, **kwargs):
        """Run a task operation and send its result; report a failure as JSON 500."""
        try:
            result = operation(*args, **kwargs)
        except Exception as err:
            _json_response(self, 500, {"success": False, "status": "failed", "error": f"Internal error: {err}"})
            return
        _json_response(self, 200, result)

    def do_POST(self):
        """Dispatch ``/run``, ``/verify``, ``/resume`` and ``/abort`` requests."""
        payload, problem = self._read_json_body()
        if problem is not None:
            _json_response(self, 400, {"success": False, "error": problem})
            return

        if self.path == "/run":
            task = payload.get("task", "")
            task_id = str(payload.get("task_id") or uuid.uuid4().hex)
            if not isinstance(task, str) or not task.strip():
                _json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
                return
            self._respond_with(_start_task, task_id=task_id, task_text=task.strip())
            return

        if self.path in ("/verify", "/resume", "/abort"):
            task_id = str(payload.get("task_id") or "")
            if not task_id:
                _json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
                return
            if self.path == "/verify":
                self._respond_with(_verify_task, task_id)
            elif self.path == "/resume":
                self._respond_with(_resume_task, task_id)
            else:
                self._respond_with(_abort_task, task_id, reason=payload.get("reason"))
            return

        _json_response(self, 404, {"success": False, "error": "Not found"})

    def log_message(self, format_str, *args):
        """Suppress the base class's per-request logging."""
        return
|
|
|
|
|
|
def main():
    """Start the threaded RPC server and serve until the process is stopped.

    The bind address comes from BROWSER_USE_RPC_HOST (default ``0.0.0.0``)
    and BROWSER_USE_RPC_PORT (default ``8787``).
    """
    bind_host = os.getenv("BROWSER_USE_RPC_HOST", "0.0.0.0")
    bind_port = int(os.getenv("BROWSER_USE_RPC_PORT", "8787"))
    address = (bind_host, bind_port)

    httpd = ThreadingHTTPServer(address, BrowserUseRPCHandler)
    print(f"browser-use RPC listening on {bind_host}:{bind_port}")
    httpd.serve_forever()


if __name__ == "__main__":
    main()
|