diff --git a/api/clients/browser_rpc_client.py b/api/clients/browser_rpc_client.py index ce227d43..f59a6a70 100644 --- a/api/clients/browser_rpc_client.py +++ b/api/clients/browser_rpc_client.py @@ -10,8 +10,16 @@ class BrowserRpcClient: self._rpc_url = rpc_url self._session = session - async def run(self, task: str, timeout_sec: float, rpc_url: str | None = None) -> dict[str, Any]: - payload = {"task": task} + async def run( + self, + task: str, + timeout_sec: float, + rpc_url: str | None = None, + task_id: str | None = None, + ) -> dict[str, Any]: + payload: dict[str, Any] = {"task": task} + if task_id: + payload["task_id"] = task_id timeout = aiohttp.ClientTimeout(total=timeout_sec) target_url = rpc_url or self._rpc_url @@ -34,6 +42,15 @@ class BrowserRpcClient: return data -async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]: +async def run_browser_task( + rpc_url: str, + task: str, + timeout_sec: float, + task_id: str | None = None, +) -> dict[str, Any]: async with aiohttp.ClientSession() as session: - return await BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec) + return await BrowserRpcClient(rpc_url, session=session).run( + task=task, + timeout_sec=timeout_sec, + task_id=task_id, + ) diff --git a/api/contracts/captcha_schemas.py b/api/contracts/captcha_schemas.py new file mode 100644 index 00000000..20a5324e --- /dev/null +++ b/api/contracts/captcha_schemas.py @@ -0,0 +1,54 @@ +from pydantic import BaseModel, Field + +from api.domain.captcha_state import CaptchaState + + +class CaptchaNotifyRequest(BaseModel): + """Запрос от browser-use tool о том, что задача упёрлась в капчу.""" + + browser_view_url: str | None = Field(default=None, description="URL noVNC/прокси для ручного решения") + captcha_kind: str | None = Field(default=None, description="Тип капчи (recaptcha_v2/hcaptcha/turnstile/unknown)") + reason: str | None = Field(default=None, description="Свободное описание ситуации от агента") + timeout_seconds: int = Field(default=300, ge=1, le=3600, description="Сколько ждать решения до timeout-prompt") + + +class CaptchaSolvedRequest(BaseModel): + """Уведомление от детектора, что капча больше не блокирует страницу.""" + + detector: str | None = Field(default=None, description="Имя детектора (dom_poller/2captcha/user)") + + +class CaptchaExtendRequest(BaseModel): + """Пользовательский ответ «продлить» на timeout-prompt.""" + + extra_seconds: int = Field(default=300, ge=1, le=3600, description="На сколько ещё ждать решения") + + +class CaptchaAbortRequest(BaseModel): + """Пользовательский ответ «отменить» на timeout-prompt.""" + + reason: str | None = Field(default=None, description="Свободный текст причины") + + +class CaptchaStatusResponse(BaseModel): + """Снимок captcha-состояния задачи.""" + + task_id: str + state: CaptchaState + captcha_kind: str | None = None + reason: str | None = None + browser_view_url: str | None = None + notified_at: float | None = None + solved_at: float | None = None + deadline: float | None = None + extra_seconds: int = 0 + remaining_seconds: float | None = None + + +class CaptchaActionResponse(BaseModel): + """Универсальный ответ на действие над captcha-flow.""" + + task_id: str + state: CaptchaState + accepted: bool = True + message: str | None = None diff --git a/api/domain/captcha_state.py b/api/domain/captcha_state.py new file mode 100644 index 00000000..67f533e2 --- /dev/null +++ b/api/domain/captcha_state.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class CaptchaState(str, Enum): + """Состояние captcha-флоу для задачи.""" + + none = "none" + awaiting = "awaiting" + solved = "solved" + timeout_prompt = "timeout_prompt" + extended = "extended" + aborted = "aborted" + failed = "failed" diff --git a/api/main.py b/api/main.py index b45c1b88..6cfa7982 100644 --- a/api/main.py +++ b/api/main.py @@ -6,6 +6,7 @@ from fastapi import FastAPI from api.clients.browser_rpc_client import BrowserRpcClient from api.core.settings import settings from api.repositories.task_store import TaskStore +from api.routes.captcha import router as captcha_router from api.routes.runs import router as runs_router from api.routes.tasks import router as tasks_router from api.services.task_service import TaskService @@ -37,6 +38,7 @@ def create_app() -> FastAPI: ) app.include_router(tasks_router) app.include_router(runs_router) + app.include_router(captcha_router) @app.get("/health") async def health() -> dict: diff --git a/api/mappers/captcha_mapper.py b/api/mappers/captcha_mapper.py new file mode 100644 index 00000000..6a4a7bab --- /dev/null +++ b/api/mappers/captcha_mapper.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import time + +from api.contracts.captcha_schemas import CaptchaActionResponse, CaptchaStatusResponse +from api.repositories.task_store import TaskRecord + + +class CaptchaMapper: + @staticmethod + def to_status(rec: TaskRecord) -> CaptchaStatusResponse: + remaining: float | None = None + if rec.captcha_deadline is not None: + remaining = max(0.0, rec.captcha_deadline - time.time()) + return CaptchaStatusResponse( + task_id=rec.task_id, + state=rec.captcha_state, + captcha_kind=rec.captcha_kind, + reason=rec.captcha_reason, + browser_view_url=rec.captcha_view_url, + notified_at=rec.captcha_notified_at, + solved_at=rec.captcha_solved_at, + deadline=rec.captcha_deadline, + extra_seconds=rec.captcha_extra_seconds, + remaining_seconds=remaining, + ) + + @staticmethod + def to_action(rec: TaskRecord, message: str | None = None, accepted: bool = True) -> CaptchaActionResponse: + return CaptchaActionResponse( + task_id=rec.task_id, + state=rec.captcha_state, + accepted=accepted, + message=message, + ) diff --git a/api/repositories/task_store.py b/api/repositories/task_store.py index b13ee7b3..37a76e28 100644 --- a/api/repositories/task_store.py +++ b/api/repositories/task_store.py @@ -4,6 +4,7 @@ from asyncio import Event, Lock, Queue from dataclasses import dataclass, field from typing import Any +from api.domain.captcha_state import CaptchaState from api.domain.task_status import TaskStatus @@ -25,6 +26,16 @@ class TaskRecord: cancel_requested: bool = False done_event: Event = field(default_factory=Event) + captcha_state: CaptchaState = CaptchaState.none + captcha_kind: str | None = None + captcha_reason: str | None = None + captcha_view_url: str | None = None + captcha_notified_at: float | None = None + captcha_solved_at: float | None = None + captcha_deadline: float | None = None + captcha_extra_seconds: int = 0 + captcha_event: Event = field(default_factory=Event) + @property def execution_time(self) -> float: if self.started_at is None: @@ -40,13 +51,7 @@ class TaskStore: self._thread_index: dict[str, list[str]] = {} self._subscribers: dict[str, set[Queue[dict[str, Any]]]] = {} - async def create( - self, - task: str, - timeout: int, - metadata: dict[str, Any] | None, - thread_id: str = "default", - ) -> TaskRecord: + async def create(self, task: str, timeout: int, metadata: dict[str, Any] | None, thread_id: str = "default") -> TaskRecord: task_id = uuid.uuid4().hex rec = TaskRecord(task_id=task_id, thread_id=thread_id, task=task, timeout=timeout, metadata=metadata) async with self._lock: @@ -75,25 +80,15 @@ class TaskStore: rec.started_at = time.time() return rec - async def set_done( - self, - task_id: str, - success: bool, - raw_response: dict[str, Any] | None, - error: str | None, - result: str | None = None, - history: list[dict[str, Any]] | None = None, - ) -> TaskRecord | None: + async def set_done(self, task_id: str, success: bool, raw_response: dict[str, Any] | None, error: str | None, result: str | None = None, history: list[dict[str, Any]] | None = None) -> TaskRecord | None: async with self._lock: rec = self._tasks.get(task_id) if rec is None: return None rec.finished_at = time.time() rec.raw_response = raw_response - rec.error = error if error is not None else ( - raw_response.get("error") if isinstance(raw_response, dict) else None) - rec.result = result if result is not None else ( - raw_response.get("result") if isinstance(raw_response, dict) else None) + rec.error = error if error is not None else (raw_response.get("error") if isinstance(raw_response, dict) else None) + rec.result = result if result is not None else (raw_response.get("result") if isinstance(raw_response, dict) else None) rec.history = list(history or []) rec.status = TaskStatus.succeeded if success else TaskStatus.failed rec.done_event.set() @@ -132,7 +127,6 @@ class TaskStore: return False, False if rec.status in (TaskStatus.queued, TaskStatus.running): return True, False - del self._tasks[task_id] thread_list = self._thread_index.get(rec.thread_id, []) if task_id in thread_list: @@ -140,6 +134,93 @@ class TaskStore: self._subscribers.pop(task_id, None) return True, True + async def set_captcha_awaiting(self, task_id: str, kind: str | None, reason: str | None, view_url: str | None, timeout_seconds: int) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + now = time.time() + rec.captcha_state = CaptchaState.awaiting + rec.captcha_kind = kind + rec.captcha_reason = reason + rec.captcha_view_url = view_url + rec.captcha_notified_at = now + rec.captcha_solved_at = None + rec.captcha_extra_seconds = 0 + rec.captcha_deadline = now + max(1, int(timeout_seconds)) + rec.captcha_event.clear() + return rec + + async def set_captcha_solved(self, task_id: str) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + if rec.captcha_state in (CaptchaState.none, CaptchaState.solved): + return rec + rec.captcha_state = CaptchaState.solved + rec.captcha_solved_at = time.time() + rec.captcha_event.set() + return rec + + async def set_captcha_timeout_prompt(self, task_id: str) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + if rec.captcha_state != CaptchaState.awaiting: + return rec + rec.captcha_state = CaptchaState.timeout_prompt + rec.captcha_event.set() + return rec + + async def set_captcha_extended(self, task_id: str, extra_seconds: int) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + if rec.captcha_state not in (CaptchaState.timeout_prompt, CaptchaState.awaiting): + return rec + extra = max(1, int(extra_seconds)) + rec.captcha_extra_seconds += extra + base = rec.captcha_deadline or time.time() + rec.captcha_deadline = max(base, time.time()) + extra + rec.captcha_state = CaptchaState.extended + rec.captcha_event.set() + return rec + + async def set_captcha_aborted(self, task_id: str, reason: str | None = None) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + if rec.captcha_state in (CaptchaState.none, CaptchaState.aborted, CaptchaState.failed): + return rec + rec.captcha_state = CaptchaState.aborted + rec.captcha_reason = reason or rec.captcha_reason + rec.captcha_event.set() + return rec + + async def set_captcha_failed(self, task_id: str, error: str | None = None) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + rec.captcha_state = CaptchaState.failed + if error: + rec.captcha_reason = error + rec.captcha_event.set() + return rec + + async def reset_captcha(self, task_id: str) -> TaskRecord | None: + async with self._lock: + rec = self._tasks.get(task_id) + if rec is None: + return None + rec.captcha_state = CaptchaState.none + rec.captcha_event.clear() + return rec + async def subscribe(self, task_id: str) -> Queue[dict[str, Any]] | None: queue: Queue[dict[str, Any]] = Queue() async with self._lock: diff --git a/api/routes/captcha.py b/api/routes/captcha.py new file mode 100644 index 00000000..2c445f6f --- /dev/null +++ b/api/routes/captcha.py @@ -0,0 +1,105 @@ +import asyncio + +from fastapi import APIRouter, Depends, HTTPException, Query + +from api.contracts.captcha_schemas import ( + CaptchaAbortRequest, + CaptchaActionResponse, + CaptchaExtendRequest, + CaptchaNotifyRequest, + CaptchaSolvedRequest, + CaptchaStatusResponse, +) +from api.mappers.captcha_mapper import CaptchaMapper +from api.routes.dependencies import get_task_service +from api.services.protocols import TaskServiceProtocol + +router = APIRouter(prefix="/api/browser", tags=["browser-captcha"]) + + +@router.post("/tasks/{task_id}/captcha/notify", response_model=CaptchaActionResponse, status_code=202) +async def captcha_notify( + task_id: str, + payload: CaptchaNotifyRequest, + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaActionResponse: + rec = await service.notify_captcha( + task_id=task_id, + kind=payload.captcha_kind, + reason=payload.reason, + view_url=payload.browser_view_url, + timeout_seconds=payload.timeout_seconds, + ) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_action(rec, message="captcha awaiting solution") + + +@router.get("/tasks/{task_id}/captcha", response_model=CaptchaStatusResponse) +async def captcha_status( + task_id: str, + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaStatusResponse: + rec = await service.get_task(task_id) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_status(rec) + + +@router.get("/tasks/{task_id}/captcha/wait", response_model=CaptchaStatusResponse) +async def captcha_wait( + task_id: str, + timeout: float = Query(default=30.0, ge=0.1, le=600.0, description="long-poll cap, seconds"), + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaStatusResponse: + rec = await service.wait_captcha(task_id=task_id, timeout=timeout) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_status(rec) + + +@router.post("/tasks/{task_id}/captcha/solved", response_model=CaptchaActionResponse) +async def captcha_solved( + task_id: str, + payload: CaptchaSolvedRequest | None = None, + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaActionResponse: + rec = await service.mark_captcha_solved(task_id=task_id, detector=(payload.detector if payload else None)) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_action(rec, message="captcha marked as solved") + + +@router.post("/tasks/{task_id}/captcha/timeout-prompt", response_model=CaptchaActionResponse) +async def captcha_timeout_prompt( + task_id: str, + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaActionResponse: + rec = await service.prompt_captcha_timeout(task_id=task_id) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_action(rec, message="user must choose: extend or abort") + + +@router.post("/tasks/{task_id}/captcha/extend", response_model=CaptchaActionResponse) +async def captcha_extend( + task_id: str, + payload: CaptchaExtendRequest, + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaActionResponse: + rec = await service.extend_captcha(task_id=task_id, extra_seconds=payload.extra_seconds) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_action(rec, message=f"captcha extended by {payload.extra_seconds}s") + + +@router.post("/tasks/{task_id}/captcha/abort", response_model=CaptchaActionResponse) +async def captcha_abort( + task_id: str, + payload: CaptchaAbortRequest | None = None, + service: TaskServiceProtocol = Depends(get_task_service), +) -> CaptchaActionResponse: + rec = await service.abort_captcha(task_id=task_id, reason=(payload.reason if payload else None)) + if rec is None: + raise HTTPException(status_code=404, detail="Task not found") + return CaptchaMapper.to_action(rec, message="captcha aborted by user") diff --git a/api/services/protocols.py b/api/services/protocols.py index 2dd785b1..8ec95be7 100644 --- a/api/services/protocols.py +++ b/api/services/protocols.py @@ -26,3 +26,22 @@ class TaskServiceProtocol(Protocol): async def subscribe_run_stream(self, run_id: str) -> Queue[dict[str, Any]] | None: ... async def unsubscribe_run_stream(self, run_id: str, queue: Queue[dict[str, Any]]) -> None: ... + + async def notify_captcha( + self, + task_id: str, + kind: str | None, + reason: str | None, + view_url: str | None, + timeout_seconds: int, + ) -> TaskRecord | None: ... + + async def mark_captcha_solved(self, task_id: str, detector: str | None = None) -> TaskRecord | None: ... + + async def extend_captcha(self, task_id: str, extra_seconds: int) -> TaskRecord | None: ... + + async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None: ... + + async def prompt_captcha_timeout(self, task_id: str) -> TaskRecord | None: ... + + async def wait_captcha(self, task_id: str, timeout: float) -> TaskRecord | None: ... diff --git a/api/services/task_service.py b/api/services/task_service.py index afa5968c..ca3c2e4a 100644 --- a/api/services/task_service.py +++ b/api/services/task_service.py @@ -89,6 +89,93 @@ class TaskService: async def unsubscribe_run_stream(self, run_id: str, queue) -> None: await self._store.unsubscribe(run_id, queue) + async def notify_captcha( + self, + task_id: str, + kind: str | None, + reason: str | None, + view_url: str | None, + timeout_seconds: int, + ): + rec = await self._store.set_captcha_awaiting( + task_id=task_id, + kind=kind, + reason=reason, + view_url=view_url, + timeout_seconds=timeout_seconds, + ) + if rec is not None: + await self._store.publish( + task_id, + self._event(task_id, "captcha_required", { + "captcha_kind": rec.captcha_kind, + "reason": rec.captcha_reason, + "browser_view_url": rec.captcha_view_url, + "deadline": rec.captcha_deadline, + "timeout_seconds": timeout_seconds, + }), + ) + return rec + + async def mark_captcha_solved(self, task_id: str, detector: str | None = None): + rec = await self._store.set_captcha_solved(task_id) + if rec is not None: + await self._store.publish( + task_id, + self._event(task_id, "captcha_solved", { + "detector": detector or "unknown", + "solved_at": rec.captcha_solved_at, + }), + ) + return rec + + async def extend_captcha(self, task_id: str, extra_seconds: int): + rec = await self._store.set_captcha_extended(task_id=task_id, extra_seconds=extra_seconds) + if rec is not None: + await self._store.publish( + task_id, + self._event(task_id, "captcha_extended", { + "extra_seconds": extra_seconds, + "deadline": rec.captcha_deadline, + }), + ) + return rec + + async def prompt_captcha_timeout(self, task_id: str): + rec = await self._store.set_captcha_timeout_prompt(task_id) + if rec is not None: + await self._store.publish( + task_id, + self._event(task_id, "captcha_timeout_prompt", { + "captcha_kind": rec.captcha_kind, + "browser_view_url": rec.captcha_view_url, + "deadline": rec.captcha_deadline, + "actions": ["extend", "abort"], + }), + ) + return rec + + async def abort_captcha(self, task_id: str, reason: str | None = None): + rec = await self._store.set_captcha_aborted(task_id=task_id, reason=reason) + if rec is not None: + await self._store.publish( + task_id, + self._event(task_id, "captcha_aborted", { + "reason": rec.captcha_reason, + }), + ) + return rec + + async def wait_captcha(self, task_id: str, timeout: float): + rec = await self._store.get(task_id) + if rec is None: + return None + try: + await asyncio.wait_for(rec.captcha_event.wait(), timeout=timeout) + except asyncio.TimeoutError: + pass + return await self._store.get(task_id) + async def close(self) -> None: if not self._background_tasks: return @@ -127,7 +214,12 @@ class TaskService: rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap) raw = await asyncio.wait_for( - self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout, rpc_url=runtime.get("rpc_url")), + self._rpc_client.run( + task=rec.task, + timeout_sec=rpc_timeout, + rpc_url=runtime.get("rpc_url"), + task_id=task_id, + ), timeout=float(rec.timeout) + 5, ) raw = self._with_runtime_metadata(raw, runtime) diff --git a/browser_env/browser_use_runner.py b/browser_env/browser_use_runner.py index 89ff39b3..f7c33f7d 100644 --- a/browser_env/browser_use_runner.py +++ b/browser_env/browser_use_runner.py @@ -9,6 +9,8 @@ from urllib import error, request from browser_use import Agent, Browser, ChatOpenAI from pydantic import BaseModel, Field, ValidationError, field_validator +from browser_env.tools import captcha_tool + SPEED_OPTIMIZATION_PROMPT = """ Speed optimization instructions: - Be extremely concise and direct in your responses @@ -16,11 +18,21 @@ Speed optimization instructions: - Use multi-action sequences whenever possible to reduce steps """ +CAPTCHA_PROMPT = """ +CAPTCHA handling: +- If the current page is blocked by reCAPTCHA, hCaptcha, or Cloudflare Turnstile, + call the `to_captcha` action ONCE with a short `reason` argument and WAIT for its result. +- Do not click on captcha challenges yourself; the human will solve them via the live browser view. +- After `to_captcha` returns success=true, continue the original task from the same step. +- If `to_captcha` returns success=false, report the error and stop. +""" + class RunTaskRequest(BaseModel): """RPC payload для запуска browser-use задачи.""" task: str = Field(..., min_length=1) + task_id: str | None = Field(default=None, description="ID задачи из browser-api (используется to_captcha tool)") @field_validator("task") @classmethod @@ -69,10 +81,14 @@ def _json_response(handler, status_code: int, payload: dict[str, Any] | BaseMode handler.wfile.write(data) -async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorResponse: +async def run_browser_task(task: str, task_id: str | None = None) -> RunTaskSuccessResponse | RunTaskErrorResponse: cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") browser_view_url = os.getenv("BROWSER_VIEW_URL", "") + if task_id: + # Прокидываем task_id в окружение, чтобы to_captcha tool знал, куда POST'ить. + os.environ["CURRENT_TASK_ID"] = task_id + browser = Browser(cdp_url=cdp_url) llm = ChatOpenAI( @@ -82,13 +98,27 @@ async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorRe temperature=0.0, ) - agent = Agent(task=task, - llm=llm, - browser=browser, - flash_mode=True, - use_vision=False, - extend_system_message=SPEED_OPTIMIZATION_PROMPT, - ) + controller = None + try: + from browser_use import Controller # type: ignore + controller = Controller() + captcha_tool.register(controller) + except Exception: + # Если у установленной версии browser-use нет Controller — продолжаем без custom action + controller = None + + agent_kwargs = dict( + task=task, + llm=llm, + browser=browser, + flash_mode=True, + use_vision=False, + extend_system_message=SPEED_OPTIMIZATION_PROMPT + CAPTCHA_PROMPT, + ) + if controller is not None: + agent_kwargs["controller"] = controller + + agent = Agent(**agent_kwargs) try: history = await agent.run() @@ -219,7 +249,7 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler): payload = json.loads(raw.decode("utf-8") if raw else "{}") request_model = RunTaskRequest.model_validate(payload) - result_model = asyncio.run(run_browser_task(request_model.task)) + result_model = asyncio.run(run_browser_task(request_model.task, task_id=request_model.task_id)) code = 200 if result_model.success else 500 _json_response(self, code, result_model) except ValidationError as err: diff --git a/browser_env/tools/__init__.py b/browser_env/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/browser_env/tools/captcha_tool.py b/browser_env/tools/captcha_tool.py new file mode 100644 index 00000000..b2a180dc --- /dev/null +++ b/browser_env/tools/captcha_tool.py @@ -0,0 +1,233 @@ +"""to_captcha custom action для browser-use. + +Когда LLM-агент видит на странице капчу (reCAPTCHA / hCaptcha / Cloudflare Turnstile), +он вызывает action `to_captcha`. Action: + 1. Уведомляет browser-api (POST /api/browser/tasks/{task_id}/captcha/notify), + передавая URL noVNC-просмотрщика, чтобы пользователь решил капчу руками. + 2. Параллельно ОПРАШИВАЕТ DOM каждые ~1.5 сек: + * iframe reCAPTCHA/hCaptcha/Turnstile исчез + * скрытый textarea/input с токеном заполнен + Как только один из критериев сработал — POST /captcha/solved (detector=dom_poller), + возвращает управление browser-use Agent. Агент продолжает с того же шага, + где остановился, потому что browser-use держит общий browser context. + 3. Если за timeout_seconds капчу автодетектор не увидел решённой — + поднимает captcha_state в timeout_prompt (через API), даёт пользователю шанс + ответить «продлить» (POST /captcha/extend) или «отменить» (POST /captcha/abort). + 4. На abort action возвращает success=False — Agent получит сигнал об ошибке. + +Пользовательского подтверждения «готово» НЕТ. Решение засекается только DOM-детектором +либо внешним вызовом /captcha/solved. +""" + +from __future__ import annotations + +import asyncio +import json +import os +import time +from typing import Any +from urllib import error, request + + +CAPTCHA_KIND_DETECTORS: tuple[tuple[str, str], ...] = ( + ("recaptcha_v2", "() => !!document.querySelector('iframe[src*=\"recaptcha\"]')"), + ("hcaptcha", "() => !!document.querySelector('iframe[src*=\"hcaptcha.com\"]')"), + ("turnstile", "() => !!document.querySelector('iframe[src*=\"challenges.cloudflare.com\"]')"), +) + +CAPTCHA_TOKEN_CHECKS: tuple[str, ...] = ( + "() => { const el = document.querySelector('textarea[name=\"g-recaptcha-response\"]'); return !!(el && el.value && el.value.length > 20); }", + "() => { const el = document.querySelector('textarea[name=\"h-captcha-response\"]'); return !!(el && el.value && el.value.length > 20); }", + "() => { const el = document.querySelector('input[name=\"cf-turnstile-response\"]'); return !!(el && el.value && el.value.length > 5); }", +) + +# Селекторы, по которым считаем что капча на странице ещё видна. +CAPTCHA_PRESENCE_CHECK = ( + "() => !!document.querySelector(" + "'iframe[src*=\"recaptcha\"], iframe[src*=\"hcaptcha.com\"], iframe[src*=\"challenges.cloudflare.com\"]'" + ")" +) + + +async def _safe_eval(page: Any, js: str) -> bool: + """Безопасно выполняет JS-проверку, прячет ошибки навигации/закрытой страницы.""" + try: + result = await page.evaluate(js) + return bool(result) + except Exception: + return False + + +async def detect_captcha_kind(page: Any) -> str | None: + for name, js in CAPTCHA_KIND_DETECTORS: + if await _safe_eval(page, js): + return name + if await _safe_eval(page, CAPTCHA_PRESENCE_CHECK): + return "unknown" + return None + + +async def is_captcha_solved(page: Any) -> bool: + """Капча считается решённой, если ни одного captcha-iframe нет, ИЛИ хотя бы один токен заполнен.""" + for js in CAPTCHA_TOKEN_CHECKS: + if await _safe_eval(page, js): + return True + still_present = await _safe_eval(page, CAPTCHA_PRESENCE_CHECK) + return not still_present + + +def _http_post(url: str, payload: dict[str, Any] | None = None, timeout: float = 10.0) -> dict[str, Any]: + body = json.dumps(payload or {}).encode("utf-8") + req = request.Request(url, data=body, headers={"Content-Type": "application/json"}, method="POST") + try: + with request.urlopen(req, timeout=timeout) as resp: + raw = resp.read().decode("utf-8") + return json.loads(raw) if raw else {} + except error.HTTPError as exc: + raw = exc.read().decode("utf-8", errors="replace") if exc.fp else "" + return {"_http_error": exc.code, "_body": raw} + except Exception as exc: + return {"_error": str(exc)} + + +def _http_get(url: str, timeout: float = 35.0) -> dict[str, Any]: + req = request.Request(url, method="GET") + try: + with request.urlopen(req, timeout=timeout) as resp: + raw = resp.read().decode("utf-8") + return json.loads(raw) if raw else {} + except error.HTTPError as exc: + raw = exc.read().decode("utf-8", errors="replace") if exc.fp else "" + return {"_http_error": exc.code, "_body": raw} + except Exception as exc: + return {"_error": str(exc)} + + +async def run_to_captcha( + page: Any, + reason: str | None = None, + *, + task_id: str | None = None, + api_base: str | None = None, + view_url: str | None = None, + timeout_seconds: int | None = None, + poll_interval: float = 1.5, +) -> dict[str, Any]: + """Основной сценарий. Вызывается из custom action browser-use. + + Возвращает dict вида {"success": bool, "captcha_kind": str, "resolved_by": str|None, "error": str|None}. + """ + + resolved_task_id = task_id or os.getenv("CURRENT_TASK_ID") + resolved_api_base = (api_base or os.getenv("BROWSER_API_INTERNAL_URL", "http://browser-api:8088/api/browser")).rstrip("/") + resolved_view_url = view_url or os.getenv("BROWSER_VIEW_URL", "") + resolved_timeout = int(timeout_seconds if timeout_seconds is not None else os.getenv("CAPTCHA_TIMEOUT_SECONDS", "300")) + + if not resolved_task_id: + return {"success": False, "error": "to_captcha: CURRENT_TASK_ID is not set; tool cannot reach the API"} + + captcha_kind = await detect_captcha_kind(page) or "unknown" + + notify_resp = await asyncio.to_thread( + _http_post, + f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/notify", + { + "browser_view_url": resolved_view_url or None, + "captcha_kind": captcha_kind, + "reason": reason, + "timeout_seconds": resolved_timeout, + }, + ) + if notify_resp.get("_error") or notify_resp.get("_http_error"): + return { + "success": False, + "captcha_kind": captcha_kind, + "error": f"to_captcha: notify failed: {notify_resp}", + } + + deadline = time.time() + resolved_timeout + prompted_user = False + + while True: + # 1) DOM-проверка: решилось ли само? + if await is_captcha_solved(page): + await asyncio.to_thread( + _http_post, + f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/solved", + {"detector": "dom_poller"}, + ) + return { + "success": True, + "captcha_kind": captcha_kind, + "resolved_by": "dom_poller", + "browser_view_url": resolved_view_url, + } + + # 2) Статус из API: вдруг внешний вызов abort/extend/solved + status = await asyncio.to_thread( + _http_get, + f"{resolved_api_base}/tasks/{resolved_task_id}/captcha", + ) + state = (status or {}).get("state") + if state == "solved": + return { + "success": True, + "captcha_kind": captcha_kind, + "resolved_by": "external", + "browser_view_url": resolved_view_url, + } + if state == "aborted": + return { + "success": False, + "captcha_kind": captcha_kind, + "error": "to_captcha: aborted by user", + "browser_view_url": resolved_view_url, + } + if state == "extended": + api_deadline = (status or {}).get("deadline") + if isinstance(api_deadline, (int, float)) and api_deadline > deadline: + deadline = float(api_deadline) + prompted_user = False + + # 3) Таймаут — спрашиваем пользователя «продлить/отменить» один раз + if time.time() >= deadline: + if not prompted_user: + await asyncio.to_thread( + _http_post, + f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/timeout-prompt", + {}, + ) + prompted_user = True + deadline = time.time() + min(60, resolved_timeout) + continue + return { + "success": False, + "captcha_kind": captcha_kind, + "error": "to_captcha: timeout (no user response)", + "browser_view_url": resolved_view_url, + } + + await asyncio.sleep(poll_interval) + + +def register(controller: Any) -> None: + """Регистрирует action `to_captcha` на переданном browser-use Controller.""" + + @controller.action( + "Pause the run, ask the human to solve the on-page CAPTCHA via the live browser view, " + "and resume automatically once the DOM detector sees the challenge gone. " + "Call this ONLY when the current page is blocked by reCAPTCHA, hCaptcha or Cloudflare Turnstile." + ) + async def to_captcha(reason: str = "", browser=None, page=None) -> dict[str, Any]: + actual_page = page + if actual_page is None and browser is not None: + get_page = getattr(browser, "get_current_page", None) or getattr(browser, "get_page", None) + if callable(get_page): + actual_page = get_page() + if asyncio.iscoroutine(actual_page): + actual_page = await actual_page + if actual_page is None: + return {"success": False, "error": "to_captcha: browser-use did not provide a page"} + return await run_to_captcha(actual_page, reason=reason or None) + + return to_captcha