diff --git a/api/clients/browser_rpc_client.py b/api/clients/browser_rpc_client.py index f59a6a70..ce227d43 100644 --- a/api/clients/browser_rpc_client.py +++ b/api/clients/browser_rpc_client.py @@ -10,16 +10,8 @@ class BrowserRpcClient: self._rpc_url = rpc_url self._session = session - async def run( - self, - task: str, - timeout_sec: float, - rpc_url: str | None = None, - task_id: str | None = None, - ) -> dict[str, Any]: - payload: dict[str, Any] = {"task": task} - if task_id: - payload["task_id"] = task_id + async def run(self, task: str, timeout_sec: float, rpc_url: str | None = None) -> dict[str, Any]: + payload = {"task": task} timeout = aiohttp.ClientTimeout(total=timeout_sec) target_url = rpc_url or self._rpc_url @@ -42,15 +34,6 @@ class BrowserRpcClient: return data -async def run_browser_task( - rpc_url: str, - task: str, - timeout_sec: float, - task_id: str | None = None, -) -> dict[str, Any]: +async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]: async with aiohttp.ClientSession() as session: - return await BrowserRpcClient(rpc_url, session=session).run( - task=task, - timeout_sec=timeout_sec, - task_id=task_id, - ) + return await BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec) diff --git a/api/contracts/captcha_schemas.py b/api/contracts/captcha_schemas.py deleted file mode 100644 index 20a5324e..00000000 --- a/api/contracts/captcha_schemas.py +++ /dev/null @@ -1,54 +0,0 @@ -from pydantic import BaseModel, Field - -from api.domain.captcha_state import CaptchaState - - -class CaptchaNotifyRequest(BaseModel): - """Запрос от browser-use tool о том, что задача упёрлась в капчу.""" - - browser_view_url: str | None = Field(default=None, description="URL noVNC/прокси для ручного решения") - captcha_kind: str | None = Field(default=None, description="Тип капчи (recaptcha_v2/hcaptcha/turnstile/unknown)") - reason: str | None = Field(default=None, description="Свободное описание ситуации от агента") - timeout_seconds: int = Field(default=300, ge=1, le=3600, description="Сколько ждать решения до timeout-prompt") - - -class CaptchaSolvedRequest(BaseModel): - """Уведомление от детектора, что капча больше не блокирует страницу.""" - - detector: str | None = Field(default=None, description="Имя детектора (dom_poller/2captcha/user)") - - -class CaptchaExtendRequest(BaseModel): - """Пользовательский ответ «продлить» на timeout-prompt.""" - - extra_seconds: int = Field(default=300, ge=1, le=3600, description="На сколько ещё ждать решения") - - -class CaptchaAbortRequest(BaseModel): - """Пользовательский ответ «отменить» на timeout-prompt.""" - - reason: str | None = Field(default=None, description="Свободный текст причины") - - -class CaptchaStatusResponse(BaseModel): - """Снимок captcha-состояния задачи.""" - - task_id: str - state: CaptchaState - captcha_kind: str | None = None - reason: str | None = None - browser_view_url: str | None = None - notified_at: float | None = None - solved_at: float | None = None - deadline: float | None = None - extra_seconds: int = 0 - remaining_seconds: float | None = None - - -class CaptchaActionResponse(BaseModel): - """Универсальный ответ на действие над captcha-flow.""" - - task_id: str - state: CaptchaState - accepted: bool = True - message: str | None = None diff --git a/api/domain/captcha_state.py b/api/domain/captcha_state.py deleted file mode 100644 index 67f533e2..00000000 --- a/api/domain/captcha_state.py +++ /dev/null @@ -1,13 +0,0 @@ -from enum import Enum - - -class CaptchaState(str, Enum): - """Состояние captcha-флоу для задачи.""" - - none = "none" - awaiting = "awaiting" - solved = "solved" - timeout_prompt = "timeout_prompt" - extended = "extended" - aborted = "aborted" - failed = "failed" diff --git a/api/main.py b/api/main.py index 6cfa7982..b45c1b88 100644 --- a/api/main.py +++ b/api/main.py @@ -6,7 +6,6 @@ from fastapi import FastAPI from api.clients.browser_rpc_client import BrowserRpcClient from api.core.settings import settings from api.repositories.task_store import TaskStore -from api.routes.captcha import router as captcha_router from api.routes.runs import router as runs_router from api.routes.tasks import router as tasks_router from api.services.task_service import TaskService @@ -38,7 +37,6 @@ def create_app() -> FastAPI: ) app.include_router(tasks_router) app.include_router(runs_router) - app.include_router(captcha_router) @app.get("/health") async def health() -> dict: diff --git a/api/mappers/captcha_mapper.py b/api/mappers/captcha_mapper.py deleted file mode 100644 index 6a4a7bab..00000000 --- a/api/mappers/captcha_mapper.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -import time - -from api.contracts.captcha_schemas import CaptchaActionResponse, CaptchaStatusResponse -from api.repositories.task_store import TaskRecord - - -class CaptchaMapper: - @staticmethod - def to_status(rec: TaskRecord) -> CaptchaStatusResponse: - remaining: float | None = None - if rec.captcha_deadline is not None: - remaining = max(0.0, rec.captcha_deadline - time.time()) - return CaptchaStatusResponse( - task_id=rec.task_id, - state=rec.captcha_state, - captcha_kind=rec.captcha_kind, - reason=rec.captcha_reason, - browser_view_url=rec.captcha_view_url, - notified_at=rec.captcha_notified_at, - solved_at=rec.captcha_solved_at, - deadline=rec.captcha_deadline, - extra_seconds=rec.captcha_extra_seconds, - remaining_seconds=remaining, - ) - - @staticmethod - def to_action(rec: TaskRecord, message: str | None = None, accepted: bool = True) -> CaptchaActionResponse: - return CaptchaActionResponse( - task_id=rec.task_id, - state=rec.captcha_state, - accepted=accepted, - message=message, - ) diff --git a/api/repositories/task_store.py b/api/repositories/task_store.py index 37a76e28..b13ee7b3 100644 --- a/api/repositories/task_store.py +++ b/api/repositories/task_store.py @@ -4,7 +4,6 @@ from asyncio import Event, Lock, Queue from dataclasses import dataclass, field from typing import Any -from api.domain.captcha_state import CaptchaState from api.domain.task_status import TaskStatus @@ -26,16 +25,6 @@ class TaskRecord: cancel_requested: bool = False done_event: Event = field(default_factory=Event) - captcha_state: CaptchaState = CaptchaState.none - captcha_kind: str | None = None - captcha_reason: str | None = None - captcha_view_url: str | None = None - captcha_notified_at: float | None = None - captcha_solved_at: float | None = None - captcha_deadline: float | None = None - captcha_extra_seconds: int = 0 - captcha_event: Event = field(default_factory=Event) - @property def execution_time(self) -> float: if self.started_at is None: @@ -51,7 +40,13 @@ class TaskStore: self._thread_index: dict[str, list[str]] = {} self._subscribers: dict[str, set[Queue[dict[str, Any]]]] = {} - async def create(self, task: str, timeout: int, metadata: dict[str, Any] | None, thread_id: str = "default") -> TaskRecord: + async def create( + self, + task: str, + timeout: int, + metadata: dict[str, Any] | None, + thread_id: str = "default", + ) -> TaskRecord: task_id = uuid.uuid4().hex rec = TaskRecord(task_id=task_id, thread_id=thread_id, task=task, timeout=timeout, metadata=metadata) async with self._lock: @@ -80,15 +75,25 @@ class TaskStore: rec.started_at = time.time() return rec - async def set_done(self, task_id: str, success: bool, raw_response: dict[str, Any] | None, error: str | None, result: str | None = None, history: list[dict[str, Any]] | None = None) -> TaskRecord | None: + async def set_done( + self, + task_id: str, + success: bool, + raw_response: dict[str, Any] | None, + error: str | None, + result: str | None = None, + history: list[dict[str, Any]] | None = None, + ) -> TaskRecord | None: async with self._lock: rec = self._tasks.get(task_id) if rec is None: return None rec.finished_at = time.time() rec.raw_response = raw_response - rec.error = error if error is not None else (raw_response.get("error") if isinstance(raw_response, dict) else None) - rec.result = result if result is not None else (raw_response.get("result") if isinstance(raw_response, dict) else None) + rec.error = error if error is not None else ( + raw_response.get("error") if isinstance(raw_response, dict) else None) + rec.result = result if result is not None else ( + raw_response.get("result") if isinstance(raw_response, dict) else None) rec.history = list(history or []) rec.status = TaskStatus.succeeded if success else TaskStatus.failed rec.done_event.set() @@ -127,6 +132,7 @@ class TaskStore: return False, False if rec.status in (TaskStatus.queued, TaskStatus.running): return True, False + del self._tasks[task_id] thread_list = self._thread_index.get(rec.thread_id, []) if task_id in thread_list: @@ -134,93 +140,6 @@ class TaskStore: self._subscribers.pop(task_id, None) return True, True - async def set_captcha_awaiting(self, task_id: str, kind: str | None, reason: str | None, view_url: str | None, timeout_seconds: int) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - now = time.time() - rec.captcha_state = CaptchaState.awaiting - rec.captcha_kind = kind - rec.captcha_reason = reason - rec.captcha_view_url = view_url - rec.captcha_notified_at = now - rec.captcha_solved_at = None - rec.captcha_extra_seconds = 0 - rec.captcha_deadline = now + max(1, int(timeout_seconds)) - rec.captcha_event.clear() - return rec - - async def set_captcha_solved(self, task_id: str) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - if rec.captcha_state in (CaptchaState.none, CaptchaState.solved): - return rec - rec.captcha_state = CaptchaState.solved - rec.captcha_solved_at = time.time() - rec.captcha_event.set() - return rec - - async def set_captcha_timeout_prompt(self, task_id: str) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - if rec.captcha_state != CaptchaState.awaiting: - return rec - rec.captcha_state = CaptchaState.timeout_prompt - rec.captcha_event.set() - return rec - - async def set_captcha_extended(self, task_id: str, extra_seconds: int) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - if rec.captcha_state not in (CaptchaState.timeout_prompt, CaptchaState.awaiting): - return rec - extra = max(1, int(extra_seconds)) - rec.captcha_extra_seconds += extra - base = rec.captcha_deadline or time.time() - rec.captcha_deadline = max(base, time.time()) + extra - rec.captcha_state = CaptchaState.extended - rec.captcha_event.set() - return rec - - async def set_captcha_aborted(self, task_id: str, reason: str | None = None) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - if rec.captcha_state in (CaptchaState.none, CaptchaState.aborted, CaptchaState.failed): - return rec - rec.captcha_state = CaptchaState.aborted - rec.captcha_reason = reason or rec.captcha_reason - rec.captcha_event.set() - return rec - - async def set_captcha_failed(self, task_id: str, error: str | None = None) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - rec.captcha_state = CaptchaState.failed - if error: - rec.captcha_reason = error - rec.captcha_event.set() - return rec - - async def reset_captcha(self, task_id: str) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - rec.captcha_state = CaptchaState.none - rec.captcha_event.clear() - return rec - async def subscribe(self, task_id: str) -> Queue[dict[str, Any]] | None: queue: Queue[dict[str, Any]] = Queue() async with self._lock: diff --git a/api/routes/captcha.py b/api/routes/captcha.py deleted file mode 100644 index 2c445f6f..00000000 --- a/api/routes/captcha.py +++ /dev/null @@ -1,105 +0,0 @@ -import asyncio - -from fastapi import APIRouter, Depends, HTTPException, Query - -from api.contracts.captcha_schemas import ( - CaptchaAbortRequest, - CaptchaActionResponse, - CaptchaExtendRequest, - CaptchaNotifyRequest, - CaptchaSolvedRequest, - CaptchaStatusResponse, -) -from api.mappers.captcha_mapper import CaptchaMapper -from api.routes.dependencies import get_task_service -from api.services.protocols import TaskServiceProtocol - -router = APIRouter(prefix="/api/browser", tags=["browser-captcha"]) - - -@router.post("/tasks/{task_id}/captcha/notify", response_model=CaptchaActionResponse, status_code=202) -async def captcha_notify( - task_id: str, - payload: CaptchaNotifyRequest, - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaActionResponse: - rec = await service.notify_captcha( - task_id=task_id, - kind=payload.captcha_kind, - reason=payload.reason, - view_url=payload.browser_view_url, - timeout_seconds=payload.timeout_seconds, - ) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_action(rec, message="captcha awaiting solution") - - -@router.get("/tasks/{task_id}/captcha", response_model=CaptchaStatusResponse) -async def captcha_status( - task_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaStatusResponse: - rec = await service.get_task(task_id) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_status(rec) - - -@router.get("/tasks/{task_id}/captcha/wait", response_model=CaptchaStatusResponse) -async def captcha_wait( - task_id: str, - timeout: float = Query(default=30.0, ge=0.1, le=600.0, description="long-poll cap, seconds"), - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaStatusResponse: - rec = await service.wait_captcha(task_id=task_id, timeout=timeout) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_status(rec) - - -@router.post("/tasks/{task_id}/captcha/solved", response_model=CaptchaActionResponse) -async def captcha_solved( - task_id: str, - payload: CaptchaSolvedRequest | None = None, - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaActionResponse: - rec = await service.mark_captcha_solved(task_id=task_id, detector=(payload.detector if payload else None)) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_action(rec, message="captcha marked as solved") - - -@router.post("/tasks/{task_id}/captcha/timeout-prompt", response_model=CaptchaActionResponse) -async def captcha_timeout_prompt( - task_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaActionResponse: - rec = await service.prompt_captcha_timeout(task_id=task_id) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_action(rec, message="user must choose: extend or abort") - - -@router.post("/tasks/{task_id}/captcha/extend", response_model=CaptchaActionResponse) -async def captcha_extend( - task_id: str, - payload: CaptchaExtendRequest, - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaActionResponse: - rec = await service.extend_captcha(task_id=task_id, extra_seconds=payload.extra_seconds) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_action(rec, message=f"captcha extended by {payload.extra_seconds}s") - - -@router.post("/tasks/{task_id}/captcha/abort", response_model=CaptchaActionResponse) -async def captcha_abort( - task_id: str, - payload: CaptchaAbortRequest | None = None, - service: TaskServiceProtocol = Depends(get_task_service), -) -> CaptchaActionResponse: - rec = await service.abort_captcha(task_id=task_id, reason=(payload.reason if payload else None)) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return CaptchaMapper.to_action(rec, message="captcha aborted by user") diff --git a/api/services/protocols.py b/api/services/protocols.py index 8ec95be7..2dd785b1 100644 --- a/api/services/protocols.py +++ b/api/services/protocols.py @@ -26,22 +26,3 @@ class TaskServiceProtocol(Protocol): async def subscribe_run_stream(self, run_id: str) -> Queue[dict[str, Any]] | None: ... async def unsubscribe_run_stream(self, run_id: str, queue: Queue[dict[str, Any]]) -> None: ... - - async def notify_captcha( - self, - task_id: str, - kind: str | None, - reason: str | None, - view_url: str | None, - timeout_seconds: int, - ) -> TaskRecord | None: ... - - async def mark_captcha_solved(self, task_id: str, detector: str | None = None) -> TaskRecord | None: ... - - async def extend_captcha(self, task_id: str, extra_seconds: int) -> TaskRecord | None: ... - - async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None: ... - - async def prompt_captcha_timeout(self, task_id: str) -> TaskRecord | None: ... - - async def wait_captcha(self, task_id: str, timeout: float) -> TaskRecord | None: ... diff --git a/api/services/task_service.py b/api/services/task_service.py index ca3c2e4a..afa5968c 100644 --- a/api/services/task_service.py +++ b/api/services/task_service.py @@ -89,93 +89,6 @@ class TaskService: async def unsubscribe_run_stream(self, run_id: str, queue) -> None: await self._store.unsubscribe(run_id, queue) - async def notify_captcha( - self, - task_id: str, - kind: str | None, - reason: str | None, - view_url: str | None, - timeout_seconds: int, - ): - rec = await self._store.set_captcha_awaiting( - task_id=task_id, - kind=kind, - reason=reason, - view_url=view_url, - timeout_seconds=timeout_seconds, - ) - if rec is not None: - await self._store.publish( - task_id, - self._event(task_id, "captcha_required", { - "captcha_kind": rec.captcha_kind, - "reason": rec.captcha_reason, - "browser_view_url": rec.captcha_view_url, - "deadline": rec.captcha_deadline, - "timeout_seconds": timeout_seconds, - }), - ) - return rec - - async def mark_captcha_solved(self, task_id: str, detector: str | None = None): - rec = await self._store.set_captcha_solved(task_id) - if rec is not None: - await self._store.publish( - task_id, - self._event(task_id, "captcha_solved", { - "detector": detector or "unknown", - "solved_at": rec.captcha_solved_at, - }), - ) - return rec - - async def extend_captcha(self, task_id: str, extra_seconds: int): - rec = await self._store.set_captcha_extended(task_id=task_id, extra_seconds=extra_seconds) - if rec is not None: - await self._store.publish( - task_id, - self._event(task_id, "captcha_extended", { - "extra_seconds": extra_seconds, - "deadline": rec.captcha_deadline, - }), - ) - return rec - - async def prompt_captcha_timeout(self, task_id: str): - rec = await self._store.set_captcha_timeout_prompt(task_id) - if rec is not None: - await self._store.publish( - task_id, - self._event(task_id, "captcha_timeout_prompt", { - "captcha_kind": rec.captcha_kind, - "browser_view_url": rec.captcha_view_url, - "deadline": rec.captcha_deadline, - "actions": ["extend", "abort"], - }), - ) - return rec - - async def abort_captcha(self, task_id: str, reason: str | None = None): - rec = await self._store.set_captcha_aborted(task_id=task_id, reason=reason) - if rec is not None: - await self._store.publish( - task_id, - self._event(task_id, "captcha_aborted", { - "reason": rec.captcha_reason, - }), - ) - return rec - - async def wait_captcha(self, task_id: str, timeout: float): - rec = await self._store.get(task_id) - if rec is None: - return None - try: - await asyncio.wait_for(rec.captcha_event.wait(), timeout=timeout) - except asyncio.TimeoutError: - pass - return await self._store.get(task_id) - async def close(self) -> None: if not self._background_tasks: return @@ -214,12 +127,7 @@ class TaskService: rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap) raw = await asyncio.wait_for( - self._rpc_client.run( - task=rec.task, - timeout_sec=rpc_timeout, - rpc_url=runtime.get("rpc_url"), - task_id=task_id, - ), + self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout, rpc_url=runtime.get("rpc_url")), timeout=float(rec.timeout) + 5, ) raw = self._with_runtime_metadata(raw, runtime) diff --git a/browser_env/browser_use_runner.py b/browser_env/browser_use_runner.py index f7c33f7d..89ff39b3 100644 --- a/browser_env/browser_use_runner.py +++ b/browser_env/browser_use_runner.py @@ -9,8 +9,6 @@ from urllib import error, request from browser_use import Agent, Browser, ChatOpenAI from pydantic import BaseModel, Field, ValidationError, field_validator -from browser_env.tools import captcha_tool - SPEED_OPTIMIZATION_PROMPT = """ Speed optimization instructions: - Be extremely concise and direct in your responses @@ -18,21 +16,11 @@ Speed optimization instructions: - Use multi-action sequences whenever possible to reduce steps """ -CAPTCHA_PROMPT = """ -CAPTCHA handling: -- If the current page is blocked by reCAPTCHA, hCaptcha, or Cloudflare Turnstile, - call the `to_captcha` action ONCE with a short `reason` argument and WAIT for its result. -- Do not click on captcha challenges yourself; the human will solve them via the live browser view. -- After `to_captcha` returns success=true, continue the original task from the same step. -- If `to_captcha` returns success=false, report the error and stop. -""" - class RunTaskRequest(BaseModel): """RPC payload для запуска browser-use задачи.""" task: str = Field(..., min_length=1) - task_id: str | None = Field(default=None, description="ID задачи из browser-api (используется to_captcha tool)") @field_validator("task") @classmethod @@ -81,14 +69,10 @@ def _json_response(handler, status_code: int, payload: dict[str, Any] | BaseMode handler.wfile.write(data) -async def run_browser_task(task: str, task_id: str | None = None) -> RunTaskSuccessResponse | RunTaskErrorResponse: +async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorResponse: cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") browser_view_url = os.getenv("BROWSER_VIEW_URL", "") - if task_id: - # Прокидываем task_id в окружение, чтобы to_captcha tool знал, куда POST'ить. - os.environ["CURRENT_TASK_ID"] = task_id - browser = Browser(cdp_url=cdp_url) llm = ChatOpenAI( @@ -98,27 +82,13 @@ async def run_browser_task(task: str, task_id: str | None = None) -> RunTaskSucc temperature=0.0, ) - controller = None - try: - from browser_use import Controller # type: ignore - controller = Controller() - captcha_tool.register(controller) - except Exception: - # Если у установленной версии browser-use нет Controller — продолжаем без custom action - controller = None - - agent_kwargs = dict( - task=task, - llm=llm, - browser=browser, - flash_mode=True, - use_vision=False, - extend_system_message=SPEED_OPTIMIZATION_PROMPT + CAPTCHA_PROMPT, - ) - if controller is not None: - agent_kwargs["controller"] = controller - - agent = Agent(**agent_kwargs) + agent = Agent(task=task, + llm=llm, + browser=browser, + flash_mode=True, + use_vision=False, + extend_system_message=SPEED_OPTIMIZATION_PROMPT, + ) try: history = await agent.run() @@ -249,7 +219,7 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler): payload = json.loads(raw.decode("utf-8") if raw else "{}") request_model = RunTaskRequest.model_validate(payload) - result_model = asyncio.run(run_browser_task(request_model.task, task_id=request_model.task_id)) + result_model = asyncio.run(run_browser_task(request_model.task)) code = 200 if result_model.success else 500 _json_response(self, code, result_model) except ValidationError as err: diff --git a/browser_env/tools/__init__.py b/browser_env/tools/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/browser_env/tools/captcha_tool.py b/browser_env/tools/captcha_tool.py deleted file mode 100644 index b2a180dc..00000000 --- a/browser_env/tools/captcha_tool.py +++ /dev/null @@ -1,233 +0,0 @@ -"""to_captcha custom action для browser-use. - -Когда LLM-агент видит на странице капчу (reCAPTCHA / hCaptcha / Cloudflare Turnstile), -он вызывает action `to_captcha`. Action: - 1. Уведомляет browser-api (POST /api/browser/tasks/{task_id}/captcha/notify), - передавая URL noVNC-просмотрщика, чтобы пользователь решил капчу руками. - 2. Параллельно ОПРАШИВАЕТ DOM каждые ~1.5 сек: - * iframe reCAPTCHA/hCaptcha/Turnstile исчез - * скрытый textarea/input с токеном заполнен - Как только один из критериев сработал — POST /captcha/solved (detector=dom_poller), - возвращает управление browser-use Agent. Агент продолжает с того же шага, - где остановился, потому что browser-use держит общий browser context. - 3. Если за timeout_seconds капчу автодетектор не увидел решённой — - поднимает captcha_state в timeout_prompt (через API), даёт пользователю шанс - ответить «продлить» (POST /captcha/extend) или «отменить» (POST /captcha/abort). - 4. На abort action возвращает success=False — Agent получит сигнал об ошибке. - -Пользовательского подтверждения «готово» НЕТ. Решение засекается только DOM-детектором -либо внешним вызовом /captcha/solved. -""" - -from __future__ import annotations - -import asyncio -import json -import os -import time -from typing import Any -from urllib import error, request - - -CAPTCHA_KIND_DETECTORS: tuple[tuple[str, str], ...] = ( - ("recaptcha_v2", "() => !!document.querySelector('iframe[src*=\"recaptcha\"]')"), - ("hcaptcha", "() => !!document.querySelector('iframe[src*=\"hcaptcha.com\"]')"), - ("turnstile", "() => !!document.querySelector('iframe[src*=\"challenges.cloudflare.com\"]')"), -) - -CAPTCHA_TOKEN_CHECKS: tuple[str, ...] = ( - "() => { const el = document.querySelector('textarea[name=\"g-recaptcha-response\"]'); return !!(el && el.value && el.value.length > 20); }", - "() => { const el = document.querySelector('textarea[name=\"h-captcha-response\"]'); return !!(el && el.value && el.value.length > 20); }", - "() => { const el = document.querySelector('input[name=\"cf-turnstile-response\"]'); return !!(el && el.value && el.value.length > 5); }", -) - -# Селекторы, по которым считаем что капча на странице ещё видна. -CAPTCHA_PRESENCE_CHECK = ( - "() => !!document.querySelector(" - "'iframe[src*=\"recaptcha\"], iframe[src*=\"hcaptcha.com\"], iframe[src*=\"challenges.cloudflare.com\"]'" - ")" -) - - -async def _safe_eval(page: Any, js: str) -> bool: - """Безопасно выполняет JS-проверку, прячет ошибки навигации/закрытой страницы.""" - try: - result = await page.evaluate(js) - return bool(result) - except Exception: - return False - - -async def detect_captcha_kind(page: Any) -> str | None: - for name, js in CAPTCHA_KIND_DETECTORS: - if await _safe_eval(page, js): - return name - if await _safe_eval(page, CAPTCHA_PRESENCE_CHECK): - return "unknown" - return None - - -async def is_captcha_solved(page: Any) -> bool: - """Капча считается решённой, если ни одного captcha-iframe нет, ИЛИ хотя бы один токен заполнен.""" - for js in CAPTCHA_TOKEN_CHECKS: - if await _safe_eval(page, js): - return True - still_present = await _safe_eval(page, CAPTCHA_PRESENCE_CHECK) - return not still_present - - -def _http_post(url: str, payload: dict[str, Any] | None = None, timeout: float = 10.0) -> dict[str, Any]: - body = json.dumps(payload or {}).encode("utf-8") - req = request.Request(url, data=body, headers={"Content-Type": "application/json"}, method="POST") - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read().decode("utf-8") - return json.loads(raw) if raw else {} - except error.HTTPError as exc: - raw = exc.read().decode("utf-8", errors="replace") if exc.fp else "" - return {"_http_error": exc.code, "_body": raw} - except Exception as exc: - return {"_error": str(exc)} - - -def _http_get(url: str, timeout: float = 35.0) -> dict[str, Any]: - req = request.Request(url, method="GET") - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read().decode("utf-8") - return json.loads(raw) if raw else {} - except error.HTTPError as exc: - raw = exc.read().decode("utf-8", errors="replace") if exc.fp else "" - return {"_http_error": exc.code, "_body": raw} - except Exception as exc: - return {"_error": str(exc)} - - -async def run_to_captcha( - page: Any, - reason: str | None = None, - *, - task_id: str | None = None, - api_base: str | None = None, - view_url: str | None = None, - timeout_seconds: int | None = None, - poll_interval: float = 1.5, -) -> dict[str, Any]: - """Основной сценарий. Вызывается из custom action browser-use. - - Возвращает dict вида {"success": bool, "captcha_kind": str, "resolved_by": str|None, "error": str|None}. - """ - - resolved_task_id = task_id or os.getenv("CURRENT_TASK_ID") - resolved_api_base = (api_base or os.getenv("BROWSER_API_INTERNAL_URL", "http://browser-api:8088/api/browser")).rstrip("/") - resolved_view_url = view_url or os.getenv("BROWSER_VIEW_URL", "") - resolved_timeout = int(timeout_seconds if timeout_seconds is not None else os.getenv("CAPTCHA_TIMEOUT_SECONDS", "300")) - - if not resolved_task_id: - return {"success": False, "error": "to_captcha: CURRENT_TASK_ID is not set; tool cannot reach the API"} - - captcha_kind = await detect_captcha_kind(page) or "unknown" - - notify_resp = await asyncio.to_thread( - _http_post, - f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/notify", - { - "browser_view_url": resolved_view_url or None, - "captcha_kind": captcha_kind, - "reason": reason, - "timeout_seconds": resolved_timeout, - }, - ) - if notify_resp.get("_error") or notify_resp.get("_http_error"): - return { - "success": False, - "captcha_kind": captcha_kind, - "error": f"to_captcha: notify failed: {notify_resp}", - } - - deadline = time.time() + resolved_timeout - prompted_user = False - - while True: - # 1) DOM-проверка: решилось ли само? - if await is_captcha_solved(page): - await asyncio.to_thread( - _http_post, - f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/solved", - {"detector": "dom_poller"}, - ) - return { - "success": True, - "captcha_kind": captcha_kind, - "resolved_by": "dom_poller", - "browser_view_url": resolved_view_url, - } - - # 2) Статус из API: вдруг внешний вызов abort/extend/solved - status = await asyncio.to_thread( - _http_get, - f"{resolved_api_base}/tasks/{resolved_task_id}/captcha", - ) - state = (status or {}).get("state") - if state == "solved": - return { - "success": True, - "captcha_kind": captcha_kind, - "resolved_by": "external", - "browser_view_url": resolved_view_url, - } - if state == "aborted": - return { - "success": False, - "captcha_kind": captcha_kind, - "error": "to_captcha: aborted by user", - "browser_view_url": resolved_view_url, - } - if state == "extended": - api_deadline = (status or {}).get("deadline") - if isinstance(api_deadline, (int, float)) and api_deadline > deadline: - deadline = float(api_deadline) - prompted_user = False - - # 3) Таймаут — спрашиваем пользователя «продлить/отменить» один раз - if time.time() >= deadline: - if not prompted_user: - await asyncio.to_thread( - _http_post, - f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/timeout-prompt", - {}, - ) - prompted_user = True - deadline = time.time() + min(60, resolved_timeout) - continue - return { - "success": False, - "captcha_kind": captcha_kind, - "error": "to_captcha: timeout (no user response)", - "browser_view_url": resolved_view_url, - } - - await asyncio.sleep(poll_interval) - - -def register(controller: Any) -> None: - """Регистрирует action `to_captcha` на переданном browser-use Controller.""" - - @controller.action( - "Pause the run, ask the human to solve the on-page CAPTCHA via the live browser view, " - "and resume automatically once the DOM detector sees the challenge gone. " - "Call this ONLY when the current page is blocked by reCAPTCHA, hCaptcha or Cloudflare Turnstile." - ) - async def to_captcha(reason: str = "", browser=None, page=None) -> dict[str, Any]: - actual_page = page - if actual_page is None and browser is not None: - get_page = getattr(browser, "get_current_page", None) or getattr(browser, "get_page", None) - if callable(get_page): - actual_page = get_page() - if asyncio.iscoroutine(actual_page): - actual_page = await actual_page - if actual_page is None: - return {"success": False, "error": "to_captcha: browser-use did not provide a page"} - return await run_to_captcha(actual_page, reason=reason or None) - - return to_captcha