Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
4852345bf6 add new tool: to_captcha 2026-05-17 01:03:55 +03:00
12 changed files with 716 additions and 35 deletions

View file

@ -10,8 +10,16 @@ class BrowserRpcClient:
self._rpc_url = rpc_url self._rpc_url = rpc_url
self._session = session self._session = session
async def run(self, task: str, timeout_sec: float, rpc_url: str | None = None) -> dict[str, Any]: async def run(
payload = {"task": task} self,
task: str,
timeout_sec: float,
rpc_url: str | None = None,
task_id: str | None = None,
) -> dict[str, Any]:
payload: dict[str, Any] = {"task": task}
if task_id:
payload["task_id"] = task_id
timeout = aiohttp.ClientTimeout(total=timeout_sec) timeout = aiohttp.ClientTimeout(total=timeout_sec)
target_url = rpc_url or self._rpc_url target_url = rpc_url or self._rpc_url
@ -34,6 +42,15 @@ class BrowserRpcClient:
return data return data
async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]: async def run_browser_task(
rpc_url: str,
task: str,
timeout_sec: float,
task_id: str | None = None,
) -> dict[str, Any]:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
return await BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec) return await BrowserRpcClient(rpc_url, session=session).run(
task=task,
timeout_sec=timeout_sec,
task_id=task_id,
)

View file

@ -0,0 +1,54 @@
from pydantic import BaseModel, Field
from api.domain.captcha_state import CaptchaState
class CaptchaNotifyRequest(BaseModel):
"""Запрос от browser-use tool о том, что задача упёрлась в капчу."""
browser_view_url: str | None = Field(default=None, description="URL noVNC/прокси для ручного решения")
captcha_kind: str | None = Field(default=None, description="Тип капчи (recaptcha_v2/hcaptcha/turnstile/unknown)")
reason: str | None = Field(default=None, description="Свободное описание ситуации от агента")
timeout_seconds: int = Field(default=300, ge=1, le=3600, description="Сколько ждать решения до timeout-prompt")
class CaptchaSolvedRequest(BaseModel):
"""Уведомление от детектора, что капча больше не блокирует страницу."""
detector: str | None = Field(default=None, description="Имя детектора (dom_poller/2captcha/user)")
class CaptchaExtendRequest(BaseModel):
"""Пользовательский ответ «продлить» на timeout-prompt."""
extra_seconds: int = Field(default=300, ge=1, le=3600, description="На сколько ещё ждать решения")
class CaptchaAbortRequest(BaseModel):
"""Пользовательский ответ «отменить» на timeout-prompt."""
reason: str | None = Field(default=None, description="Свободный текст причины")
class CaptchaStatusResponse(BaseModel):
"""Снимок captcha-состояния задачи."""
task_id: str
state: CaptchaState
captcha_kind: str | None = None
reason: str | None = None
browser_view_url: str | None = None
notified_at: float | None = None
solved_at: float | None = None
deadline: float | None = None
extra_seconds: int = 0
remaining_seconds: float | None = None
class CaptchaActionResponse(BaseModel):
"""Универсальный ответ на действие над captcha-flow."""
task_id: str
state: CaptchaState
accepted: bool = True
message: str | None = None

View file

@ -0,0 +1,13 @@
from enum import Enum
class CaptchaState(str, Enum):
"""Состояние captcha-флоу для задачи."""
none = "none"
awaiting = "awaiting"
solved = "solved"
timeout_prompt = "timeout_prompt"
extended = "extended"
aborted = "aborted"
failed = "failed"

View file

@ -6,6 +6,7 @@ from fastapi import FastAPI
from api.clients.browser_rpc_client import BrowserRpcClient from api.clients.browser_rpc_client import BrowserRpcClient
from api.core.settings import settings from api.core.settings import settings
from api.repositories.task_store import TaskStore from api.repositories.task_store import TaskStore
from api.routes.captcha import router as captcha_router
from api.routes.runs import router as runs_router from api.routes.runs import router as runs_router
from api.routes.tasks import router as tasks_router from api.routes.tasks import router as tasks_router
from api.services.task_service import TaskService from api.services.task_service import TaskService
@ -37,6 +38,7 @@ def create_app() -> FastAPI:
) )
app.include_router(tasks_router) app.include_router(tasks_router)
app.include_router(runs_router) app.include_router(runs_router)
app.include_router(captcha_router)
@app.get("/health") @app.get("/health")
async def health() -> dict: async def health() -> dict:

View file

@ -0,0 +1,35 @@
from __future__ import annotations
import time
from api.contracts.captcha_schemas import CaptchaActionResponse, CaptchaStatusResponse
from api.repositories.task_store import TaskRecord
class CaptchaMapper:
@staticmethod
def to_status(rec: TaskRecord) -> CaptchaStatusResponse:
remaining: float | None = None
if rec.captcha_deadline is not None:
remaining = max(0.0, rec.captcha_deadline - time.time())
return CaptchaStatusResponse(
task_id=rec.task_id,
state=rec.captcha_state,
captcha_kind=rec.captcha_kind,
reason=rec.captcha_reason,
browser_view_url=rec.captcha_view_url,
notified_at=rec.captcha_notified_at,
solved_at=rec.captcha_solved_at,
deadline=rec.captcha_deadline,
extra_seconds=rec.captcha_extra_seconds,
remaining_seconds=remaining,
)
@staticmethod
def to_action(rec: TaskRecord, message: str | None = None, accepted: bool = True) -> CaptchaActionResponse:
return CaptchaActionResponse(
task_id=rec.task_id,
state=rec.captcha_state,
accepted=accepted,
message=message,
)

View file

@ -4,6 +4,7 @@ from asyncio import Event, Lock, Queue
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any from typing import Any
from api.domain.captcha_state import CaptchaState
from api.domain.task_status import TaskStatus from api.domain.task_status import TaskStatus
@ -25,6 +26,16 @@ class TaskRecord:
cancel_requested: bool = False cancel_requested: bool = False
done_event: Event = field(default_factory=Event) done_event: Event = field(default_factory=Event)
captcha_state: CaptchaState = CaptchaState.none
captcha_kind: str | None = None
captcha_reason: str | None = None
captcha_view_url: str | None = None
captcha_notified_at: float | None = None
captcha_solved_at: float | None = None
captcha_deadline: float | None = None
captcha_extra_seconds: int = 0
captcha_event: Event = field(default_factory=Event)
@property @property
def execution_time(self) -> float: def execution_time(self) -> float:
if self.started_at is None: if self.started_at is None:
@ -40,13 +51,7 @@ class TaskStore:
self._thread_index: dict[str, list[str]] = {} self._thread_index: dict[str, list[str]] = {}
self._subscribers: dict[str, set[Queue[dict[str, Any]]]] = {} self._subscribers: dict[str, set[Queue[dict[str, Any]]]] = {}
async def create( async def create(self, task: str, timeout: int, metadata: dict[str, Any] | None, thread_id: str = "default") -> TaskRecord:
self,
task: str,
timeout: int,
metadata: dict[str, Any] | None,
thread_id: str = "default",
) -> TaskRecord:
task_id = uuid.uuid4().hex task_id = uuid.uuid4().hex
rec = TaskRecord(task_id=task_id, thread_id=thread_id, task=task, timeout=timeout, metadata=metadata) rec = TaskRecord(task_id=task_id, thread_id=thread_id, task=task, timeout=timeout, metadata=metadata)
async with self._lock: async with self._lock:
@ -75,25 +80,15 @@ class TaskStore:
rec.started_at = time.time() rec.started_at = time.time()
return rec return rec
async def set_done( async def set_done(self, task_id: str, success: bool, raw_response: dict[str, Any] | None, error: str | None, result: str | None = None, history: list[dict[str, Any]] | None = None) -> TaskRecord | None:
self,
task_id: str,
success: bool,
raw_response: dict[str, Any] | None,
error: str | None,
result: str | None = None,
history: list[dict[str, Any]] | None = None,
) -> TaskRecord | None:
async with self._lock: async with self._lock:
rec = self._tasks.get(task_id) rec = self._tasks.get(task_id)
if rec is None: if rec is None:
return None return None
rec.finished_at = time.time() rec.finished_at = time.time()
rec.raw_response = raw_response rec.raw_response = raw_response
rec.error = error if error is not None else ( rec.error = error if error is not None else (raw_response.get("error") if isinstance(raw_response, dict) else None)
raw_response.get("error") if isinstance(raw_response, dict) else None) rec.result = result if result is not None else (raw_response.get("result") if isinstance(raw_response, dict) else None)
rec.result = result if result is not None else (
raw_response.get("result") if isinstance(raw_response, dict) else None)
rec.history = list(history or []) rec.history = list(history or [])
rec.status = TaskStatus.succeeded if success else TaskStatus.failed rec.status = TaskStatus.succeeded if success else TaskStatus.failed
rec.done_event.set() rec.done_event.set()
@ -132,7 +127,6 @@ class TaskStore:
return False, False return False, False
if rec.status in (TaskStatus.queued, TaskStatus.running): if rec.status in (TaskStatus.queued, TaskStatus.running):
return True, False return True, False
del self._tasks[task_id] del self._tasks[task_id]
thread_list = self._thread_index.get(rec.thread_id, []) thread_list = self._thread_index.get(rec.thread_id, [])
if task_id in thread_list: if task_id in thread_list:
@ -140,6 +134,93 @@ class TaskStore:
self._subscribers.pop(task_id, None) self._subscribers.pop(task_id, None)
return True, True return True, True
async def set_captcha_awaiting(self, task_id: str, kind: str | None, reason: str | None, view_url: str | None, timeout_seconds: int) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
now = time.time()
rec.captcha_state = CaptchaState.awaiting
rec.captcha_kind = kind
rec.captcha_reason = reason
rec.captcha_view_url = view_url
rec.captcha_notified_at = now
rec.captcha_solved_at = None
rec.captcha_extra_seconds = 0
rec.captcha_deadline = now + max(1, int(timeout_seconds))
rec.captcha_event.clear()
return rec
async def set_captcha_solved(self, task_id: str) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
if rec.captcha_state in (CaptchaState.none, CaptchaState.solved):
return rec
rec.captcha_state = CaptchaState.solved
rec.captcha_solved_at = time.time()
rec.captcha_event.set()
return rec
async def set_captcha_timeout_prompt(self, task_id: str) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
if rec.captcha_state != CaptchaState.awaiting:
return rec
rec.captcha_state = CaptchaState.timeout_prompt
rec.captcha_event.set()
return rec
async def set_captcha_extended(self, task_id: str, extra_seconds: int) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
if rec.captcha_state not in (CaptchaState.timeout_prompt, CaptchaState.awaiting):
return rec
extra = max(1, int(extra_seconds))
rec.captcha_extra_seconds += extra
base = rec.captcha_deadline or time.time()
rec.captcha_deadline = max(base, time.time()) + extra
rec.captcha_state = CaptchaState.extended
rec.captcha_event.set()
return rec
async def set_captcha_aborted(self, task_id: str, reason: str | None = None) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
if rec.captcha_state in (CaptchaState.none, CaptchaState.aborted, CaptchaState.failed):
return rec
rec.captcha_state = CaptchaState.aborted
rec.captcha_reason = reason or rec.captcha_reason
rec.captcha_event.set()
return rec
async def set_captcha_failed(self, task_id: str, error: str | None = None) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
rec.captcha_state = CaptchaState.failed
if error:
rec.captcha_reason = error
rec.captcha_event.set()
return rec
async def reset_captcha(self, task_id: str) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
rec.captcha_state = CaptchaState.none
rec.captcha_event.clear()
return rec
async def subscribe(self, task_id: str) -> Queue[dict[str, Any]] | None: async def subscribe(self, task_id: str) -> Queue[dict[str, Any]] | None:
queue: Queue[dict[str, Any]] = Queue() queue: Queue[dict[str, Any]] = Queue()
async with self._lock: async with self._lock:

105
api/routes/captcha.py Normal file
View file

@ -0,0 +1,105 @@
import asyncio
from fastapi import APIRouter, Depends, HTTPException, Query
from api.contracts.captcha_schemas import (
CaptchaAbortRequest,
CaptchaActionResponse,
CaptchaExtendRequest,
CaptchaNotifyRequest,
CaptchaSolvedRequest,
CaptchaStatusResponse,
)
from api.mappers.captcha_mapper import CaptchaMapper
from api.routes.dependencies import get_task_service
from api.services.protocols import TaskServiceProtocol
router = APIRouter(prefix="/api/browser", tags=["browser-captcha"])
@router.post("/tasks/{task_id}/captcha/notify", response_model=CaptchaActionResponse, status_code=202)
async def captcha_notify(
task_id: str,
payload: CaptchaNotifyRequest,
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaActionResponse:
rec = await service.notify_captcha(
task_id=task_id,
kind=payload.captcha_kind,
reason=payload.reason,
view_url=payload.browser_view_url,
timeout_seconds=payload.timeout_seconds,
)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_action(rec, message="captcha awaiting solution")
@router.get("/tasks/{task_id}/captcha", response_model=CaptchaStatusResponse)
async def captcha_status(
task_id: str,
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaStatusResponse:
rec = await service.get_task(task_id)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_status(rec)
@router.get("/tasks/{task_id}/captcha/wait", response_model=CaptchaStatusResponse)
async def captcha_wait(
task_id: str,
timeout: float = Query(default=30.0, ge=0.1, le=600.0, description="long-poll cap, seconds"),
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaStatusResponse:
rec = await service.wait_captcha(task_id=task_id, timeout=timeout)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_status(rec)
@router.post("/tasks/{task_id}/captcha/solved", response_model=CaptchaActionResponse)
async def captcha_solved(
task_id: str,
payload: CaptchaSolvedRequest | None = None,
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaActionResponse:
rec = await service.mark_captcha_solved(task_id=task_id, detector=(payload.detector if payload else None))
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_action(rec, message="captcha marked as solved")
@router.post("/tasks/{task_id}/captcha/timeout-prompt", response_model=CaptchaActionResponse)
async def captcha_timeout_prompt(
task_id: str,
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaActionResponse:
rec = await service.prompt_captcha_timeout(task_id=task_id)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_action(rec, message="user must choose: extend or abort")
@router.post("/tasks/{task_id}/captcha/extend", response_model=CaptchaActionResponse)
async def captcha_extend(
task_id: str,
payload: CaptchaExtendRequest,
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaActionResponse:
rec = await service.extend_captcha(task_id=task_id, extra_seconds=payload.extra_seconds)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_action(rec, message=f"captcha extended by {payload.extra_seconds}s")
@router.post("/tasks/{task_id}/captcha/abort", response_model=CaptchaActionResponse)
async def captcha_abort(
task_id: str,
payload: CaptchaAbortRequest | None = None,
service: TaskServiceProtocol = Depends(get_task_service),
) -> CaptchaActionResponse:
rec = await service.abort_captcha(task_id=task_id, reason=(payload.reason if payload else None))
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return CaptchaMapper.to_action(rec, message="captcha aborted by user")

View file

@ -26,3 +26,22 @@ class TaskServiceProtocol(Protocol):
async def subscribe_run_stream(self, run_id: str) -> Queue[dict[str, Any]] | None: ... async def subscribe_run_stream(self, run_id: str) -> Queue[dict[str, Any]] | None: ...
async def unsubscribe_run_stream(self, run_id: str, queue: Queue[dict[str, Any]]) -> None: ... async def unsubscribe_run_stream(self, run_id: str, queue: Queue[dict[str, Any]]) -> None: ...
async def notify_captcha(
self,
task_id: str,
kind: str | None,
reason: str | None,
view_url: str | None,
timeout_seconds: int,
) -> TaskRecord | None: ...
async def mark_captcha_solved(self, task_id: str, detector: str | None = None) -> TaskRecord | None: ...
async def extend_captcha(self, task_id: str, extra_seconds: int) -> TaskRecord | None: ...
async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None: ...
async def prompt_captcha_timeout(self, task_id: str) -> TaskRecord | None: ...
async def wait_captcha(self, task_id: str, timeout: float) -> TaskRecord | None: ...

View file

@ -89,6 +89,93 @@ class TaskService:
async def unsubscribe_run_stream(self, run_id: str, queue) -> None: async def unsubscribe_run_stream(self, run_id: str, queue) -> None:
await self._store.unsubscribe(run_id, queue) await self._store.unsubscribe(run_id, queue)
async def notify_captcha(
self,
task_id: str,
kind: str | None,
reason: str | None,
view_url: str | None,
timeout_seconds: int,
):
rec = await self._store.set_captcha_awaiting(
task_id=task_id,
kind=kind,
reason=reason,
view_url=view_url,
timeout_seconds=timeout_seconds,
)
if rec is not None:
await self._store.publish(
task_id,
self._event(task_id, "captcha_required", {
"captcha_kind": rec.captcha_kind,
"reason": rec.captcha_reason,
"browser_view_url": rec.captcha_view_url,
"deadline": rec.captcha_deadline,
"timeout_seconds": timeout_seconds,
}),
)
return rec
async def mark_captcha_solved(self, task_id: str, detector: str | None = None):
rec = await self._store.set_captcha_solved(task_id)
if rec is not None:
await self._store.publish(
task_id,
self._event(task_id, "captcha_solved", {
"detector": detector or "unknown",
"solved_at": rec.captcha_solved_at,
}),
)
return rec
async def extend_captcha(self, task_id: str, extra_seconds: int):
rec = await self._store.set_captcha_extended(task_id=task_id, extra_seconds=extra_seconds)
if rec is not None:
await self._store.publish(
task_id,
self._event(task_id, "captcha_extended", {
"extra_seconds": extra_seconds,
"deadline": rec.captcha_deadline,
}),
)
return rec
async def prompt_captcha_timeout(self, task_id: str):
rec = await self._store.set_captcha_timeout_prompt(task_id)
if rec is not None:
await self._store.publish(
task_id,
self._event(task_id, "captcha_timeout_prompt", {
"captcha_kind": rec.captcha_kind,
"browser_view_url": rec.captcha_view_url,
"deadline": rec.captcha_deadline,
"actions": ["extend", "abort"],
}),
)
return rec
async def abort_captcha(self, task_id: str, reason: str | None = None):
rec = await self._store.set_captcha_aborted(task_id=task_id, reason=reason)
if rec is not None:
await self._store.publish(
task_id,
self._event(task_id, "captcha_aborted", {
"reason": rec.captcha_reason,
}),
)
return rec
async def wait_captcha(self, task_id: str, timeout: float):
rec = await self._store.get(task_id)
if rec is None:
return None
try:
await asyncio.wait_for(rec.captcha_event.wait(), timeout=timeout)
except asyncio.TimeoutError:
pass
return await self._store.get(task_id)
async def close(self) -> None: async def close(self) -> None:
if not self._background_tasks: if not self._background_tasks:
return return
@ -127,7 +214,12 @@ class TaskService:
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap) rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
raw = await asyncio.wait_for( raw = await asyncio.wait_for(
self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout, rpc_url=runtime.get("rpc_url")), self._rpc_client.run(
task=rec.task,
timeout_sec=rpc_timeout,
rpc_url=runtime.get("rpc_url"),
task_id=task_id,
),
timeout=float(rec.timeout) + 5, timeout=float(rec.timeout) + 5,
) )
raw = self._with_runtime_metadata(raw, runtime) raw = self._with_runtime_metadata(raw, runtime)

View file

@ -9,6 +9,8 @@ from urllib import error, request
from browser_use import Agent, Browser, ChatOpenAI from browser_use import Agent, Browser, ChatOpenAI
from pydantic import BaseModel, Field, ValidationError, field_validator from pydantic import BaseModel, Field, ValidationError, field_validator
from browser_env.tools import captcha_tool
SPEED_OPTIMIZATION_PROMPT = """ SPEED_OPTIMIZATION_PROMPT = """
Speed optimization instructions: Speed optimization instructions:
- Be extremely concise and direct in your responses - Be extremely concise and direct in your responses
@ -16,11 +18,21 @@ Speed optimization instructions:
- Use multi-action sequences whenever possible to reduce steps - Use multi-action sequences whenever possible to reduce steps
""" """
CAPTCHA_PROMPT = """
CAPTCHA handling:
- If the current page is blocked by reCAPTCHA, hCaptcha, or Cloudflare Turnstile,
call the `to_captcha` action ONCE with a short `reason` argument and WAIT for its result.
- Do not click on captcha challenges yourself; the human will solve them via the live browser view.
- After `to_captcha` returns success=true, continue the original task from the same step.
- If `to_captcha` returns success=false, report the error and stop.
"""
class RunTaskRequest(BaseModel): class RunTaskRequest(BaseModel):
"""RPC payload для запуска browser-use задачи.""" """RPC payload для запуска browser-use задачи."""
task: str = Field(..., min_length=1) task: str = Field(..., min_length=1)
task_id: str | None = Field(default=None, description="ID задачи из browser-api (используется to_captcha tool)")
@field_validator("task") @field_validator("task")
@classmethod @classmethod
@ -69,10 +81,14 @@ def _json_response(handler, status_code: int, payload: dict[str, Any] | BaseMode
handler.wfile.write(data) handler.wfile.write(data)
async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorResponse: async def run_browser_task(task: str, task_id: str | None = None) -> RunTaskSuccessResponse | RunTaskErrorResponse:
cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
browser_view_url = os.getenv("BROWSER_VIEW_URL", "") browser_view_url = os.getenv("BROWSER_VIEW_URL", "")
if task_id:
# Прокидываем task_id в окружение, чтобы to_captcha tool знал, куда POST'ить.
os.environ["CURRENT_TASK_ID"] = task_id
browser = Browser(cdp_url=cdp_url) browser = Browser(cdp_url=cdp_url)
llm = ChatOpenAI( llm = ChatOpenAI(
@ -82,13 +98,27 @@ async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorRe
temperature=0.0, temperature=0.0,
) )
agent = Agent(task=task, controller = None
llm=llm, try:
browser=browser, from browser_use import Controller # type: ignore
flash_mode=True, controller = Controller()
use_vision=False, captcha_tool.register(controller)
extend_system_message=SPEED_OPTIMIZATION_PROMPT, except Exception:
) # Если у установленной версии browser-use нет Controller — продолжаем без custom action
controller = None
agent_kwargs = dict(
task=task,
llm=llm,
browser=browser,
flash_mode=True,
use_vision=False,
extend_system_message=SPEED_OPTIMIZATION_PROMPT + CAPTCHA_PROMPT,
)
if controller is not None:
agent_kwargs["controller"] = controller
agent = Agent(**agent_kwargs)
try: try:
history = await agent.run() history = await agent.run()
@ -219,7 +249,7 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler):
payload = json.loads(raw.decode("utf-8") if raw else "{}") payload = json.loads(raw.decode("utf-8") if raw else "{}")
request_model = RunTaskRequest.model_validate(payload) request_model = RunTaskRequest.model_validate(payload)
result_model = asyncio.run(run_browser_task(request_model.task)) result_model = asyncio.run(run_browser_task(request_model.task, task_id=request_model.task_id))
code = 200 if result_model.success else 500 code = 200 if result_model.success else 500
_json_response(self, code, result_model) _json_response(self, code, result_model)
except ValidationError as err: except ValidationError as err:

View file

View file

@ -0,0 +1,233 @@
"""to_captcha custom action для browser-use.
Когда LLM-агент видит на странице капчу (reCAPTCHA / hCaptcha / Cloudflare Turnstile),
он вызывает action `to_captcha`. Action:
1. Уведомляет browser-api (POST /api/browser/tasks/{task_id}/captcha/notify),
передавая URL noVNC-просмотрщика, чтобы пользователь решил капчу руками.
2. Параллельно ОПРАШИВАЕТ DOM каждые ~1.5 сек:
* iframe reCAPTCHA/hCaptcha/Turnstile исчез
* скрытый textarea/input с токеном заполнен
Как только один из критериев сработал POST /captcha/solved (detector=dom_poller),
возвращает управление browser-use Agent. Агент продолжает с того же шага,
где остановился, потому что browser-use держит общий browser context.
3. Если за timeout_seconds капчу автодетектор не увидел решённой
поднимает captcha_state в timeout_prompt (через API), даёт пользователю шанс
ответить «продлить» (POST /captcha/extend) или «отменить» (POST /captcha/abort).
4. На abort action возвращает success=False Agent получит сигнал об ошибке.
Пользовательского подтверждения «готово» НЕТ. Решение засекается только DOM-детектором
либо внешним вызовом /captcha/solved.
"""
from __future__ import annotations
import asyncio
import json
import os
import time
from typing import Any
from urllib import error, request
CAPTCHA_KIND_DETECTORS: tuple[tuple[str, str], ...] = (
("recaptcha_v2", "() => !!document.querySelector('iframe[src*=\"recaptcha\"]')"),
("hcaptcha", "() => !!document.querySelector('iframe[src*=\"hcaptcha.com\"]')"),
("turnstile", "() => !!document.querySelector('iframe[src*=\"challenges.cloudflare.com\"]')"),
)
CAPTCHA_TOKEN_CHECKS: tuple[str, ...] = (
"() => { const el = document.querySelector('textarea[name=\"g-recaptcha-response\"]'); return !!(el && el.value && el.value.length > 20); }",
"() => { const el = document.querySelector('textarea[name=\"h-captcha-response\"]'); return !!(el && el.value && el.value.length > 20); }",
"() => { const el = document.querySelector('input[name=\"cf-turnstile-response\"]'); return !!(el && el.value && el.value.length > 5); }",
)
# Селекторы, по которым считаем что капча на странице ещё видна.
CAPTCHA_PRESENCE_CHECK = (
"() => !!document.querySelector("
"'iframe[src*=\"recaptcha\"], iframe[src*=\"hcaptcha.com\"], iframe[src*=\"challenges.cloudflare.com\"]'"
")"
)
async def _safe_eval(page: Any, js: str) -> bool:
"""Безопасно выполняет JS-проверку, прячет ошибки навигации/закрытой страницы."""
try:
result = await page.evaluate(js)
return bool(result)
except Exception:
return False
async def detect_captcha_kind(page: Any) -> str | None:
for name, js in CAPTCHA_KIND_DETECTORS:
if await _safe_eval(page, js):
return name
if await _safe_eval(page, CAPTCHA_PRESENCE_CHECK):
return "unknown"
return None
async def is_captcha_solved(page: Any) -> bool:
"""Капча считается решённой, если ни одного captcha-iframe нет, ИЛИ хотя бы один токен заполнен."""
for js in CAPTCHA_TOKEN_CHECKS:
if await _safe_eval(page, js):
return True
still_present = await _safe_eval(page, CAPTCHA_PRESENCE_CHECK)
return not still_present
def _http_post(url: str, payload: dict[str, Any] | None = None, timeout: float = 10.0) -> dict[str, Any]:
body = json.dumps(payload or {}).encode("utf-8")
req = request.Request(url, data=body, headers={"Content-Type": "application/json"}, method="POST")
try:
with request.urlopen(req, timeout=timeout) as resp:
raw = resp.read().decode("utf-8")
return json.loads(raw) if raw else {}
except error.HTTPError as exc:
raw = exc.read().decode("utf-8", errors="replace") if exc.fp else ""
return {"_http_error": exc.code, "_body": raw}
except Exception as exc:
return {"_error": str(exc)}
def _http_get(url: str, timeout: float = 35.0) -> dict[str, Any]:
req = request.Request(url, method="GET")
try:
with request.urlopen(req, timeout=timeout) as resp:
raw = resp.read().decode("utf-8")
return json.loads(raw) if raw else {}
except error.HTTPError as exc:
raw = exc.read().decode("utf-8", errors="replace") if exc.fp else ""
return {"_http_error": exc.code, "_body": raw}
except Exception as exc:
return {"_error": str(exc)}
async def run_to_captcha(
page: Any,
reason: str | None = None,
*,
task_id: str | None = None,
api_base: str | None = None,
view_url: str | None = None,
timeout_seconds: int | None = None,
poll_interval: float = 1.5,
) -> dict[str, Any]:
"""Основной сценарий. Вызывается из custom action browser-use.
Возвращает dict вида {"success": bool, "captcha_kind": str, "resolved_by": str|None, "error": str|None}.
"""
resolved_task_id = task_id or os.getenv("CURRENT_TASK_ID")
resolved_api_base = (api_base or os.getenv("BROWSER_API_INTERNAL_URL", "http://browser-api:8088/api/browser")).rstrip("/")
resolved_view_url = view_url or os.getenv("BROWSER_VIEW_URL", "")
resolved_timeout = int(timeout_seconds if timeout_seconds is not None else os.getenv("CAPTCHA_TIMEOUT_SECONDS", "300"))
if not resolved_task_id:
return {"success": False, "error": "to_captcha: CURRENT_TASK_ID is not set; tool cannot reach the API"}
captcha_kind = await detect_captcha_kind(page) or "unknown"
notify_resp = await asyncio.to_thread(
_http_post,
f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/notify",
{
"browser_view_url": resolved_view_url or None,
"captcha_kind": captcha_kind,
"reason": reason,
"timeout_seconds": resolved_timeout,
},
)
if notify_resp.get("_error") or notify_resp.get("_http_error"):
return {
"success": False,
"captcha_kind": captcha_kind,
"error": f"to_captcha: notify failed: {notify_resp}",
}
deadline = time.time() + resolved_timeout
prompted_user = False
while True:
# 1) DOM-проверка: решилось ли само?
if await is_captcha_solved(page):
await asyncio.to_thread(
_http_post,
f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/solved",
{"detector": "dom_poller"},
)
return {
"success": True,
"captcha_kind": captcha_kind,
"resolved_by": "dom_poller",
"browser_view_url": resolved_view_url,
}
# 2) Статус из API: вдруг внешний вызов abort/extend/solved
status = await asyncio.to_thread(
_http_get,
f"{resolved_api_base}/tasks/{resolved_task_id}/captcha",
)
state = (status or {}).get("state")
if state == "solved":
return {
"success": True,
"captcha_kind": captcha_kind,
"resolved_by": "external",
"browser_view_url": resolved_view_url,
}
if state == "aborted":
return {
"success": False,
"captcha_kind": captcha_kind,
"error": "to_captcha: aborted by user",
"browser_view_url": resolved_view_url,
}
if state == "extended":
api_deadline = (status or {}).get("deadline")
if isinstance(api_deadline, (int, float)) and api_deadline > deadline:
deadline = float(api_deadline)
prompted_user = False
# 3) Таймаут — спрашиваем пользователя «продлить/отменить» один раз
if time.time() >= deadline:
if not prompted_user:
await asyncio.to_thread(
_http_post,
f"{resolved_api_base}/tasks/{resolved_task_id}/captcha/timeout-prompt",
{},
)
prompted_user = True
deadline = time.time() + min(60, resolved_timeout)
continue
return {
"success": False,
"captcha_kind": captcha_kind,
"error": "to_captcha: timeout (no user response)",
"browser_view_url": resolved_view_url,
}
await asyncio.sleep(poll_interval)
def register(controller: Any) -> None:
"""Регистрирует action `to_captcha` на переданном browser-use Controller."""
@controller.action(
"Pause the run, ask the human to solve the on-page CAPTCHA via the live browser view, "
"and resume automatically once the DOM detector sees the challenge gone. "
"Call this ONLY when the current page is blocked by reCAPTCHA, hCaptcha or Cloudflare Turnstile."
)
async def to_captcha(reason: str = "", browser=None, page=None) -> dict[str, Any]:
actual_page = page
if actual_page is None and browser is not None:
get_page = getattr(browser, "get_current_page", None) or getattr(browser, "get_page", None)
if callable(get_page):
actual_page = get_page()
if asyncio.iscoroutine(actual_page):
actual_page = await actual_page
if actual_page is None:
return {"success": False, "error": "to_captcha: browser-use did not provide a page"}
return await run_to_captcha(actual_page, reason=reason or None)
return to_captcha