add new tool: to_captcha

This commit is contained in:
VladislavIlin7 2026-04-21 23:32:09 +03:00
parent 50589232d6
commit f1f32d8366
14 changed files with 1008 additions and 130 deletions

View file

@ -7,19 +7,21 @@ from api.clients.browser_rpc_contracts import BrowserRpcError
class BrowserRpcClient:
def __init__(self, rpc_url: str, session: aiohttp.ClientSession) -> None:
self._rpc_url = rpc_url
self._run_url = rpc_url.rstrip("/")
if self._run_url.endswith("/run"):
self._base_url = self._run_url[: -len("/run")]
else:
self._base_url = self._run_url
self._run_url = f"{self._base_url}/run"
self._session = session
async def run(self, task: str, timeout_sec: float) -> dict[str, Any]:
payload = {"task": task}
timeout = aiohttp.ClientTimeout(total=timeout_sec)
async def _post_json(self, url: str, payload: dict[str, Any], timeout_sec: float | None = None) -> dict[str, Any]:
timeout = aiohttp.ClientTimeout(total=timeout_sec) if timeout_sec is not None else None
try:
async with self._session.post(self._rpc_url, json=payload, timeout=timeout) as response:
async with self._session.post(url, json=payload, timeout=timeout) as response:
if response.status >= 400:
body = await response.text()
raise BrowserRpcError(f"RPC HTTP: {response.status}: {body}")
try:
data = await response.json(content_type=None)
except aiohttp.ContentTypeError as exc:
@ -29,10 +31,22 @@ class BrowserRpcClient:
if not isinstance(data, dict):
raise BrowserRpcError("RPC returned invalid payload type")
return data
async def run(self, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]:
return await self._post_json(self._run_url, {"task_id": task_id, "task": task}, timeout_sec=timeout_sec)
async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]:
async def verify_captcha(self, task_id: str) -> dict[str, Any]:
return await self._post_json(f"{self._base_url}/verify", {"task_id": task_id}, timeout_sec=15)
async def resume(self, task_id: str, timeout_sec: float) -> dict[str, Any]:
return await self._post_json(f"{self._base_url}/resume", {"task_id": task_id}, timeout_sec=timeout_sec)
async def abort(self, task_id: str, reason: str | None = None) -> dict[str, Any]:
return await self._post_json(f"{self._base_url}/abort", {"task_id": task_id, "reason": reason}, timeout_sec=15)
async def run_browser_task(rpc_url: str, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]:
async with aiohttp.ClientSession() as session:
return await BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec)
client = BrowserRpcClient(rpc_url, session=session)
return await client.run(task_id=task_id, task=task, timeout_sec=timeout_sec)

View file

@ -1,8 +1,15 @@
from typing import Any, Protocol
class BrowserRpcError(RuntimeError): ...
class BrowserRpcError(RuntimeError):
pass
class BrowserRpcRunner(Protocol):
async def run(self, task: str, timeout_sec: float) -> dict[str, Any]: ...
async def run(self, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]: ...
async def verify_captcha(self, task_id: str) -> dict[str, Any]: ...
async def resume(self, task_id: str, timeout_sec: float) -> dict[str, Any]: ...
async def abort(self, task_id: str, reason: str | None = None) -> dict[str, Any]: ...

View file

@ -1,4 +1,4 @@
from typing import Any
from typing import Any, Literal
from pydantic import BaseModel, Field
@ -6,38 +6,67 @@ from api.domain.task_status import TaskStatus
class BrowserTaskRequest(BaseModel):
"""Запрос на запуск задачи в browser-use агенте."""
"""Request to start a browser task."""
task: str = Field(..., description="Текстовая задача для browser-use агента")
timeout: int = Field(300, description="Максимальное время выполнения задачи в секундах")
metadata: dict[str, Any] | None = Field(default=None, description="Дополнительные метаданные клиента")
task: str = Field(..., description="Text task for the browser-use agent")
timeout: int = Field(300, description="Maximum task execution time in seconds")
metadata: dict[str, Any] | None = Field(default=None, description="Optional client metadata")
class BrowserCaptchaVerification(BaseModel):
mode: Literal["dom_url_title"] = "dom_url_title"
selectors_absent: list[str] = Field(default_factory=list)
challenge_signals_absent: list[str] = Field(default_factory=list)
max_wait_seconds: int = Field(..., description="How long the task may stay paused waiting for the user")
class BrowserHumanInterventionPayload(BaseModel):
status: Literal["awaiting_user_captcha"] = "awaiting_user_captcha"
task_id: str
session_id: str
resume_token: str
browser_view_url: str | None = None
captcha_type: Literal["cloudflare", "recaptcha", "hcaptcha", "unknown"] = "unknown"
instructions: str
detected_at: float
verification: BrowserCaptchaVerification
class BrowserCaptchaReadyRequest(BaseModel):
user_response: str | None = Field(default=None, description="Free-form confirmation from the user")
class BrowserCaptchaAbortRequest(BaseModel):
reason: str | None = Field(default=None, description="Optional reason for aborting the CAPTCHA flow")
class BrowserTaskAcceptedResponse(BaseModel):
"""Ответ о том, что задача принята в обработку."""
"""Response indicating that a task was accepted for processing."""
task_id: str
status: TaskStatus
class BrowserTaskStatusResponse(BaseModel):
"""Текущий статус задачи и временные отметки ее выполнения."""
"""Current task status and timestamps."""
task_id: str
status: TaskStatus
create_at: float = Field(..., description="Время создания задачи в Unix timestamp")
started_at: float | None = Field(default=None, description="Время начала выполнения в Unix timestamp")
finished_at: float | None = Field(default=None, description="Время завершения выполнения в Unix timestamp")
error: str | None = Field(default=None, description="Текст ошибки, если задача завершилась с ошибкой")
create_at: float = Field(..., description="Task creation time as a Unix timestamp")
started_at: float | None = Field(default=None, description="Task start time as a Unix timestamp")
finished_at: float | None = Field(default=None, description="Task completion time as a Unix timestamp")
error: str | None = Field(default=None, description="Task error when applicable")
human_intervention: BrowserHumanInterventionPayload | None = None
class BrowserTaskResultResponse(BaseModel):
"""Финальный результат выполнения задачи в browser-use."""
"""Terminal result or meaningful paused state for a browser task."""
task_id: str
status: TaskStatus
success: bool = Field(..., description="Успешно ли выполнена задача")
execution_time: float = Field(..., description="Фактическое время выполнения в секундах")
result: str | None = Field(default=None, description="Итоговый текстовый результат")
error: str | None = Field(default=None, description="Текст ошибки, если выполнение не удалось")
raw_response: dict[str, Any] | None = Field(default=None, description="Сырой ответ от browser-use RPC")
success: bool = Field(..., description="Whether the task completed successfully")
execution_time: float = Field(..., description="Observed execution time in seconds")
result: str | None = Field(default=None, description="Final textual result when available")
error: str | None = Field(default=None, description="Error text when execution failed")
raw_response: dict[str, Any] | None = Field(default=None, description="Raw payload returned by the browser runtime")
human_intervention: BrowserHumanInterventionPayload | None = None

View file

@ -9,6 +9,7 @@ class Settings:
browser_rpc_url: str = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
browser_rpc_timeout: float = float(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
captcha_wait_timeout: int = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))
max_concurrency: int = int(os.getenv("BROWSER_API_MAX_CONCURRENCY", "2"))

View file

@ -2,8 +2,10 @@ from enum import Enum
class TaskStatus(str, Enum):
"""Состояние задачи браузерного агента."""
"""Browser task lifecycle states."""
queued = "queued"
running = "running"
awaiting_user_captcha = "awaiting_user_captcha"
succeeded = "succeeded"
failed = "failed"

View file

@ -18,6 +18,7 @@ async def lifespan(app: FastAPI):
rpc_client=BrowserRpcClient(settings.browser_rpc_url, session=session),
max_concurrency=settings.max_concurrency,
rpc_timeout_cap=settings.browser_rpc_timeout,
captcha_wait_timeout=settings.captcha_wait_timeout,
)
app.state.task_service = task_service
try:

View file

@ -20,13 +20,17 @@ class TaskRecord:
result: str | None = None
error: str | None = None
raw_response: dict[str, Any] | None = None
human_intervention: dict[str, Any] | None = None
session_id: str | None = None
resume_token: str | None = None
awaiting_deadline: float | None = None
@property
def execution_time(self) -> float:
if self.started_at is None:
return 0
end = self.finished_at if self.finished_at is not None else time.time()
return max(0, end - self.started_at)
return max(0.0, end - self.started_at)
class TaskStore:
@ -51,16 +55,42 @@ class TaskStore:
if rec is None:
return None
rec.status = TaskStatus.running
rec.started_at = time.time()
rec.finished_at = None
rec.error = None
rec.human_intervention = None
rec.awaiting_deadline = None
if rec.started_at is None:
rec.started_at = time.time()
return rec
async def set_awaiting_captcha(
self,
task_id: str,
raw_response: dict[str, Any],
max_wait_seconds: int,
) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
payload = raw_response.get("human_intervention") or {}
rec.status = TaskStatus.awaiting_user_captcha
rec.raw_response = raw_response
rec.human_intervention = payload
rec.session_id = payload.get("session_id") or rec.session_id
rec.resume_token = payload.get("resume_token") or rec.resume_token
rec.awaiting_deadline = time.time() + max(1, int(max_wait_seconds))
rec.error = None
rec.finished_at = None
return rec
async def set_done(
self,
task_id: str,
success: bool,
raw_response: dict[str, Any] | None,
error: str | None,
result: str | None = None,
self,
task_id: str,
success: bool,
raw_response: dict[str, Any] | None,
error: str | None,
result: str | None = None,
) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
@ -69,8 +99,35 @@ class TaskStore:
rec.finished_at = time.time()
rec.raw_response = raw_response
rec.error = error if error is not None else (
raw_response.get("error") if isinstance(raw_response, dict) else None)
raw_response.get("error") if isinstance(raw_response, dict) else None
)
rec.result = result if result is not None else (
raw_response.get("result") if isinstance(raw_response, dict) else None)
raw_response.get("result") if isinstance(raw_response, dict) else None
)
rec.human_intervention = None
rec.awaiting_deadline = None
rec.status = TaskStatus.succeeded if success else TaskStatus.failed
return rec
async def expire_if_needed(self, task_id: str) -> TaskRecord | None:
async with self._lock:
rec = self._tasks.get(task_id)
if rec is None:
return None
if rec.status != TaskStatus.awaiting_user_captcha:
return rec
if rec.awaiting_deadline is None or rec.awaiting_deadline > time.time():
return rec
rec.status = TaskStatus.failed
rec.finished_at = time.time()
rec.error = "CAPTCHA wait expired before the user completed verification."
rec.raw_response = {
"success": False,
"status": TaskStatus.failed.value,
"error": rec.error,
"error_code": "captcha_wait_expired",
}
rec.human_intervention = None
rec.awaiting_deadline = None
return rec

View file

@ -2,6 +2,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import JSONResponse
from api.contracts.task_schemas import (
BrowserCaptchaAbortRequest,
BrowserCaptchaReadyRequest,
BrowserTaskAcceptedResponse,
BrowserTaskRequest,
BrowserTaskResultResponse,
@ -20,8 +22,8 @@ def get_task_service(request: Request) -> TaskService:
@router.post("/tasks", response_model=BrowserTaskAcceptedResponse, status_code=202)
async def create_task(
payload: BrowserTaskRequest,
service: TaskService = Depends(get_task_service),
payload: BrowserTaskRequest,
service: TaskService = Depends(get_task_service),
) -> BrowserTaskAcceptedResponse:
rec = await service.submit_task(task=payload.task.strip(), timeout=payload.timeout, metadata=payload.metadata)
return BrowserTaskAcceptedResponse(task_id=rec.task_id, status=rec.status)
@ -37,8 +39,8 @@ async def get_task_status(task_id: str, service: TaskService = Depends(get_task_
@router.get("/tasks/{task_id}/result", response_model=BrowserTaskResultResponse)
async def get_task_result(
task_id: str,
service: TaskService = Depends(get_task_service),
task_id: str,
service: TaskService = Depends(get_task_service),
) -> JSONResponse | BrowserTaskResultResponse:
rec = await service.get_task(task_id)
if rec is None:
@ -55,18 +57,35 @@ async def get_task_result(
"result": None,
"error": None,
"raw_response": None,
"human_intervention": None,
},
)
return BrowserTaskResultResponse(
task_id=rec.task_id,
status=rec.status,
success=(rec.status == TaskStatus.succeeded),
execution_time=rec.execution_time,
result=rec.result,
error=rec.error,
raw_response=rec.raw_response,
)
return _to_result_response(rec)
@router.post("/tasks/{task_id}/captcha/ready", response_model=BrowserTaskResultResponse)
async def captcha_ready(
task_id: str,
payload: BrowserCaptchaReadyRequest,
service: TaskService = Depends(get_task_service),
) -> BrowserTaskResultResponse:
rec = await service.resume_captcha(task_id, user_response=payload.user_response)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return _to_result_response(rec)
@router.post("/tasks/{task_id}/captcha/abort", response_model=BrowserTaskResultResponse)
async def captcha_abort(
task_id: str,
payload: BrowserCaptchaAbortRequest,
service: TaskService = Depends(get_task_service),
) -> BrowserTaskResultResponse:
rec = await service.abort_captcha(task_id, reason=payload.reason)
if rec is None:
raise HTTPException(status_code=404, detail="Task not found")
return _to_result_response(rec)
def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
@ -77,4 +96,18 @@ def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
started_at=rec.started_at,
finished_at=rec.finished_at,
error=rec.error,
human_intervention=rec.human_intervention,
)
def _to_result_response(rec: TaskRecord) -> BrowserTaskResultResponse:
return BrowserTaskResultResponse(
task_id=rec.task_id,
status=rec.status,
success=(rec.status == TaskStatus.succeeded),
execution_time=rec.execution_time,
result=rec.result,
error=rec.error,
raw_response=rec.raw_response,
human_intervention=rec.human_intervention,
)

View file

@ -1,21 +1,24 @@
import asyncio
from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner
from api.domain.task_status import TaskStatus
from api.repositories.task_store import TaskRecord, TaskStore
class TaskService:
def __init__(
self,
store: TaskStore,
rpc_client: BrowserRpcRunner,
max_concurrency: int,
rpc_timeout_cap: float | None = None,
self,
store: TaskStore,
rpc_client: BrowserRpcRunner,
max_concurrency: int,
rpc_timeout_cap: float | None = None,
captcha_wait_timeout: int = 900,
) -> None:
self._store = store
self._rpc_client = rpc_client
self._semaphore = asyncio.Semaphore(max_concurrency)
self._rpc_timeout_cap = rpc_timeout_cap
self._captcha_wait_timeout = captcha_wait_timeout
self._background_tasks: set[asyncio.Task[None]] = set()
async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord:
@ -26,7 +29,22 @@ class TaskService:
return record
async def get_task(self, task_id: str) -> TaskRecord | None:
return await self._store.get(task_id)
before = await self._store.get(task_id)
was_awaiting = bool(before is not None and before.status == TaskStatus.awaiting_user_captcha)
await self._store.expire_if_needed(task_id)
after = await self._store.get(task_id)
if (
was_awaiting
and after is not None
and after.status == TaskStatus.failed
and after.error
and "expired" in after.error.lower()
):
try:
await self._rpc_client.abort(task_id, reason=after.error)
except BrowserRpcError:
pass
return after
async def close(self) -> None:
if not self._background_tasks:
@ -37,6 +55,86 @@ class TaskService:
await asyncio.gather(*self._background_tasks, return_exceptions=True)
self._background_tasks.clear()
async def resume_captcha(self, task_id: str, user_response: str | None = None) -> TaskRecord | None:
rec = await self.get_task(task_id)
if rec is None:
return None
if rec.status == TaskStatus.failed:
return rec
if rec.status != TaskStatus.awaiting_user_captcha:
return rec
verify_raw = await self._rpc_client.verify_captcha(task_id)
if not verify_raw.get("verified"):
await self._store.set_awaiting_captcha(
task_id=task_id,
raw_response={
"success": False,
"status": TaskStatus.awaiting_user_captcha.value,
"error": "CAPTCHA is still present.",
"human_intervention": rec.human_intervention,
"verification": verify_raw,
"user_response": user_response,
},
max_wait_seconds=(rec.human_intervention or {}).get("verification", {}).get(
"max_wait_seconds", self._captcha_wait_timeout
),
)
return await self._store.get(task_id)
await self._store.set_running(task_id)
try:
rpc_timeout = float(rec.timeout)
if self._rpc_timeout_cap is not None:
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
raw = await asyncio.wait_for(
self._rpc_client.resume(task_id=task_id, timeout_sec=rpc_timeout),
timeout=float(rec.timeout) + 5,
)
except asyncio.TimeoutError:
await self._store.set_done(
task_id=task_id,
success=False,
raw_response=None,
error="Timeout exceeded after CAPTCHA resume.",
)
return await self._store.get(task_id)
except BrowserRpcError as exc:
await self._store.set_done(
task_id=task_id,
success=False,
raw_response=None,
error=str(exc),
)
return await self._store.get(task_id)
await self._apply_rpc_result(task_id, raw)
return await self._store.get(task_id)
async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None:
rec = await self.get_task(task_id)
if rec is None:
return None
if rec.status == TaskStatus.awaiting_user_captcha:
try:
await self._rpc_client.abort(task_id, reason=reason)
except BrowserRpcError:
pass
await self._store.set_done(
task_id=task_id,
success=False,
raw_response={
"success": False,
"status": TaskStatus.failed.value,
"error_code": "captcha_aborted",
"reason": reason,
},
error=reason or "User aborted CAPTCHA flow.",
)
return await self._store.get(task_id)
async def _worker(self, task_id: str) -> None:
rec = await self._store.set_running(task_id)
if rec is None:
@ -49,17 +147,10 @@ class TaskService:
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
raw = await asyncio.wait_for(
self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout),
self._rpc_client.run(task_id=task_id, task=rec.task, timeout_sec=rpc_timeout),
timeout=float(rec.timeout) + 5,
)
success = bool(raw.get("success"))
await self._store.set_done(
task_id=task_id,
success=success,
raw_response=raw,
error=None,
result=raw.get("result") if isinstance(raw, dict) else None,
)
await self._apply_rpc_result(task_id, raw)
except asyncio.TimeoutError:
await self._store.set_done(
task_id=task_id,
@ -81,3 +172,22 @@ class TaskService:
raw_response=None,
error=f"Internal error: {exc}",
)
async def _apply_rpc_result(self, task_id: str, raw: dict | None) -> None:
raw = raw or {}
status = raw.get("status")
if status == TaskStatus.awaiting_user_captcha.value:
human = raw.get("human_intervention") or {}
verification = human.get("verification") or {}
max_wait_seconds = verification.get("max_wait_seconds", self._captcha_wait_timeout)
await self._store.set_awaiting_captcha(task_id, raw_response=raw, max_wait_seconds=max_wait_seconds)
return
success = bool(raw.get("success"))
await self._store.set_done(
task_id=task_id,
success=success,
raw_response=raw,
error=None,
result=raw.get("result") if isinstance(raw, dict) else None,
)