# Source listing: BrowserUse_and_ComputerUse_.../api/services/task_service.py
# Listing metadata at capture time: 193 lines, 7.3 KiB, Python

import asyncio
import logging

from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner
from api.domain.task_status import TaskStatus
from api.repositories.task_store import TaskRecord, TaskStore
_logger = logging.getLogger(__name__)


class TaskService:
    """Run browser tasks in the background and track their state in a store.

    The service glues together two collaborators:

    * a ``TaskStore`` that persists task records and their status, and
    * a ``BrowserRpcRunner`` that executes, resumes, verifies and aborts
      tasks on the browser side.

    New tasks are executed by fire-and-forget asyncio tasks; the number of
    concurrent ``run`` calls is bounded by a semaphore.  A runner result whose
    ``status`` equals ``TaskStatus.awaiting_user_captcha`` parks the task until
    the user resumes or aborts it.
    """

    def __init__(
        self,
        store: TaskStore,
        rpc_client: BrowserRpcRunner,
        max_concurrency: int,
        rpc_timeout_cap: float | None = None,
        captcha_wait_timeout: int = 900,
    ) -> None:
        """Create the service.

        Args:
            store: Persistence layer for task records.
            rpc_client: Client used to run/resume/verify/abort tasks.
            max_concurrency: Initial value of the semaphore that bounds
                concurrent ``run`` calls made by background workers.
            rpc_timeout_cap: Optional upper bound (seconds) applied to the
                timeout handed to the RPC client; ``None`` disables the cap.
            captcha_wait_timeout: Fallback CAPTCHA wait (seconds) used when a
                runner response does not carry ``max_wait_seconds``.
        """
        self._store = store
        self._rpc_client = rpc_client
        self._semaphore = asyncio.Semaphore(max_concurrency)
        self._rpc_timeout_cap = rpc_timeout_cap
        self._captcha_wait_timeout = captcha_wait_timeout
        # The event loop only keeps weak references to tasks, so the workers
        # are pinned here until they finish (see the done-callback below).
        self._background_tasks: set[asyncio.Task[None]] = set()

    async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord:
        """Persist a new task and start its worker without waiting for it.

        Args:
            task: Task description forwarded to the runner.
            timeout: Task timeout in seconds, stored on the record.
            metadata: Optional caller-supplied metadata stored with the task.

        Returns:
            The record as returned by ``store.create``.
        """
        record = await self._store.create(task=task, timeout=timeout, metadata=metadata)
        worker = asyncio.create_task(self._worker(record.task_id))
        self._background_tasks.add(worker)
        # Drop the strong reference as soon as the worker completes.
        worker.add_done_callback(self._background_tasks.discard)
        return record

    async def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the current record for *task_id*, applying expiry first.

        The record is read before and after ``store.expire_if_needed``.  When
        a task that was awaiting a CAPTCHA has become ``failed`` with an error
        containing "expired", the runner is asked (best effort) to abort it.

        Returns:
            The record read after the expiry check, or ``None`` when the store
            has no such task.
        """
        before = await self._store.get(task_id)
        was_awaiting = bool(
            before is not None and before.status == TaskStatus.awaiting_user_captcha
        )
        await self._store.expire_if_needed(task_id)
        after = await self._store.get(task_id)

        expired_while_waiting = (
            was_awaiting
            and after is not None
            and after.status == TaskStatus.failed
            and after.error
            and "expired" in after.error.lower()
        )
        if expired_while_waiting:
            await self._abort_best_effort(task_id, reason=after.error)
        return after

    async def close(self) -> None:
        """Cancel all background workers and wait until they have stopped."""
        # Snapshot once: done-callbacks mutate the set while tasks finish.
        pending = list(self._background_tasks)
        if not pending:
            return
        for worker in pending:
            worker.cancel()
        # return_exceptions=True absorbs the resulting CancelledError results.
        await asyncio.gather(*pending, return_exceptions=True)
        self._background_tasks.clear()

    async def resume_captcha(
        self, task_id: str, user_response: str | None = None
    ) -> TaskRecord | None:
        """Continue a task that is parked on a user-solved CAPTCHA.

        The runner is first asked to verify that the CAPTCHA is gone.  If it is
        still present the task is re-parked (with a fresh wait window) and
        returned.  Otherwise the task is marked running and resumed on the
        runner; the outcome is stored like a normal run result.

        Args:
            task_id: Identifier of the task to resume.
            user_response: Optional free-form user input, recorded in the raw
                response when verification fails.

        Returns:
            ``None`` for an unknown task; the unchanged record when the task is
            not awaiting a CAPTCHA; otherwise the record re-read from the store
            after the state change.

        Raises:
            BrowserRpcError: If the verification call itself fails (only the
                resume call is converted into a failed task).
        """
        rec = await self.get_task(task_id)
        if rec is None:
            return None
        # Covers failed/expired tasks as well as any other non-waiting status.
        if rec.status != TaskStatus.awaiting_user_captcha:
            return rec

        verify_raw = await self._rpc_client.verify_captcha(task_id)
        # Tolerate an empty/None verification payload instead of crashing.
        if not (verify_raw or {}).get("verified"):
            await self._store.set_awaiting_captcha(
                task_id=task_id,
                raw_response={
                    "success": False,
                    "status": TaskStatus.awaiting_user_captcha.value,
                    "error": "CAPTCHA is still present.",
                    "human_intervention": rec.human_intervention,
                    "verification": verify_raw,
                    "user_response": user_response,
                },
                max_wait_seconds=self._captcha_wait_seconds(rec.human_intervention),
            )
            return await self._store.get(task_id)

        await self._store.set_running(task_id)
        try:
            raw = await asyncio.wait_for(
                self._rpc_client.resume(
                    task_id=task_id,
                    timeout_sec=self._capped_rpc_timeout(rec.timeout),
                ),
                # Outer safety net: the task's own timeout plus a 5 s grace.
                timeout=float(rec.timeout) + 5,
            )
        except asyncio.TimeoutError:
            await self._store.set_done(
                task_id=task_id,
                success=False,
                raw_response=None,
                error="Timeout exceeded after CAPTCHA resume.",
            )
            return await self._store.get(task_id)
        except BrowserRpcError as exc:
            await self._store.set_done(
                task_id=task_id,
                success=False,
                raw_response=None,
                error=str(exc),
            )
            return await self._store.get(task_id)

        await self._apply_rpc_result(task_id, raw)
        return await self._store.get(task_id)

    async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None:
        """Give up on a task that is waiting for a CAPTCHA.

        When the task is awaiting a CAPTCHA the runner is asked (best effort)
        to abort and the task is stored as failed with error code
        ``captcha_aborted``.  Tasks in any other status are left untouched.

        Args:
            task_id: Identifier of the task to abort.
            reason: Optional human-readable reason, stored as the task error.

        Returns:
            ``None`` for an unknown task, otherwise the record re-read from
            the store.
        """
        rec = await self.get_task(task_id)
        if rec is None:
            return None
        if rec.status == TaskStatus.awaiting_user_captcha:
            await self._abort_best_effort(task_id, reason=reason)
            await self._store.set_done(
                task_id=task_id,
                success=False,
                raw_response={
                    "success": False,
                    "status": TaskStatus.failed.value,
                    "error_code": "captcha_aborted",
                    "reason": reason,
                },
                error=reason or "User aborted CAPTCHA flow.",
            )
        return await self._store.get(task_id)

    async def _worker(self, task_id: str) -> None:
        """Execute one submitted task and record its outcome in the store.

        Timeouts, RPC errors and unexpected exceptions are all converted into
        a failed task so a background worker never dies with an unobserved
        exception from the run itself.
        """
        # NOTE(review): the task is marked running *before* a semaphore slot
        # is acquired, so queued tasks already report "running" — confirm this
        # is intended.
        rec = await self._store.set_running(task_id)
        if rec is None:
            return
        async with self._semaphore:
            try:
                raw = await asyncio.wait_for(
                    self._rpc_client.run(
                        task_id=task_id,
                        task=rec.task,
                        timeout_sec=self._capped_rpc_timeout(rec.timeout),
                    ),
                    # Outer safety net: the task's own timeout plus a 5 s grace.
                    timeout=float(rec.timeout) + 5,
                )
                await self._apply_rpc_result(task_id, raw)
            except asyncio.TimeoutError:
                await self._store.set_done(
                    task_id=task_id,
                    success=False,
                    raw_response=None,
                    error="Timeout exceeded",
                )
            except BrowserRpcError as exc:
                await self._store.set_done(
                    task_id=task_id,
                    success=False,
                    raw_response=None,
                    error=str(exc),
                )
            except Exception as exc:
                # Last-resort boundary of a fire-and-forget task.  Cancellation
                # (a BaseException) is deliberately not caught here, so a task
                # cancelled via close() keeps its stored status.
                await self._store.set_done(
                    task_id=task_id,
                    success=False,
                    raw_response=None,
                    error=f"Internal error: {exc}",
                )

    async def _apply_rpc_result(self, task_id: str, raw: dict | None) -> None:
        """Translate a runner response into the matching store transition.

        A response whose ``status`` is the awaiting-CAPTCHA value parks the
        task; anything else finishes it, with ``success`` taken from the
        truthiness of ``raw["success"]``.  A missing response is treated as an
        empty dict, i.e. a failure.
        """
        raw = raw or {}
        if raw.get("status") == TaskStatus.awaiting_user_captcha.value:
            await self._store.set_awaiting_captcha(
                task_id,
                raw_response=raw,
                max_wait_seconds=self._captcha_wait_seconds(raw.get("human_intervention")),
            )
            return

        success = bool(raw.get("success"))
        await self._store.set_done(
            task_id=task_id,
            success=success,
            raw_response=raw,
            # Keep the runner's failure message instead of discarding it.
            error=None if success else self._failure_message(raw),
            result=raw.get("result"),
        )

    async def _abort_best_effort(self, task_id: str, reason: str | None) -> None:
        """Ask the runner to abort *task_id*; log, but never raise, RPC errors."""
        try:
            await self._rpc_client.abort(task_id, reason=reason)
        except BrowserRpcError as exc:
            _logger.warning("Best-effort abort of task %s failed: %s", task_id, exc)

    def _capped_rpc_timeout(self, timeout: float) -> float:
        """Return *timeout* as a float, limited by ``rpc_timeout_cap`` if set."""
        rpc_timeout = float(timeout)
        if self._rpc_timeout_cap is not None:
            rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
        return rpc_timeout

    def _captcha_wait_seconds(self, human_intervention: dict | None):
        """Read ``verification.max_wait_seconds`` with the configured fallback.

        Both the payload and its ``verification`` entry may be missing or
        ``None``; in either case the service-wide default is returned.
        """
        verification = (human_intervention or {}).get("verification") or {}
        return verification.get("max_wait_seconds", self._captcha_wait_timeout)

    @staticmethod
    def _failure_message(raw: dict) -> str | None:
        """Extract a failure message from a runner response, if it has one.

        Assumes the runner reports failures under an ``"error"`` key, matching
        the payloads this service writes itself — TODO confirm against the
        runner's response contract.
        """
        message = raw.get("error")
        return None if message is None else str(message)