Compare commits
2 commits
main
...
dev_to_cap
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b7d02ee81 | |||
| f1f32d8366 |
16 changed files with 1021 additions and 131 deletions
|
|
@ -7,19 +7,21 @@ from api.clients.browser_rpc_contracts import BrowserRpcError
|
|||
|
||||
class BrowserRpcClient:
|
||||
def __init__(self, rpc_url: str, session: aiohttp.ClientSession) -> None:
|
||||
self._rpc_url = rpc_url
|
||||
self._run_url = rpc_url.rstrip("/")
|
||||
if self._run_url.endswith("/run"):
|
||||
self._base_url = self._run_url[: -len("/run")]
|
||||
else:
|
||||
self._base_url = self._run_url
|
||||
self._run_url = f"{self._base_url}/run"
|
||||
self._session = session
|
||||
|
||||
async def run(self, task: str, timeout_sec: float) -> dict[str, Any]:
|
||||
payload = {"task": task}
|
||||
timeout = aiohttp.ClientTimeout(total=timeout_sec)
|
||||
|
||||
async def _post_json(self, url: str, payload: dict[str, Any], timeout_sec: float | None = None) -> dict[str, Any]:
|
||||
timeout = aiohttp.ClientTimeout(total=timeout_sec) if timeout_sec is not None else None
|
||||
try:
|
||||
async with self._session.post(self._rpc_url, json=payload, timeout=timeout) as response:
|
||||
async with self._session.post(url, json=payload, timeout=timeout) as response:
|
||||
if response.status >= 400:
|
||||
body = await response.text()
|
||||
raise BrowserRpcError(f"RPC HTTP: {response.status}: {body}")
|
||||
|
||||
try:
|
||||
data = await response.json(content_type=None)
|
||||
except aiohttp.ContentTypeError as exc:
|
||||
|
|
@ -29,10 +31,22 @@ class BrowserRpcClient:
|
|||
|
||||
if not isinstance(data, dict):
|
||||
raise BrowserRpcError("RPC returned invalid payload type")
|
||||
|
||||
return data
|
||||
|
||||
async def run(self, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]:
|
||||
return await self._post_json(self._run_url, {"task_id": task_id, "task": task}, timeout_sec=timeout_sec)
|
||||
|
||||
async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]:
|
||||
async def verify_captcha(self, task_id: str) -> dict[str, Any]:
|
||||
return await self._post_json(f"{self._base_url}/verify", {"task_id": task_id}, timeout_sec=15)
|
||||
|
||||
async def resume(self, task_id: str, timeout_sec: float) -> dict[str, Any]:
|
||||
return await self._post_json(f"{self._base_url}/resume", {"task_id": task_id}, timeout_sec=timeout_sec)
|
||||
|
||||
async def abort(self, task_id: str, reason: str | None = None) -> dict[str, Any]:
|
||||
return await self._post_json(f"{self._base_url}/abort", {"task_id": task_id, "reason": reason}, timeout_sec=15)
|
||||
|
||||
|
||||
async def run_browser_task(rpc_url: str, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
return await BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec)
|
||||
client = BrowserRpcClient(rpc_url, session=session)
|
||||
return await client.run(task_id=task_id, task=task, timeout_sec=timeout_sec)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,15 @@
|
|||
from typing import Any, Protocol
|
||||
|
||||
|
||||
class BrowserRpcError(RuntimeError): ...
|
||||
class BrowserRpcError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class BrowserRpcRunner(Protocol):
|
||||
async def run(self, task: str, timeout_sec: float) -> dict[str, Any]: ...
|
||||
async def run(self, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]: ...
|
||||
|
||||
async def verify_captcha(self, task_id: str) -> dict[str, Any]: ...
|
||||
|
||||
async def resume(self, task_id: str, timeout_sec: float) -> dict[str, Any]: ...
|
||||
|
||||
async def abort(self, task_id: str, reason: str | None = None) -> dict[str, Any]: ...
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Any
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
|
@ -6,38 +6,67 @@ from api.domain.task_status import TaskStatus
|
|||
|
||||
|
||||
class BrowserTaskRequest(BaseModel):
|
||||
"""Запрос на запуск задачи в browser-use агенте."""
|
||||
"""Request to start a browser task."""
|
||||
|
||||
task: str = Field(..., description="Текстовая задача для browser-use агента")
|
||||
timeout: int = Field(300, description="Максимальное время выполнения задачи в секундах")
|
||||
metadata: dict[str, Any] | None = Field(default=None, description="Дополнительные метаданные клиента")
|
||||
task: str = Field(..., description="Text task for the browser-use agent")
|
||||
timeout: int = Field(300, description="Maximum task execution time in seconds")
|
||||
metadata: dict[str, Any] | None = Field(default=None, description="Optional client metadata")
|
||||
|
||||
|
||||
class BrowserCaptchaVerification(BaseModel):
|
||||
mode: Literal["dom_url_title"] = "dom_url_title"
|
||||
selectors_absent: list[str] = Field(default_factory=list)
|
||||
challenge_signals_absent: list[str] = Field(default_factory=list)
|
||||
max_wait_seconds: int = Field(..., description="How long the task may stay paused waiting for the user")
|
||||
|
||||
|
||||
class BrowserHumanInterventionPayload(BaseModel):
|
||||
status: Literal["awaiting_user_captcha"] = "awaiting_user_captcha"
|
||||
task_id: str
|
||||
session_id: str
|
||||
resume_token: str
|
||||
browser_view_url: str | None = None
|
||||
captcha_type: Literal["cloudflare", "recaptcha", "hcaptcha", "unknown"] = "unknown"
|
||||
instructions: str
|
||||
detected_at: float
|
||||
verification: BrowserCaptchaVerification
|
||||
|
||||
|
||||
class BrowserCaptchaReadyRequest(BaseModel):
|
||||
user_response: str | None = Field(default=None, description="Free-form confirmation from the user")
|
||||
|
||||
|
||||
class BrowserCaptchaAbortRequest(BaseModel):
|
||||
reason: str | None = Field(default=None, description="Optional reason for aborting the CAPTCHA flow")
|
||||
|
||||
|
||||
class BrowserTaskAcceptedResponse(BaseModel):
|
||||
"""Ответ о том, что задача принята в обработку."""
|
||||
"""Response indicating that a task was accepted for processing."""
|
||||
|
||||
task_id: str
|
||||
status: TaskStatus
|
||||
|
||||
|
||||
class BrowserTaskStatusResponse(BaseModel):
|
||||
"""Текущий статус задачи и временные отметки ее выполнения."""
|
||||
"""Current task status and timestamps."""
|
||||
|
||||
task_id: str
|
||||
status: TaskStatus
|
||||
create_at: float = Field(..., description="Время создания задачи в Unix timestamp")
|
||||
started_at: float | None = Field(default=None, description="Время начала выполнения в Unix timestamp")
|
||||
finished_at: float | None = Field(default=None, description="Время завершения выполнения в Unix timestamp")
|
||||
error: str | None = Field(default=None, description="Текст ошибки, если задача завершилась с ошибкой")
|
||||
create_at: float = Field(..., description="Task creation time as a Unix timestamp")
|
||||
started_at: float | None = Field(default=None, description="Task start time as a Unix timestamp")
|
||||
finished_at: float | None = Field(default=None, description="Task completion time as a Unix timestamp")
|
||||
error: str | None = Field(default=None, description="Task error when applicable")
|
||||
human_intervention: BrowserHumanInterventionPayload | None = None
|
||||
|
||||
|
||||
class BrowserTaskResultResponse(BaseModel):
|
||||
"""Финальный результат выполнения задачи в browser-use."""
|
||||
"""Terminal result or meaningful paused state for a browser task."""
|
||||
|
||||
task_id: str
|
||||
status: TaskStatus
|
||||
success: bool = Field(..., description="Успешно ли выполнена задача")
|
||||
execution_time: float = Field(..., description="Фактическое время выполнения в секундах")
|
||||
result: str | None = Field(default=None, description="Итоговый текстовый результат")
|
||||
error: str | None = Field(default=None, description="Текст ошибки, если выполнение не удалось")
|
||||
raw_response: dict[str, Any] | None = Field(default=None, description="Сырой ответ от browser-use RPC")
|
||||
success: bool = Field(..., description="Whether the task completed successfully")
|
||||
execution_time: float = Field(..., description="Observed execution time in seconds")
|
||||
result: str | None = Field(default=None, description="Final textual result when available")
|
||||
error: str | None = Field(default=None, description="Error text when execution failed")
|
||||
raw_response: dict[str, Any] | None = Field(default=None, description="Raw payload returned by the browser runtime")
|
||||
human_intervention: BrowserHumanInterventionPayload | None = None
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ class Settings:
|
|||
|
||||
browser_rpc_url: str = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
|
||||
browser_rpc_timeout: float = float(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
||||
captcha_wait_timeout: int = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))
|
||||
|
||||
max_concurrency: int = int(os.getenv("BROWSER_API_MAX_CONCURRENCY", "2"))
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,10 @@ from enum import Enum
|
|||
|
||||
|
||||
class TaskStatus(str, Enum):
|
||||
"""Состояние задачи браузерного агента."""
|
||||
"""Browser task lifecycle states."""
|
||||
|
||||
queued = "queued"
|
||||
running = "running"
|
||||
awaiting_user_captcha = "awaiting_user_captcha"
|
||||
succeeded = "succeeded"
|
||||
failed = "failed"
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ async def lifespan(app: FastAPI):
|
|||
rpc_client=BrowserRpcClient(settings.browser_rpc_url, session=session),
|
||||
max_concurrency=settings.max_concurrency,
|
||||
rpc_timeout_cap=settings.browser_rpc_timeout,
|
||||
captcha_wait_timeout=settings.captcha_wait_timeout,
|
||||
)
|
||||
app.state.task_service = task_service
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -20,13 +20,17 @@ class TaskRecord:
|
|||
result: str | None = None
|
||||
error: str | None = None
|
||||
raw_response: dict[str, Any] | None = None
|
||||
human_intervention: dict[str, Any] | None = None
|
||||
session_id: str | None = None
|
||||
resume_token: str | None = None
|
||||
awaiting_deadline: float | None = None
|
||||
|
||||
@property
|
||||
def execution_time(self) -> float:
|
||||
if self.started_at is None:
|
||||
return 0
|
||||
end = self.finished_at if self.finished_at is not None else time.time()
|
||||
return max(0, end - self.started_at)
|
||||
return max(0.0, end - self.started_at)
|
||||
|
||||
|
||||
class TaskStore:
|
||||
|
|
@ -51,16 +55,42 @@ class TaskStore:
|
|||
if rec is None:
|
||||
return None
|
||||
rec.status = TaskStatus.running
|
||||
rec.started_at = time.time()
|
||||
rec.finished_at = None
|
||||
rec.error = None
|
||||
rec.human_intervention = None
|
||||
rec.awaiting_deadline = None
|
||||
if rec.started_at is None:
|
||||
rec.started_at = time.time()
|
||||
return rec
|
||||
|
||||
async def set_awaiting_captcha(
|
||||
self,
|
||||
task_id: str,
|
||||
raw_response: dict[str, Any],
|
||||
max_wait_seconds: int,
|
||||
) -> TaskRecord | None:
|
||||
async with self._lock:
|
||||
rec = self._tasks.get(task_id)
|
||||
if rec is None:
|
||||
return None
|
||||
payload = raw_response.get("human_intervention") or {}
|
||||
rec.status = TaskStatus.awaiting_user_captcha
|
||||
rec.raw_response = raw_response
|
||||
rec.human_intervention = payload
|
||||
rec.session_id = payload.get("session_id") or rec.session_id
|
||||
rec.resume_token = payload.get("resume_token") or rec.resume_token
|
||||
rec.awaiting_deadline = time.time() + max(1, int(max_wait_seconds))
|
||||
rec.error = None
|
||||
rec.finished_at = None
|
||||
return rec
|
||||
|
||||
async def set_done(
|
||||
self,
|
||||
task_id: str,
|
||||
success: bool,
|
||||
raw_response: dict[str, Any] | None,
|
||||
error: str | None,
|
||||
result: str | None = None,
|
||||
self,
|
||||
task_id: str,
|
||||
success: bool,
|
||||
raw_response: dict[str, Any] | None,
|
||||
error: str | None,
|
||||
result: str | None = None,
|
||||
) -> TaskRecord | None:
|
||||
async with self._lock:
|
||||
rec = self._tasks.get(task_id)
|
||||
|
|
@ -69,8 +99,35 @@ class TaskStore:
|
|||
rec.finished_at = time.time()
|
||||
rec.raw_response = raw_response
|
||||
rec.error = error if error is not None else (
|
||||
raw_response.get("error") if isinstance(raw_response, dict) else None)
|
||||
raw_response.get("error") if isinstance(raw_response, dict) else None
|
||||
)
|
||||
rec.result = result if result is not None else (
|
||||
raw_response.get("result") if isinstance(raw_response, dict) else None)
|
||||
raw_response.get("result") if isinstance(raw_response, dict) else None
|
||||
)
|
||||
rec.human_intervention = None
|
||||
rec.awaiting_deadline = None
|
||||
rec.status = TaskStatus.succeeded if success else TaskStatus.failed
|
||||
return rec
|
||||
|
||||
async def expire_if_needed(self, task_id: str) -> TaskRecord | None:
|
||||
async with self._lock:
|
||||
rec = self._tasks.get(task_id)
|
||||
if rec is None:
|
||||
return None
|
||||
if rec.status != TaskStatus.awaiting_user_captcha:
|
||||
return rec
|
||||
if rec.awaiting_deadline is None or rec.awaiting_deadline > time.time():
|
||||
return rec
|
||||
|
||||
rec.status = TaskStatus.failed
|
||||
rec.finished_at = time.time()
|
||||
rec.error = "CAPTCHA wait expired before the user completed verification."
|
||||
rec.raw_response = {
|
||||
"success": False,
|
||||
"status": TaskStatus.failed.value,
|
||||
"error": rec.error,
|
||||
"error_code": "captcha_wait_expired",
|
||||
}
|
||||
rec.human_intervention = None
|
||||
rec.awaiting_deadline = None
|
||||
return rec
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request
|
|||
from fastapi.responses import JSONResponse
|
||||
|
||||
from api.contracts.task_schemas import (
|
||||
BrowserCaptchaAbortRequest,
|
||||
BrowserCaptchaReadyRequest,
|
||||
BrowserTaskAcceptedResponse,
|
||||
BrowserTaskRequest,
|
||||
BrowserTaskResultResponse,
|
||||
|
|
@ -20,8 +22,8 @@ def get_task_service(request: Request) -> TaskService:
|
|||
|
||||
@router.post("/tasks", response_model=BrowserTaskAcceptedResponse, status_code=202)
|
||||
async def create_task(
|
||||
payload: BrowserTaskRequest,
|
||||
service: TaskService = Depends(get_task_service),
|
||||
payload: BrowserTaskRequest,
|
||||
service: TaskService = Depends(get_task_service),
|
||||
) -> BrowserTaskAcceptedResponse:
|
||||
rec = await service.submit_task(task=payload.task.strip(), timeout=payload.timeout, metadata=payload.metadata)
|
||||
return BrowserTaskAcceptedResponse(task_id=rec.task_id, status=rec.status)
|
||||
|
|
@ -37,8 +39,8 @@ async def get_task_status(task_id: str, service: TaskService = Depends(get_task_
|
|||
|
||||
@router.get("/tasks/{task_id}/result", response_model=BrowserTaskResultResponse)
|
||||
async def get_task_result(
|
||||
task_id: str,
|
||||
service: TaskService = Depends(get_task_service),
|
||||
task_id: str,
|
||||
service: TaskService = Depends(get_task_service),
|
||||
) -> JSONResponse | BrowserTaskResultResponse:
|
||||
rec = await service.get_task(task_id)
|
||||
if rec is None:
|
||||
|
|
@ -55,18 +57,35 @@ async def get_task_result(
|
|||
"result": None,
|
||||
"error": None,
|
||||
"raw_response": None,
|
||||
"human_intervention": None,
|
||||
},
|
||||
)
|
||||
|
||||
return BrowserTaskResultResponse(
|
||||
task_id=rec.task_id,
|
||||
status=rec.status,
|
||||
success=(rec.status == TaskStatus.succeeded),
|
||||
execution_time=rec.execution_time,
|
||||
result=rec.result,
|
||||
error=rec.error,
|
||||
raw_response=rec.raw_response,
|
||||
)
|
||||
return _to_result_response(rec)
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/captcha/ready", response_model=BrowserTaskResultResponse)
|
||||
async def captcha_ready(
|
||||
task_id: str,
|
||||
payload: BrowserCaptchaReadyRequest,
|
||||
service: TaskService = Depends(get_task_service),
|
||||
) -> BrowserTaskResultResponse:
|
||||
rec = await service.resume_captcha(task_id, user_response=payload.user_response)
|
||||
if rec is None:
|
||||
raise HTTPException(status_code=404, detail="Task not found")
|
||||
return _to_result_response(rec)
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/captcha/abort", response_model=BrowserTaskResultResponse)
|
||||
async def captcha_abort(
|
||||
task_id: str,
|
||||
payload: BrowserCaptchaAbortRequest,
|
||||
service: TaskService = Depends(get_task_service),
|
||||
) -> BrowserTaskResultResponse:
|
||||
rec = await service.abort_captcha(task_id, reason=payload.reason)
|
||||
if rec is None:
|
||||
raise HTTPException(status_code=404, detail="Task not found")
|
||||
return _to_result_response(rec)
|
||||
|
||||
|
||||
def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
|
||||
|
|
@ -77,4 +96,18 @@ def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
|
|||
started_at=rec.started_at,
|
||||
finished_at=rec.finished_at,
|
||||
error=rec.error,
|
||||
human_intervention=rec.human_intervention,
|
||||
)
|
||||
|
||||
|
||||
def _to_result_response(rec: TaskRecord) -> BrowserTaskResultResponse:
|
||||
return BrowserTaskResultResponse(
|
||||
task_id=rec.task_id,
|
||||
status=rec.status,
|
||||
success=(rec.status == TaskStatus.succeeded),
|
||||
execution_time=rec.execution_time,
|
||||
result=rec.result,
|
||||
error=rec.error,
|
||||
raw_response=rec.raw_response,
|
||||
human_intervention=rec.human_intervention,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,21 +1,24 @@
|
|||
import asyncio
|
||||
|
||||
from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner
|
||||
from api.domain.task_status import TaskStatus
|
||||
from api.repositories.task_store import TaskRecord, TaskStore
|
||||
|
||||
|
||||
class TaskService:
|
||||
def __init__(
|
||||
self,
|
||||
store: TaskStore,
|
||||
rpc_client: BrowserRpcRunner,
|
||||
max_concurrency: int,
|
||||
rpc_timeout_cap: float | None = None,
|
||||
self,
|
||||
store: TaskStore,
|
||||
rpc_client: BrowserRpcRunner,
|
||||
max_concurrency: int,
|
||||
rpc_timeout_cap: float | None = None,
|
||||
captcha_wait_timeout: int = 900,
|
||||
) -> None:
|
||||
self._store = store
|
||||
self._rpc_client = rpc_client
|
||||
self._semaphore = asyncio.Semaphore(max_concurrency)
|
||||
self._rpc_timeout_cap = rpc_timeout_cap
|
||||
self._captcha_wait_timeout = captcha_wait_timeout
|
||||
self._background_tasks: set[asyncio.Task[None]] = set()
|
||||
|
||||
async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord:
|
||||
|
|
@ -26,7 +29,22 @@ class TaskService:
|
|||
return record
|
||||
|
||||
async def get_task(self, task_id: str) -> TaskRecord | None:
|
||||
return await self._store.get(task_id)
|
||||
before = await self._store.get(task_id)
|
||||
was_awaiting = bool(before is not None and before.status == TaskStatus.awaiting_user_captcha)
|
||||
await self._store.expire_if_needed(task_id)
|
||||
after = await self._store.get(task_id)
|
||||
if (
|
||||
was_awaiting
|
||||
and after is not None
|
||||
and after.status == TaskStatus.failed
|
||||
and after.error
|
||||
and "expired" in after.error.lower()
|
||||
):
|
||||
try:
|
||||
await self._rpc_client.abort(task_id, reason=after.error)
|
||||
except BrowserRpcError:
|
||||
pass
|
||||
return after
|
||||
|
||||
async def close(self) -> None:
|
||||
if not self._background_tasks:
|
||||
|
|
@ -37,6 +55,86 @@ class TaskService:
|
|||
await asyncio.gather(*self._background_tasks, return_exceptions=True)
|
||||
self._background_tasks.clear()
|
||||
|
||||
async def resume_captcha(self, task_id: str, user_response: str | None = None) -> TaskRecord | None:
|
||||
rec = await self.get_task(task_id)
|
||||
if rec is None:
|
||||
return None
|
||||
if rec.status == TaskStatus.failed:
|
||||
return rec
|
||||
if rec.status != TaskStatus.awaiting_user_captcha:
|
||||
return rec
|
||||
|
||||
verify_raw = await self._rpc_client.verify_captcha(task_id)
|
||||
if not verify_raw.get("verified"):
|
||||
await self._store.set_awaiting_captcha(
|
||||
task_id=task_id,
|
||||
raw_response={
|
||||
"success": False,
|
||||
"status": TaskStatus.awaiting_user_captcha.value,
|
||||
"error": "CAPTCHA is still present.",
|
||||
"human_intervention": rec.human_intervention,
|
||||
"verification": verify_raw,
|
||||
"user_response": user_response,
|
||||
},
|
||||
max_wait_seconds=(rec.human_intervention or {}).get("verification", {}).get(
|
||||
"max_wait_seconds", self._captcha_wait_timeout
|
||||
),
|
||||
)
|
||||
return await self._store.get(task_id)
|
||||
|
||||
await self._store.set_running(task_id)
|
||||
|
||||
try:
|
||||
rpc_timeout = float(rec.timeout)
|
||||
if self._rpc_timeout_cap is not None:
|
||||
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
|
||||
|
||||
raw = await asyncio.wait_for(
|
||||
self._rpc_client.resume(task_id=task_id, timeout_sec=rpc_timeout),
|
||||
timeout=float(rec.timeout) + 5,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
await self._store.set_done(
|
||||
task_id=task_id,
|
||||
success=False,
|
||||
raw_response=None,
|
||||
error="Timeout exceeded after CAPTCHA resume.",
|
||||
)
|
||||
return await self._store.get(task_id)
|
||||
except BrowserRpcError as exc:
|
||||
await self._store.set_done(
|
||||
task_id=task_id,
|
||||
success=False,
|
||||
raw_response=None,
|
||||
error=str(exc),
|
||||
)
|
||||
return await self._store.get(task_id)
|
||||
|
||||
await self._apply_rpc_result(task_id, raw)
|
||||
return await self._store.get(task_id)
|
||||
|
||||
async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None:
|
||||
rec = await self.get_task(task_id)
|
||||
if rec is None:
|
||||
return None
|
||||
if rec.status == TaskStatus.awaiting_user_captcha:
|
||||
try:
|
||||
await self._rpc_client.abort(task_id, reason=reason)
|
||||
except BrowserRpcError:
|
||||
pass
|
||||
await self._store.set_done(
|
||||
task_id=task_id,
|
||||
success=False,
|
||||
raw_response={
|
||||
"success": False,
|
||||
"status": TaskStatus.failed.value,
|
||||
"error_code": "captcha_aborted",
|
||||
"reason": reason,
|
||||
},
|
||||
error=reason or "User aborted CAPTCHA flow.",
|
||||
)
|
||||
return await self._store.get(task_id)
|
||||
|
||||
async def _worker(self, task_id: str) -> None:
|
||||
rec = await self._store.set_running(task_id)
|
||||
if rec is None:
|
||||
|
|
@ -49,17 +147,10 @@ class TaskService:
|
|||
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
|
||||
|
||||
raw = await asyncio.wait_for(
|
||||
self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout),
|
||||
self._rpc_client.run(task_id=task_id, task=rec.task, timeout_sec=rpc_timeout),
|
||||
timeout=float(rec.timeout) + 5,
|
||||
)
|
||||
success = bool(raw.get("success"))
|
||||
await self._store.set_done(
|
||||
task_id=task_id,
|
||||
success=success,
|
||||
raw_response=raw,
|
||||
error=None,
|
||||
result=raw.get("result") if isinstance(raw, dict) else None,
|
||||
)
|
||||
await self._apply_rpc_result(task_id, raw)
|
||||
except asyncio.TimeoutError:
|
||||
await self._store.set_done(
|
||||
task_id=task_id,
|
||||
|
|
@ -81,3 +172,22 @@ class TaskService:
|
|||
raw_response=None,
|
||||
error=f"Internal error: {exc}",
|
||||
)
|
||||
|
||||
async def _apply_rpc_result(self, task_id: str, raw: dict | None) -> None:
|
||||
raw = raw or {}
|
||||
status = raw.get("status")
|
||||
if status == TaskStatus.awaiting_user_captcha.value:
|
||||
human = raw.get("human_intervention") or {}
|
||||
verification = human.get("verification") or {}
|
||||
max_wait_seconds = verification.get("max_wait_seconds", self._captcha_wait_timeout)
|
||||
await self._store.set_awaiting_captcha(task_id, raw_response=raw, max_wait_seconds=max_wait_seconds)
|
||||
return
|
||||
|
||||
success = bool(raw.get("success"))
|
||||
await self._store.set_done(
|
||||
task_id=task_id,
|
||||
success=success,
|
||||
raw_response=raw,
|
||||
error=None,
|
||||
result=raw.get("result") if isinstance(raw, dict) else None,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,52 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from typing import Any
|
||||
from urllib import error, request
|
||||
|
||||
from browser_use import Agent, Browser, ChatOpenAI
|
||||
|
||||
|
||||
CAPTCHA_WAIT_TIMEOUT = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))
|
||||
_RUNNER_TASKS: dict[str, "RunnerTask"] = {}
|
||||
_RUNNER_LOCK = threading.Lock()
|
||||
|
||||
_CF_STRONG = (
|
||||
"just a moment",
|
||||
"attention required",
|
||||
"checking your browser",
|
||||
"cf-challenge",
|
||||
"cdn-cgi/challenge-platform",
|
||||
"__cf_chl",
|
||||
"turnstile",
|
||||
)
|
||||
_RECAPTCHA_STRONG = (
|
||||
"g-recaptcha",
|
||||
"recaptcha/api2",
|
||||
"www.google.com/recaptcha",
|
||||
"grecaptcha",
|
||||
)
|
||||
_HCAPTCHA_STRONG = (
|
||||
"hcaptcha",
|
||||
"newassets.hcaptcha.com",
|
||||
"js.hcaptcha.com/1/api.js",
|
||||
)
|
||||
_GENERIC_CAPTCHA_STRONG = (
|
||||
"captcha",
|
||||
"are you human",
|
||||
"verify you are human",
|
||||
"human verification",
|
||||
"bot detection",
|
||||
"security check",
|
||||
"press and hold",
|
||||
)
|
||||
|
||||
|
||||
def _json_response(handler, status_code, payload):
|
||||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||||
handler.send_response(status_code)
|
||||
|
|
@ -16,12 +56,198 @@ def _json_response(handler, status_code, payload):
|
|||
handler.wfile.write(data)
|
||||
|
||||
|
||||
async def run_browser_task(task):
|
||||
@dataclass
|
||||
class RunnerTask:
|
||||
task_id: str
|
||||
task: str
|
||||
browser_view_url: str
|
||||
resume_token: str = field(default_factory=lambda: uuid.uuid4().hex)
|
||||
created_at: float = field(default_factory=time.time)
|
||||
status: str = "starting"
|
||||
payload: dict[str, Any] | None = None
|
||||
error: str | None = None
|
||||
agent: Any = None
|
||||
browser: Any = None
|
||||
loop: asyncio.AbstractEventLoop | None = None
|
||||
thread: threading.Thread | None = None
|
||||
settled_event: threading.Event = field(default_factory=threading.Event)
|
||||
finished: bool = False
|
||||
awaiting: bool = False
|
||||
aborted: bool = False
|
||||
transition_count: int = 0
|
||||
lock: threading.Lock = field(default_factory=threading.Lock)
|
||||
|
||||
def set_payload(self, status: str, payload: dict[str, Any]) -> None:
|
||||
with self.lock:
|
||||
self.status = status
|
||||
self.payload = payload
|
||||
self.transition_count += 1
|
||||
self.awaiting = status == "awaiting_user_captcha"
|
||||
self.finished = status in {"succeeded", "failed"}
|
||||
self.settled_event.set()
|
||||
|
||||
|
||||
def _get_task(task_id: str) -> RunnerTask | None:
|
||||
with _RUNNER_LOCK:
|
||||
return _RUNNER_TASKS.get(task_id)
|
||||
|
||||
|
||||
def _put_task(task: RunnerTask) -> RunnerTask:
|
||||
with _RUNNER_LOCK:
|
||||
_RUNNER_TASKS[task.task_id] = task
|
||||
return task
|
||||
|
||||
|
||||
async def _get_page_html(agent: Agent) -> str:
|
||||
try:
|
||||
cdp_session = await agent.browser_session.get_or_create_cdp_session()
|
||||
doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id)
|
||||
html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML(
|
||||
params={"nodeId": doc["root"]["nodeId"]},
|
||||
session_id=cdp_session.session_id,
|
||||
)
|
||||
return str(html_result.get("outerHTML", ""))
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
async def _capture_page_state(agent: Agent) -> dict[str, Any]:
|
||||
url = ""
|
||||
title = ""
|
||||
summary = None
|
||||
try:
|
||||
summary = await agent.browser_session.get_browser_state_summary()
|
||||
except Exception:
|
||||
summary = None
|
||||
|
||||
if summary is not None:
|
||||
if isinstance(summary, dict):
|
||||
url = str(summary.get("url") or "")
|
||||
title = str(summary.get("title") or "")
|
||||
else:
|
||||
url = str(getattr(summary, "url", "") or "")
|
||||
title = str(getattr(summary, "title", "") or "")
|
||||
|
||||
if not url:
|
||||
try:
|
||||
url = str(await agent.browser_session.get_current_page_url() or "")
|
||||
except Exception:
|
||||
url = ""
|
||||
if not title:
|
||||
try:
|
||||
title = str(await agent.browser_session.get_current_page_title() or "")
|
||||
except Exception:
|
||||
title = ""
|
||||
|
||||
html = await _get_page_html(agent)
|
||||
return {"url": url, "title": title, "html": html}
|
||||
|
||||
|
||||
def _classify_captcha(haystack: str) -> str:
|
||||
if any(token in haystack for token in _CF_STRONG):
|
||||
return "cloudflare"
|
||||
if any(token in haystack for token in _RECAPTCHA_STRONG):
|
||||
return "recaptcha"
|
||||
if any(token in haystack for token in _HCAPTCHA_STRONG):
|
||||
return "hcaptcha"
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _detect_captcha_from_state(page_state: dict[str, Any]) -> tuple[bool, str, list[str]]:
|
||||
url = str(page_state.get("url") or "").lower()
|
||||
title = str(page_state.get("title") or "").lower()
|
||||
html = str(page_state.get("html") or "").lower()
|
||||
haystack = "\n".join([url, title, html[:150000]])
|
||||
|
||||
signals: list[str] = []
|
||||
if any(token in haystack for token in _CF_STRONG):
|
||||
signals.append("cloudflare_challenge")
|
||||
if any(token in haystack for token in _RECAPTCHA_STRONG):
|
||||
signals.append("recaptcha")
|
||||
if any(token in haystack for token in _HCAPTCHA_STRONG):
|
||||
signals.append("hcaptcha")
|
||||
|
||||
generic_hits = [token for token in _GENERIC_CAPTCHA_STRONG if token in haystack]
|
||||
if generic_hits:
|
||||
signals.extend(f"generic:{token}" for token in generic_hits[:3])
|
||||
|
||||
blocked = bool(signals)
|
||||
captcha_type = _classify_captcha(haystack)
|
||||
return blocked, captcha_type, signals
|
||||
|
||||
|
||||
async def _build_captcha_payload(task: RunnerTask, agent: Agent) -> dict[str, Any]:
|
||||
page_state = await _capture_page_state(agent)
|
||||
blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
|
||||
if not blocked:
|
||||
raise RuntimeError("Captcha payload requested without an active challenge")
|
||||
|
||||
verification = {
|
||||
"mode": "dom_url_title",
|
||||
"selectors_absent": [
|
||||
"iframe[src*='recaptcha']",
|
||||
"[class*='hcaptcha']",
|
||||
"[id*='captcha']",
|
||||
"form[action*='challenge']",
|
||||
"input[name='cf-turnstile-response']",
|
||||
],
|
||||
"challenge_signals_absent": signals,
|
||||
"max_wait_seconds": CAPTCHA_WAIT_TIMEOUT,
|
||||
}
|
||||
browser_view_url = task.browser_view_url or None
|
||||
instructions = (
|
||||
"Open the live browser view, complete the verification challenge manually, "
|
||||
"then return and reply 'ready' or 'done'."
|
||||
)
|
||||
return {
|
||||
"success": False,
|
||||
"status": "awaiting_user_captcha",
|
||||
"task_id": task.task_id,
|
||||
"session_id": task.task_id,
|
||||
"resume_token": task.resume_token,
|
||||
"browser_view_url": browser_view_url,
|
||||
"captcha_type": captcha_type,
|
||||
"instructions": instructions,
|
||||
"detected_at": time.time(),
|
||||
"page_url": page_state.get("url"),
|
||||
"page_title": page_state.get("title"),
|
||||
"verification": verification,
|
||||
"human_intervention": {
|
||||
"status": "awaiting_user_captcha",
|
||||
"task_id": task.task_id,
|
||||
"session_id": task.task_id,
|
||||
"resume_token": task.resume_token,
|
||||
"browser_view_url": browser_view_url,
|
||||
"captcha_type": captcha_type,
|
||||
"instructions": instructions,
|
||||
"detected_at": time.time(),
|
||||
"verification": verification,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async def _verify_captcha_state(task: RunnerTask) -> dict[str, Any]:
|
||||
if not task.agent:
|
||||
return {"success": False, "verified": False, "error": "Task is not attached to an active agent"}
|
||||
|
||||
page_state = await _capture_page_state(task.agent)
|
||||
blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
|
||||
return {
|
||||
"success": True,
|
||||
"task_id": task.task_id,
|
||||
"verified": not blocked,
|
||||
"captcha_type": captcha_type if blocked else None,
|
||||
"page_url": page_state.get("url"),
|
||||
"page_title": page_state.get("title"),
|
||||
"signals": signals,
|
||||
"verification_mode": "dom_url_title",
|
||||
}
|
||||
|
||||
|
||||
async def _run_browser_task(task: RunnerTask):
|
||||
cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
|
||||
browser_view_url = os.getenv("BROWSER_VIEW_URL", "")
|
||||
|
||||
browser = Browser(cdp_url=cdp_url)
|
||||
|
||||
llm = ChatOpenAI(
|
||||
model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
|
|
@ -29,22 +255,131 @@ async def run_browser_task(task):
|
|||
temperature=0.0,
|
||||
)
|
||||
|
||||
agent = Agent(task=task, llm=llm, browser=browser)
|
||||
agent = Agent(task=task.task, llm=llm, browser=browser)
|
||||
task.browser = browser
|
||||
task.agent = agent
|
||||
|
||||
async def on_step_end(current_agent: Agent):
|
||||
if task.awaiting or task.finished or task.aborted:
|
||||
return
|
||||
page_state = await _capture_page_state(current_agent)
|
||||
blocked, _, _ = _detect_captcha_from_state(page_state)
|
||||
if not blocked:
|
||||
return
|
||||
payload = await _build_captcha_payload(task, current_agent)
|
||||
task.set_payload("awaiting_user_captcha", payload)
|
||||
current_agent.pause()
|
||||
|
||||
try:
|
||||
history = await agent.run()
|
||||
return {
|
||||
"success": True,
|
||||
"result": history.final_result(),
|
||||
"browser_view": browser_view_url,
|
||||
}
|
||||
history = await agent.run(on_step_end=on_step_end)
|
||||
if task.aborted:
|
||||
task.set_payload(
|
||||
"failed",
|
||||
{"success": False, "status": "failed", "error": task.error or "Task aborted during CAPTCHA flow."},
|
||||
)
|
||||
return
|
||||
if task.awaiting:
|
||||
return
|
||||
task.set_payload(
|
||||
"succeeded",
|
||||
{
|
||||
"success": True,
|
||||
"status": "succeeded",
|
||||
"result": history.final_result(),
|
||||
"browser_view_url": task.browser_view_url or None,
|
||||
},
|
||||
)
|
||||
except Exception as err:
|
||||
return {"success": False, "error": f"Browser automation failed: {err}"}
|
||||
if not task.awaiting and not task.finished:
|
||||
task.set_payload(
|
||||
"failed",
|
||||
{"success": False, "status": "failed", "error": f"Browser automation failed: {err}"},
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
if not task.awaiting:
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _runner_thread_main(task: RunnerTask) -> None:
|
||||
loop = asyncio.new_event_loop()
|
||||
task.loop = loop
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
loop.run_until_complete(_run_browser_task(task))
|
||||
finally:
|
||||
pending = asyncio.all_tasks(loop=loop)
|
||||
for pending_task in pending:
|
||||
pending_task.cancel()
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
loop.close()
|
||||
|
||||
|
||||
def _start_task(task_id: str, task_text: str) -> dict[str, Any]:
|
||||
existing = _get_task(task_id)
|
||||
if existing:
|
||||
return existing.payload or {
|
||||
"success": False,
|
||||
"status": existing.status,
|
||||
"error": "Task already exists",
|
||||
"task_id": task_id,
|
||||
}
|
||||
|
||||
state = _put_task(
|
||||
RunnerTask(
|
||||
task_id=task_id,
|
||||
task=task_text,
|
||||
browser_view_url=os.getenv("BROWSER_VIEW_URL", ""),
|
||||
)
|
||||
)
|
||||
thread = threading.Thread(target=_runner_thread_main, args=(state,), daemon=True, name=f"browser-task-{task_id[:8]}")
|
||||
state.thread = thread
|
||||
thread.start()
|
||||
|
||||
state.settled_event.wait()
|
||||
return state.payload or {"success": False, "status": "failed", "error": "Task exited without payload", "task_id": task_id}
|
||||
|
||||
|
||||
def _resume_task(task_id: str) -> dict[str, Any]:
|
||||
state = _get_task(task_id)
|
||||
if not state:
|
||||
return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}
|
||||
if not state.loop or not state.agent:
|
||||
return {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}
|
||||
|
||||
state.awaiting = False
|
||||
state.settled_event.clear()
|
||||
state.loop.call_soon_threadsafe(state.agent.resume)
|
||||
state.settled_event.wait()
|
||||
return state.payload or {"success": False, "status": "failed", "error": "Resume did not produce a payload", "task_id": task_id}
|
||||
|
||||
|
||||
def _verify_task(task_id: str) -> dict[str, Any]:
|
||||
state = _get_task(task_id)
|
||||
if not state:
|
||||
return {"success": False, "verified": False, "error": "Task not found", "task_id": task_id}
|
||||
if not state.loop:
|
||||
return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}
|
||||
future = asyncio.run_coroutine_threadsafe(_verify_captcha_state(state), state.loop)
|
||||
return future.result(timeout=20)
|
||||
|
||||
|
||||
def _abort_task(task_id: str, reason: str | None = None) -> dict[str, Any]:
|
||||
state = _get_task(task_id)
|
||||
if not state:
|
||||
return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}
|
||||
state.aborted = True
|
||||
state.error = reason or "CAPTCHA flow aborted by user."
|
||||
if state.loop and state.agent:
|
||||
state.loop.call_soon_threadsafe(state.agent.resume)
|
||||
state.set_payload(
|
||||
"failed",
|
||||
{"success": False, "status": "failed", "task_id": task_id, "error": state.error, "error_code": "captcha_aborted"},
|
||||
)
|
||||
return state.payload
|
||||
|
||||
|
||||
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
|
||||
|
|
@ -62,28 +397,49 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler):
|
|||
_json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})
|
||||
|
||||
def do_POST(self):
|
||||
if self.path != "/run":
|
||||
_json_response(self, 404, {"success": False, "error": "Not found"})
|
||||
return
|
||||
|
||||
try:
|
||||
content_length = int(self.headers.get("Content-Length", "0"))
|
||||
raw = self.rfile.read(content_length)
|
||||
payload = json.loads(raw.decode("utf-8") if raw else "{}")
|
||||
except json.JSONDecodeError:
|
||||
_json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
|
||||
return
|
||||
|
||||
if self.path == "/run":
|
||||
task = payload.get("task", "")
|
||||
task_id = str(payload.get("task_id") or uuid.uuid4().hex)
|
||||
if not isinstance(task, str) or not task.strip():
|
||||
_json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
|
||||
return
|
||||
result = _start_task(task_id=task_id, task_text=task.strip())
|
||||
_json_response(self, 200, result)
|
||||
return
|
||||
|
||||
result = asyncio.run(run_browser_task(task.strip()))
|
||||
code = 200 if result.get("success") else 500
|
||||
_json_response(self, code, result)
|
||||
except json.JSONDecodeError:
|
||||
_json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
|
||||
except error.URLError as err:
|
||||
_json_response(self, 503, {"success": False, "error": f"Transport error: {err}"})
|
||||
except Exception as err:
|
||||
_json_response(self, 500, {"success": False, "error": f"Internal error: {err}"})
|
||||
if self.path == "/verify":
|
||||
task_id = str(payload.get("task_id") or "")
|
||||
if not task_id:
|
||||
_json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
|
||||
return
|
||||
_json_response(self, 200, _verify_task(task_id))
|
||||
return
|
||||
|
||||
if self.path == "/resume":
|
||||
task_id = str(payload.get("task_id") or "")
|
||||
if not task_id:
|
||||
_json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
|
||||
return
|
||||
_json_response(self, 200, _resume_task(task_id))
|
||||
return
|
||||
|
||||
if self.path == "/abort":
|
||||
task_id = str(payload.get("task_id") or "")
|
||||
if not task_id:
|
||||
_json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
|
||||
return
|
||||
_json_response(self, 200, _abort_task(task_id, reason=payload.get("reason")))
|
||||
return
|
||||
|
||||
_json_response(self, 404, {"success": False, "error": "Not found"})
|
||||
|
||||
def log_message(self, format_str, *args):
|
||||
return
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ if [ ! -f /var/lib/dbus/machine-id ]; then
|
|||
dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
|
||||
# УдалÑем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
|
||||
rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true
|
||||
|
||||
log "starting X stack on DISPLAY=${DISPLAY}"
|
||||
|
|
|
|||
|
|
@ -154,6 +154,15 @@ SKILLS_GUIDANCE = (
|
|||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
BROWSER_CAPTCHA_GUIDANCE = (
|
||||
"For browser tasks, do not pre-emptively refuse just because CAPTCHA may appear. "
|
||||
"Start the task with internet_browser. "
|
||||
"If the browser runtime reports status='awaiting_user_captcha', immediately use to_captcha "
|
||||
"to hand control to the user for manual verification and then resume. "
|
||||
"Important: you must never claim that you solved CAPTCHA yourself and you must not attempt bypass methods. "
|
||||
"Your role is orchestration: run browser steps, request manual CAPTCHA completion from the user, verify, and continue."
|
||||
)
|
||||
|
||||
PLATFORM_HINTS = {
|
||||
"whatsapp": (
|
||||
"You are on a text messaging communication platform, WhatsApp. "
|
||||
|
|
|
|||
|
|
@ -1922,6 +1922,7 @@ class HermesCLI:
|
|||
platform="cli",
|
||||
session_db=self._session_db,
|
||||
clarify_callback=self._clarify_callback,
|
||||
captcha_callback=self._captcha_callback,
|
||||
reasoning_callback=(
|
||||
self._stream_reasoning_delta if (self.streaming_enabled and self.show_reasoning)
|
||||
else self._on_reasoning if (self.show_reasoning or self.verbose)
|
||||
|
|
@ -5113,6 +5114,40 @@ class HermesCLI:
|
|||
"Use your best judgement to make the choice and proceed."
|
||||
)
|
||||
|
||||
def _captcha_callback(self, payload):
|
||||
"""Prompt the user to complete a paused CAPTCHA flow in the live browser."""
|
||||
import time as _time
|
||||
|
||||
timeout = int((payload.get("verification") or {}).get("max_wait_seconds", 900))
|
||||
response_queue = queue.Queue()
|
||||
self._captcha_state = {
|
||||
"payload": payload,
|
||||
"response_queue": response_queue,
|
||||
}
|
||||
self._captcha_deadline = _time.monotonic() + timeout
|
||||
self._invalidate()
|
||||
|
||||
last_refresh = _time.monotonic()
|
||||
while True:
|
||||
try:
|
||||
result = response_queue.get(timeout=1)
|
||||
self._captcha_deadline = 0
|
||||
return result
|
||||
except queue.Empty:
|
||||
remaining = self._captcha_deadline - _time.monotonic()
|
||||
if remaining <= 0:
|
||||
break
|
||||
now = _time.monotonic()
|
||||
if now - last_refresh >= 5.0:
|
||||
last_refresh = now
|
||||
self._invalidate()
|
||||
|
||||
self._captcha_state = None
|
||||
self._captcha_deadline = 0
|
||||
self._invalidate()
|
||||
_cprint(f"\n{_DIM}(captcha wait timed out after {timeout}s){_RST}")
|
||||
return {"action": "timeout", "user_response": ""}
|
||||
|
||||
def _sudo_password_callback(self) -> str:
|
||||
"""
|
||||
Prompt for sudo password through the prompt_toolkit UI.
|
||||
|
|
@ -5812,6 +5847,8 @@ class HermesCLI:
|
|||
return [("class:sudo-prompt", f"🔑 {state_suffix}")]
|
||||
if self._approval_state:
|
||||
return [("class:prompt-working", f"⚠ {state_suffix}")]
|
||||
if self._captcha_state:
|
||||
return [("class:prompt-working", f"🧩 {state_suffix}")]
|
||||
if self._clarify_freetext:
|
||||
return [("class:clarify-selected", f"✎ {state_suffix}")]
|
||||
if self._clarify_state:
|
||||
|
|
@ -5878,6 +5915,7 @@ class HermesCLI:
|
|||
sudo_widget,
|
||||
secret_widget,
|
||||
approval_widget,
|
||||
captcha_widget,
|
||||
clarify_widget,
|
||||
spinner_widget,
|
||||
spacer,
|
||||
|
|
@ -5900,6 +5938,7 @@ class HermesCLI:
|
|||
sudo_widget,
|
||||
secret_widget,
|
||||
approval_widget,
|
||||
captcha_widget,
|
||||
clarify_widget,
|
||||
spinner_widget,
|
||||
spacer,
|
||||
|
|
@ -5983,6 +6022,10 @@ class HermesCLI:
|
|||
self._approval_deadline = 0
|
||||
self._approval_lock = threading.Lock() # serialize concurrent approval prompts (delegation race fix)
|
||||
|
||||
# CAPTCHA / human verification prompt state
|
||||
self._captcha_state = None # dict with payload + response_queue
|
||||
self._captcha_deadline = 0
|
||||
|
||||
# Slash command loading state
|
||||
self._command_running = False
|
||||
self._command_status = ""
|
||||
|
|
@ -6058,6 +6101,23 @@ class HermesCLI:
|
|||
event.app.invalidate()
|
||||
return
|
||||
|
||||
# --- CAPTCHA prompt: accept ready/done/abort style input ---
|
||||
if self._captcha_state:
|
||||
text = event.app.current_buffer.text.strip()
|
||||
normalized = text.lower()
|
||||
if normalized in {"abort", "cancel", "stop"}:
|
||||
action = "abort"
|
||||
elif text:
|
||||
action = "ready"
|
||||
else:
|
||||
return
|
||||
self._captcha_state["response_queue"].put({"action": action, "user_response": text})
|
||||
self._captcha_state = None
|
||||
self._captcha_deadline = 0
|
||||
event.app.current_buffer.reset()
|
||||
event.app.invalidate()
|
||||
return
|
||||
|
||||
# --- Clarify freetext mode: user typed their own answer ---
|
||||
if self._clarify_freetext and self._clarify_state:
|
||||
text = event.app.current_buffer.text.strip()
|
||||
|
|
@ -6194,7 +6254,7 @@ class HermesCLI:
|
|||
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
|
||||
# history browsing when on the first/last line (or single-line input).
|
||||
_normal_input = Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._captcha_state and not self._sudo_state and not self._secret_state
|
||||
)
|
||||
|
||||
@kb.add('up', filter=_normal_input)
|
||||
|
|
@ -6261,6 +6321,14 @@ class HermesCLI:
|
|||
event.app.invalidate()
|
||||
return
|
||||
|
||||
if self._captcha_state:
|
||||
self._captcha_state["response_queue"].put({"action": "timeout", "user_response": ""})
|
||||
self._captcha_state = None
|
||||
self._captcha_deadline = 0
|
||||
event.app.current_buffer.reset()
|
||||
event.app.invalidate()
|
||||
return
|
||||
|
||||
# Cancel clarify prompt
|
||||
if self._clarify_state:
|
||||
self._clarify_state["response_queue"].put(
|
||||
|
|
@ -6334,7 +6402,7 @@ class HermesCLI:
|
|||
# Guard: don't START recording during agent run or interactive prompts
|
||||
if cli_ref._agent_running:
|
||||
return
|
||||
if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state:
|
||||
if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state or cli_ref._captcha_state:
|
||||
return
|
||||
# Guard: don't start while a previous stop/transcribe cycle is
|
||||
# still running — recorder.stop() holds AudioRecorder._lock and
|
||||
|
|
@ -6554,6 +6622,8 @@ class HermesCLI:
|
|||
return "type secret (hidden), Enter to skip"
|
||||
if cli_ref._approval_state:
|
||||
return ""
|
||||
if cli_ref._captcha_state:
|
||||
return "type ready/done after you solve the challenge, or abort to cancel"
|
||||
if cli_ref._clarify_freetext:
|
||||
return "type your answer here and press Enter"
|
||||
if cli_ref._clarify_state:
|
||||
|
|
@ -6597,6 +6667,13 @@ class HermesCLI:
|
|||
('class:clarify-countdown', f' ({remaining}s)'),
|
||||
]
|
||||
|
||||
if cli_ref._captcha_state:
|
||||
remaining = max(0, int(cli_ref._captcha_deadline - _time.monotonic()))
|
||||
return [
|
||||
('class:hint', " complete the challenge in the browser, then type 'ready'"),
|
||||
('class:clarify-countdown', f' ({remaining}s)'),
|
||||
]
|
||||
|
||||
if cli_ref._clarify_state:
|
||||
remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
|
||||
countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else ''
|
||||
|
|
@ -6619,7 +6696,7 @@ class HermesCLI:
|
|||
return []
|
||||
|
||||
def get_hint_height():
|
||||
if cli_ref._sudo_state or cli_ref._secret_state or cli_ref._approval_state or cli_ref._clarify_state or cli_ref._command_running:
|
||||
if cli_ref._sudo_state or cli_ref._secret_state or cli_ref._approval_state or cli_ref._captcha_state or cli_ref._clarify_state or cli_ref._command_running:
|
||||
return 1
|
||||
# Keep a 1-line spacer while agent runs so output doesn't push
|
||||
# right up against the top rule of the input area
|
||||
|
|
@ -6644,7 +6721,7 @@ class HermesCLI:
|
|||
height=get_hint_height,
|
||||
)
|
||||
|
||||
# --- Clarify tool: dynamic display widget for questions + choices ---
|
||||
# --- Interactive panels: CAPTCHA + clarify ---
|
||||
|
||||
def _panel_box_width(title: str, content_lines: list[str], min_width: int = 46, max_width: int = 76) -> int:
|
||||
"""Choose a stable panel width wide enough for the title and content."""
|
||||
|
|
@ -6672,6 +6749,45 @@ class HermesCLI:
|
|||
def _append_blank_panel_line(lines, border_style: str, box_width: int) -> None:
|
||||
lines.append((border_style, "│" + (" " * box_width) + "│\n"))
|
||||
|
||||
def _get_captcha_display():
|
||||
state = cli_ref._captcha_state
|
||||
if not state:
|
||||
return []
|
||||
|
||||
payload = state.get("payload") or {}
|
||||
title = "Manual CAPTCHA Required"
|
||||
browser_view_url = payload.get("browser_view_url") or "Browser view URL is not configured."
|
||||
body_lines = [
|
||||
payload.get("instructions") or "Open the live browser and complete the verification challenge.",
|
||||
f"Type: {payload.get('captcha_type', 'unknown')}",
|
||||
f"Task ID: {payload.get('task_id', '')}",
|
||||
f"Browser: {browser_view_url}",
|
||||
"When the challenge disappears, type 'ready' or 'done' and press Enter.",
|
||||
"Type 'abort' to stop this browser task.",
|
||||
]
|
||||
box_width = _panel_box_width(title, body_lines, min_width=56, max_width=94)
|
||||
inner_text_width = max(8, box_width - 2)
|
||||
lines = []
|
||||
lines.append(('class:captcha-border', '╭─ '))
|
||||
lines.append(('class:captcha-title', title))
|
||||
lines.append(('class:captcha-border', ' ' + ('─' * max(0, box_width - len(title) - 3)) + '╮\n'))
|
||||
_append_blank_panel_line(lines, 'class:captcha-border', box_width)
|
||||
for text in body_lines:
|
||||
style = 'class:captcha-link' if text.startswith("Browser: ") else 'class:captcha-text'
|
||||
for wrapped in _wrap_panel_text(text, inner_text_width):
|
||||
_append_panel_line(lines, 'class:captcha-border', style, wrapped, box_width)
|
||||
_append_blank_panel_line(lines, 'class:captcha-border', box_width)
|
||||
lines.append(('class:captcha-border', '╰' + ('─' * box_width) + '╯\n'))
|
||||
return lines
|
||||
|
||||
captcha_widget = ConditionalContainer(
|
||||
Window(
|
||||
FormattedTextControl(_get_captcha_display),
|
||||
wrap_lines=True,
|
||||
),
|
||||
filter=Condition(lambda: cli_ref._captcha_state is not None),
|
||||
)
|
||||
|
||||
def _get_clarify_display():
|
||||
"""Build styled text for the clarify question/choices panel."""
|
||||
state = cli_ref._clarify_state
|
||||
|
|
@ -6897,6 +7013,7 @@ class HermesCLI:
|
|||
sudo_widget=sudo_widget,
|
||||
secret_widget=secret_widget,
|
||||
approval_widget=approval_widget,
|
||||
captcha_widget=captcha_widget,
|
||||
clarify_widget=clarify_widget,
|
||||
spinner_widget=spinner_widget,
|
||||
spacer=spacer,
|
||||
|
|
@ -6954,6 +7071,11 @@ class HermesCLI:
|
|||
'approval-cmd': '#AAAAAA italic',
|
||||
'approval-choice': '#AAAAAA',
|
||||
'approval-selected': '#FFD700 bold',
|
||||
# CAPTCHA panel
|
||||
'captcha-border': '#CD7F32',
|
||||
'captcha-title': '#FFBF00 bold',
|
||||
'captcha-text': '#FFF8DC',
|
||||
'captcha-link': '#87CEEB underline',
|
||||
# Voice mode
|
||||
'voice-prompt': '#87CEEB',
|
||||
'voice-recording': '#FF4444 bold',
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ def _discover_tools():
|
|||
"tools.memory_tool",
|
||||
"tools.session_search_tool",
|
||||
"tools.clarify_tool",
|
||||
"tools.to_captcha_tool",
|
||||
"tools.code_execution_tool",
|
||||
"tools.delegate_tool",
|
||||
"tools.process_registry",
|
||||
|
|
@ -362,7 +363,7 @@ def get_tool_definitions(
|
|||
# because they need agent-level state (TodoStore, MemoryStore, etc.).
|
||||
# The registry still holds their schemas; dispatch just returns a stub error
|
||||
# so if something slips through, the LLM sees a sensible message.
|
||||
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
|
||||
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task", "to_captcha"}
|
||||
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ from hermes_constants import OPENROUTER_BASE_URL
|
|||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
BROWSER_CAPTCHA_GUIDANCE,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
fetch_model_metadata,
|
||||
|
|
@ -400,6 +401,7 @@ class AIAgent:
|
|||
thinking_callback: callable = None,
|
||||
reasoning_callback: callable = None,
|
||||
clarify_callback: callable = None,
|
||||
captcha_callback: callable = None,
|
||||
step_callback: callable = None,
|
||||
stream_delta_callback: callable = None,
|
||||
tool_gen_callback: callable = None,
|
||||
|
|
@ -447,6 +449,7 @@ class AIAgent:
|
|||
tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
|
||||
clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
|
||||
Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
|
||||
captcha_callback (callable): Callback function(payload_dict) -> dict for manual CAPTCHA completion flows.
|
||||
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
|
||||
reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
|
||||
If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
|
||||
|
|
@ -529,6 +532,7 @@ class AIAgent:
|
|||
self.thinking_callback = thinking_callback
|
||||
self.reasoning_callback = reasoning_callback
|
||||
self.clarify_callback = clarify_callback
|
||||
self.captcha_callback = captcha_callback
|
||||
self.step_callback = step_callback
|
||||
self.stream_delta_callback = stream_delta_callback
|
||||
self.status_callback = status_callback
|
||||
|
|
@ -2276,6 +2280,8 @@ class AIAgent:
|
|||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||
if "skill_manage" in self.valid_tool_names:
|
||||
tool_guidance.append(SKILLS_GUIDANCE)
|
||||
if "internet_browser" in self.valid_tool_names and "to_captcha" in self.valid_tool_names:
|
||||
tool_guidance.append(BROWSER_CAPTCHA_GUIDANCE)
|
||||
if tool_guidance:
|
||||
prompt_parts.append(" ".join(tool_guidance))
|
||||
|
||||
|
|
@ -4693,6 +4699,18 @@ class AIAgent:
|
|||
choices=function_args.get("choices"),
|
||||
callback=self.clarify_callback,
|
||||
)
|
||||
elif function_name == "to_captcha":
|
||||
from tools.to_captcha_tool import to_captcha_tool as _to_captcha_tool
|
||||
return _to_captcha_tool(
|
||||
task_id=function_args.get("task_id", ""),
|
||||
browser_view_url=function_args.get("browser_view_url"),
|
||||
captcha_type=function_args.get("captcha_type"),
|
||||
instructions=function_args.get("instructions"),
|
||||
detected_at=function_args.get("detected_at"),
|
||||
verification=function_args.get("verification"),
|
||||
resume_token=function_args.get("resume_token"),
|
||||
callback=self.captcha_callback,
|
||||
)
|
||||
elif function_name == "delegate_task":
|
||||
from tools.delegate_tool import delegate_task as _delegate_task
|
||||
return _delegate_task(
|
||||
|
|
@ -4711,6 +4729,53 @@ class AIAgent:
|
|||
honcho_session_key=self._honcho_session_key,
|
||||
)
|
||||
|
||||
def _maybe_resolve_captcha(self, function_name: str, function_result: str, effective_task_id: str) -> str:
|
||||
"""Bridge paused browser tasks into the dedicated CAPTCHA orchestration flow."""
|
||||
if function_name != "internet_browser":
|
||||
return function_result
|
||||
|
||||
try:
|
||||
payload = json.loads(function_result)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return function_result
|
||||
if not isinstance(payload, dict):
|
||||
return function_result
|
||||
if payload.get("status") != "awaiting_user_captcha":
|
||||
return function_result
|
||||
|
||||
human = payload.get("human_intervention") or {}
|
||||
captcha_args = {
|
||||
"task_id": payload.get("task_id") or human.get("task_id") or effective_task_id,
|
||||
"browser_view_url": human.get("browser_view_url"),
|
||||
"captcha_type": human.get("captcha_type"),
|
||||
"instructions": human.get("instructions"),
|
||||
"detected_at": human.get("detected_at"),
|
||||
"verification": human.get("verification"),
|
||||
"resume_token": human.get("resume_token"),
|
||||
}
|
||||
captcha_result = self._invoke_tool("to_captcha", captcha_args, effective_task_id)
|
||||
try:
|
||||
captcha_payload = json.loads(captcha_result)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return captcha_result
|
||||
if not isinstance(captcha_payload, dict):
|
||||
return captcha_result
|
||||
|
||||
if captcha_payload.get("status") == "resumed":
|
||||
task_result = captcha_payload.get("task_result")
|
||||
if isinstance(task_result, dict):
|
||||
return json.dumps(task_result, ensure_ascii=False)
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"status": captcha_payload.get("status", "still_blocked"),
|
||||
"task_id": captcha_args["task_id"],
|
||||
"human_intervention": human,
|
||||
"captcha_flow": captcha_payload,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
"""Execute multiple tool calls concurrently using a thread pool.
|
||||
|
||||
|
|
@ -4843,6 +4908,8 @@ class AIAgent:
|
|||
tool_duration = 0.0
|
||||
else:
|
||||
function_name, function_args, function_result, tool_duration, is_error = r
|
||||
function_result = self._maybe_resolve_captcha(function_name, function_result, effective_task_id)
|
||||
is_error, _ = _detect_tool_failure(function_name, function_result)
|
||||
|
||||
if is_error:
|
||||
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
||||
|
|
@ -5029,6 +5096,21 @@ class AIAgent:
|
|||
tool_duration = time.time() - tool_start_time
|
||||
if self.quiet_mode:
|
||||
self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "to_captcha":
|
||||
from tools.to_captcha_tool import to_captcha_tool as _to_captcha_tool
|
||||
function_result = _to_captcha_tool(
|
||||
task_id=function_args.get("task_id", ""),
|
||||
browser_view_url=function_args.get("browser_view_url"),
|
||||
captcha_type=function_args.get("captcha_type"),
|
||||
instructions=function_args.get("instructions"),
|
||||
detected_at=function_args.get("detected_at"),
|
||||
verification=function_args.get("verification"),
|
||||
resume_token=function_args.get("resume_token"),
|
||||
callback=self.captcha_callback,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if self.quiet_mode:
|
||||
self._vprint(f" {_get_cute_tool_message_impl('to_captcha', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "delegate_task":
|
||||
from tools.delegate_tool import delegate_task as _delegate_task
|
||||
tasks_arg = function_args.get("tasks")
|
||||
|
|
@ -5099,6 +5181,7 @@ class AIAgent:
|
|||
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
|
||||
function_result = self._maybe_resolve_captcha(function_name, function_result, effective_task_id)
|
||||
result_preview = function_result if self.verbose_logging else (
|
||||
function_result[:200] if len(function_result) > 200 else function_result
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,65 +1,130 @@
|
|||
import json
|
||||
import os
|
||||
import time
|
||||
from urllib import error, request
|
||||
|
||||
from tools.registry import registry
|
||||
|
||||
|
||||
def run_browser_task(task):
|
||||
def _browser_api_base_url() -> str:
|
||||
return os.getenv("BROWSER_API_URL", "http://browser-api:8088/api/browser").rstrip("/")
|
||||
|
||||
|
||||
def _http_json(url: str, method: str = "GET", payload: dict | None = None, timeout_sec: int = 30) -> dict:
|
||||
body = None
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if payload is not None:
|
||||
body = json.dumps(payload).encode("utf-8")
|
||||
req = request.Request(url, data=body, headers=headers, method=method)
|
||||
try:
|
||||
with request.urlopen(req, timeout=timeout_sec) as resp:
|
||||
raw = resp.read().decode("utf-8")
|
||||
return json.loads(raw) if raw else {}
|
||||
except error.HTTPError as http_err:
|
||||
raw = http_err.read().decode("utf-8", errors="replace")
|
||||
try:
|
||||
data = json.loads(raw) if raw else {}
|
||||
except json.JSONDecodeError:
|
||||
data = {"details": raw}
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Browser API returned HTTP {http_err.code}",
|
||||
"details": data,
|
||||
}
|
||||
except Exception as err:
|
||||
return {"success": False, "error": f"Browser API request failed: {err}"}
|
||||
|
||||
|
||||
def run_browser_task(task: str):
|
||||
if not task or not str(task).strip():
|
||||
return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
|
||||
|
||||
rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
|
||||
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
||||
payload = json.dumps({"task": task}).encode("utf-8")
|
||||
req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
|
||||
poll_interval = float(os.getenv("BROWSER_API_POLL_INTERVAL", "1.5"))
|
||||
api_base = _browser_api_base_url()
|
||||
|
||||
try:
|
||||
with request.urlopen(req, timeout=timeout_sec) as resp:
|
||||
body = resp.read().decode("utf-8")
|
||||
return body
|
||||
except error.HTTPError as http_err:
|
||||
body = http_err.read().decode("utf-8", errors="replace")
|
||||
accepted = _http_json(
|
||||
f"{api_base}/tasks",
|
||||
method="POST",
|
||||
payload={"task": task, "timeout": timeout_sec, "metadata": {"source": "internet_browser"}},
|
||||
timeout_sec=30,
|
||||
)
|
||||
task_id = accepted.get("task_id")
|
||||
if not task_id:
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"browser-use RPC returned HTTP {http_err.code}",
|
||||
"details": body,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
except Exception as err:
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"browser-use RPC request failed: {err}",
|
||||
"error": accepted.get("error", "Browser task was not accepted"),
|
||||
"details": accepted,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
deadline = time.time() + timeout_sec + 10
|
||||
status_url = f"{api_base}/tasks/{task_id}"
|
||||
result_url = f"{api_base}/tasks/{task_id}/result"
|
||||
|
||||
while time.time() < deadline:
|
||||
status_payload = _http_json(status_url, timeout_sec=15)
|
||||
status = status_payload.get("status")
|
||||
if not status and status_payload.get("error"):
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"status": "failed",
|
||||
"task_id": task_id,
|
||||
"error": status_payload.get("error"),
|
||||
"details": status_payload.get("details"),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
if status == "awaiting_user_captcha":
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"status": status,
|
||||
"task_id": task_id,
|
||||
"human_intervention": status_payload.get("human_intervention"),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
if status in {"succeeded", "failed"}:
|
||||
result_payload = _http_json(result_url, timeout_sec=30)
|
||||
result_payload.setdefault("task_id", task_id)
|
||||
return json.dumps(result_payload, ensure_ascii=False)
|
||||
time.sleep(poll_interval)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"status": "failed",
|
||||
"task_id": task_id,
|
||||
"error": "Timed out while waiting for browser task result",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
registry.register(
|
||||
name="internet_browser",
|
||||
toolset="browse_cmd",
|
||||
toolset="browse_cmd",
|
||||
schema={
|
||||
"name": "internet_browser",
|
||||
"description": (
|
||||
"ГЛАВНЫЙ ИНСТРУМЕНТ ДЛЯ ВЕБ-СЕРФИНГА. Вызывай этот инструмент НАПРЯМУЮ (через стандартный tool call/function call). "
|
||||
"КАТЕГОРИЧЕСКИ ЗАПРЕЩЕНО использовать `execute_code` или `delegate_task` для работы с браузером. "
|
||||
"Не пиши Python-скрипты! Просто передай в этот инструмент параметр `task`. "
|
||||
"Используй для любых задач в интернете: поиск товаров (Wildberries, Ozon), чтение статей, клики, навигация."
|
||||
"Main browser automation tool for internet tasks. Call it directly via a normal tool/function call. "
|
||||
"Do not use execute_code or delegate_task for browser work. Pass the task in natural language."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "Подробная задача на естественном языке. Например: 'Зайди на wildberries.ru, найди черную футболку и верни цену'."
|
||||
"type": "string",
|
||||
"description": "Detailed natural-language browser task."
|
||||
}
|
||||
},
|
||||
"required": ["task"]
|
||||
}
|
||||
},
|
||||
|
||||
handler=lambda args, **kw: run_browser_task(args.get("task")),
|
||||
emoji="🌐",
|
||||
)
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue