Compare commits
2 commits
main
...
dev_to_cap
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b7d02ee81 | |||
| f1f32d8366 |
16 changed files with 1021 additions and 131 deletions
|
|
@ -7,19 +7,21 @@ from api.clients.browser_rpc_contracts import BrowserRpcError
|
||||||
|
|
||||||
class BrowserRpcClient:
|
class BrowserRpcClient:
|
||||||
def __init__(self, rpc_url: str, session: aiohttp.ClientSession) -> None:
|
def __init__(self, rpc_url: str, session: aiohttp.ClientSession) -> None:
|
||||||
self._rpc_url = rpc_url
|
self._run_url = rpc_url.rstrip("/")
|
||||||
|
if self._run_url.endswith("/run"):
|
||||||
|
self._base_url = self._run_url[: -len("/run")]
|
||||||
|
else:
|
||||||
|
self._base_url = self._run_url
|
||||||
|
self._run_url = f"{self._base_url}/run"
|
||||||
self._session = session
|
self._session = session
|
||||||
|
|
||||||
async def run(self, task: str, timeout_sec: float) -> dict[str, Any]:
|
async def _post_json(self, url: str, payload: dict[str, Any], timeout_sec: float | None = None) -> dict[str, Any]:
|
||||||
payload = {"task": task}
|
timeout = aiohttp.ClientTimeout(total=timeout_sec) if timeout_sec is not None else None
|
||||||
timeout = aiohttp.ClientTimeout(total=timeout_sec)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with self._session.post(self._rpc_url, json=payload, timeout=timeout) as response:
|
async with self._session.post(url, json=payload, timeout=timeout) as response:
|
||||||
if response.status >= 400:
|
if response.status >= 400:
|
||||||
body = await response.text()
|
body = await response.text()
|
||||||
raise BrowserRpcError(f"RPC HTTP: {response.status}: {body}")
|
raise BrowserRpcError(f"RPC HTTP: {response.status}: {body}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = await response.json(content_type=None)
|
data = await response.json(content_type=None)
|
||||||
except aiohttp.ContentTypeError as exc:
|
except aiohttp.ContentTypeError as exc:
|
||||||
|
|
@ -29,10 +31,22 @@ class BrowserRpcClient:
|
||||||
|
|
||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
raise BrowserRpcError("RPC returned invalid payload type")
|
raise BrowserRpcError("RPC returned invalid payload type")
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
async def run(self, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]:
|
||||||
|
return await self._post_json(self._run_url, {"task_id": task_id, "task": task}, timeout_sec=timeout_sec)
|
||||||
|
|
||||||
async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]:
|
async def verify_captcha(self, task_id: str) -> dict[str, Any]:
|
||||||
|
return await self._post_json(f"{self._base_url}/verify", {"task_id": task_id}, timeout_sec=15)
|
||||||
|
|
||||||
|
async def resume(self, task_id: str, timeout_sec: float) -> dict[str, Any]:
|
||||||
|
return await self._post_json(f"{self._base_url}/resume", {"task_id": task_id}, timeout_sec=timeout_sec)
|
||||||
|
|
||||||
|
async def abort(self, task_id: str, reason: str | None = None) -> dict[str, Any]:
|
||||||
|
return await self._post_json(f"{self._base_url}/abort", {"task_id": task_id, "reason": reason}, timeout_sec=15)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_browser_task(rpc_url: str, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]:
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
return await BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec)
|
client = BrowserRpcClient(rpc_url, session=session)
|
||||||
|
return await client.run(task_id=task_id, task=task, timeout_sec=timeout_sec)
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,15 @@
|
||||||
from typing import Any, Protocol
|
from typing import Any, Protocol
|
||||||
|
|
||||||
|
|
||||||
class BrowserRpcError(RuntimeError): ...
|
class BrowserRpcError(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BrowserRpcRunner(Protocol):
|
class BrowserRpcRunner(Protocol):
|
||||||
async def run(self, task: str, timeout_sec: float) -> dict[str, Any]: ...
|
async def run(self, task_id: str, task: str, timeout_sec: float) -> dict[str, Any]: ...
|
||||||
|
|
||||||
|
async def verify_captcha(self, task_id: str) -> dict[str, Any]: ...
|
||||||
|
|
||||||
|
async def resume(self, task_id: str, timeout_sec: float) -> dict[str, Any]: ...
|
||||||
|
|
||||||
|
async def abort(self, task_id: str, reason: str | None = None) -> dict[str, Any]: ...
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any
|
from typing import Any, Literal
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
@ -6,38 +6,67 @@ from api.domain.task_status import TaskStatus
|
||||||
|
|
||||||
|
|
||||||
class BrowserTaskRequest(BaseModel):
|
class BrowserTaskRequest(BaseModel):
|
||||||
"""Запрос на запуск задачи в browser-use агенте."""
|
"""Request to start a browser task."""
|
||||||
|
|
||||||
task: str = Field(..., description="Текстовая задача для browser-use агента")
|
task: str = Field(..., description="Text task for the browser-use agent")
|
||||||
timeout: int = Field(300, description="Максимальное время выполнения задачи в секундах")
|
timeout: int = Field(300, description="Maximum task execution time in seconds")
|
||||||
metadata: dict[str, Any] | None = Field(default=None, description="Дополнительные метаданные клиента")
|
metadata: dict[str, Any] | None = Field(default=None, description="Optional client metadata")
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserCaptchaVerification(BaseModel):
|
||||||
|
mode: Literal["dom_url_title"] = "dom_url_title"
|
||||||
|
selectors_absent: list[str] = Field(default_factory=list)
|
||||||
|
challenge_signals_absent: list[str] = Field(default_factory=list)
|
||||||
|
max_wait_seconds: int = Field(..., description="How long the task may stay paused waiting for the user")
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserHumanInterventionPayload(BaseModel):
|
||||||
|
status: Literal["awaiting_user_captcha"] = "awaiting_user_captcha"
|
||||||
|
task_id: str
|
||||||
|
session_id: str
|
||||||
|
resume_token: str
|
||||||
|
browser_view_url: str | None = None
|
||||||
|
captcha_type: Literal["cloudflare", "recaptcha", "hcaptcha", "unknown"] = "unknown"
|
||||||
|
instructions: str
|
||||||
|
detected_at: float
|
||||||
|
verification: BrowserCaptchaVerification
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserCaptchaReadyRequest(BaseModel):
|
||||||
|
user_response: str | None = Field(default=None, description="Free-form confirmation from the user")
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserCaptchaAbortRequest(BaseModel):
|
||||||
|
reason: str | None = Field(default=None, description="Optional reason for aborting the CAPTCHA flow")
|
||||||
|
|
||||||
|
|
||||||
class BrowserTaskAcceptedResponse(BaseModel):
|
class BrowserTaskAcceptedResponse(BaseModel):
|
||||||
"""Ответ о том, что задача принята в обработку."""
|
"""Response indicating that a task was accepted for processing."""
|
||||||
|
|
||||||
task_id: str
|
task_id: str
|
||||||
status: TaskStatus
|
status: TaskStatus
|
||||||
|
|
||||||
|
|
||||||
class BrowserTaskStatusResponse(BaseModel):
|
class BrowserTaskStatusResponse(BaseModel):
|
||||||
"""Текущий статус задачи и временные отметки ее выполнения."""
|
"""Current task status and timestamps."""
|
||||||
|
|
||||||
task_id: str
|
task_id: str
|
||||||
status: TaskStatus
|
status: TaskStatus
|
||||||
create_at: float = Field(..., description="Время создания задачи в Unix timestamp")
|
create_at: float = Field(..., description="Task creation time as a Unix timestamp")
|
||||||
started_at: float | None = Field(default=None, description="Время начала выполнения в Unix timestamp")
|
started_at: float | None = Field(default=None, description="Task start time as a Unix timestamp")
|
||||||
finished_at: float | None = Field(default=None, description="Время завершения выполнения в Unix timestamp")
|
finished_at: float | None = Field(default=None, description="Task completion time as a Unix timestamp")
|
||||||
error: str | None = Field(default=None, description="Текст ошибки, если задача завершилась с ошибкой")
|
error: str | None = Field(default=None, description="Task error when applicable")
|
||||||
|
human_intervention: BrowserHumanInterventionPayload | None = None
|
||||||
|
|
||||||
|
|
||||||
class BrowserTaskResultResponse(BaseModel):
|
class BrowserTaskResultResponse(BaseModel):
|
||||||
"""Финальный результат выполнения задачи в browser-use."""
|
"""Terminal result or meaningful paused state for a browser task."""
|
||||||
|
|
||||||
task_id: str
|
task_id: str
|
||||||
status: TaskStatus
|
status: TaskStatus
|
||||||
success: bool = Field(..., description="Успешно ли выполнена задача")
|
success: bool = Field(..., description="Whether the task completed successfully")
|
||||||
execution_time: float = Field(..., description="Фактическое время выполнения в секундах")
|
execution_time: float = Field(..., description="Observed execution time in seconds")
|
||||||
result: str | None = Field(default=None, description="Итоговый текстовый результат")
|
result: str | None = Field(default=None, description="Final textual result when available")
|
||||||
error: str | None = Field(default=None, description="Текст ошибки, если выполнение не удалось")
|
error: str | None = Field(default=None, description="Error text when execution failed")
|
||||||
raw_response: dict[str, Any] | None = Field(default=None, description="Сырой ответ от browser-use RPC")
|
raw_response: dict[str, Any] | None = Field(default=None, description="Raw payload returned by the browser runtime")
|
||||||
|
human_intervention: BrowserHumanInterventionPayload | None = None
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ class Settings:
|
||||||
|
|
||||||
browser_rpc_url: str = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
|
browser_rpc_url: str = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
|
||||||
browser_rpc_timeout: float = float(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
browser_rpc_timeout: float = float(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
||||||
|
captcha_wait_timeout: int = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))
|
||||||
|
|
||||||
max_concurrency: int = int(os.getenv("BROWSER_API_MAX_CONCURRENCY", "2"))
|
max_concurrency: int = int(os.getenv("BROWSER_API_MAX_CONCURRENCY", "2"))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,10 @@ from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
class TaskStatus(str, Enum):
|
class TaskStatus(str, Enum):
|
||||||
"""Состояние задачи браузерного агента."""
|
"""Browser task lifecycle states."""
|
||||||
|
|
||||||
queued = "queued"
|
queued = "queued"
|
||||||
running = "running"
|
running = "running"
|
||||||
|
awaiting_user_captcha = "awaiting_user_captcha"
|
||||||
succeeded = "succeeded"
|
succeeded = "succeeded"
|
||||||
failed = "failed"
|
failed = "failed"
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ async def lifespan(app: FastAPI):
|
||||||
rpc_client=BrowserRpcClient(settings.browser_rpc_url, session=session),
|
rpc_client=BrowserRpcClient(settings.browser_rpc_url, session=session),
|
||||||
max_concurrency=settings.max_concurrency,
|
max_concurrency=settings.max_concurrency,
|
||||||
rpc_timeout_cap=settings.browser_rpc_timeout,
|
rpc_timeout_cap=settings.browser_rpc_timeout,
|
||||||
|
captcha_wait_timeout=settings.captcha_wait_timeout,
|
||||||
)
|
)
|
||||||
app.state.task_service = task_service
|
app.state.task_service = task_service
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -20,13 +20,17 @@ class TaskRecord:
|
||||||
result: str | None = None
|
result: str | None = None
|
||||||
error: str | None = None
|
error: str | None = None
|
||||||
raw_response: dict[str, Any] | None = None
|
raw_response: dict[str, Any] | None = None
|
||||||
|
human_intervention: dict[str, Any] | None = None
|
||||||
|
session_id: str | None = None
|
||||||
|
resume_token: str | None = None
|
||||||
|
awaiting_deadline: float | None = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def execution_time(self) -> float:
|
def execution_time(self) -> float:
|
||||||
if self.started_at is None:
|
if self.started_at is None:
|
||||||
return 0
|
return 0
|
||||||
end = self.finished_at if self.finished_at is not None else time.time()
|
end = self.finished_at if self.finished_at is not None else time.time()
|
||||||
return max(0, end - self.started_at)
|
return max(0.0, end - self.started_at)
|
||||||
|
|
||||||
|
|
||||||
class TaskStore:
|
class TaskStore:
|
||||||
|
|
@ -51,16 +55,42 @@ class TaskStore:
|
||||||
if rec is None:
|
if rec is None:
|
||||||
return None
|
return None
|
||||||
rec.status = TaskStatus.running
|
rec.status = TaskStatus.running
|
||||||
rec.started_at = time.time()
|
rec.finished_at = None
|
||||||
|
rec.error = None
|
||||||
|
rec.human_intervention = None
|
||||||
|
rec.awaiting_deadline = None
|
||||||
|
if rec.started_at is None:
|
||||||
|
rec.started_at = time.time()
|
||||||
|
return rec
|
||||||
|
|
||||||
|
async def set_awaiting_captcha(
|
||||||
|
self,
|
||||||
|
task_id: str,
|
||||||
|
raw_response: dict[str, Any],
|
||||||
|
max_wait_seconds: int,
|
||||||
|
) -> TaskRecord | None:
|
||||||
|
async with self._lock:
|
||||||
|
rec = self._tasks.get(task_id)
|
||||||
|
if rec is None:
|
||||||
|
return None
|
||||||
|
payload = raw_response.get("human_intervention") or {}
|
||||||
|
rec.status = TaskStatus.awaiting_user_captcha
|
||||||
|
rec.raw_response = raw_response
|
||||||
|
rec.human_intervention = payload
|
||||||
|
rec.session_id = payload.get("session_id") or rec.session_id
|
||||||
|
rec.resume_token = payload.get("resume_token") or rec.resume_token
|
||||||
|
rec.awaiting_deadline = time.time() + max(1, int(max_wait_seconds))
|
||||||
|
rec.error = None
|
||||||
|
rec.finished_at = None
|
||||||
return rec
|
return rec
|
||||||
|
|
||||||
async def set_done(
|
async def set_done(
|
||||||
self,
|
self,
|
||||||
task_id: str,
|
task_id: str,
|
||||||
success: bool,
|
success: bool,
|
||||||
raw_response: dict[str, Any] | None,
|
raw_response: dict[str, Any] | None,
|
||||||
error: str | None,
|
error: str | None,
|
||||||
result: str | None = None,
|
result: str | None = None,
|
||||||
) -> TaskRecord | None:
|
) -> TaskRecord | None:
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
rec = self._tasks.get(task_id)
|
rec = self._tasks.get(task_id)
|
||||||
|
|
@ -69,8 +99,35 @@ class TaskStore:
|
||||||
rec.finished_at = time.time()
|
rec.finished_at = time.time()
|
||||||
rec.raw_response = raw_response
|
rec.raw_response = raw_response
|
||||||
rec.error = error if error is not None else (
|
rec.error = error if error is not None else (
|
||||||
raw_response.get("error") if isinstance(raw_response, dict) else None)
|
raw_response.get("error") if isinstance(raw_response, dict) else None
|
||||||
|
)
|
||||||
rec.result = result if result is not None else (
|
rec.result = result if result is not None else (
|
||||||
raw_response.get("result") if isinstance(raw_response, dict) else None)
|
raw_response.get("result") if isinstance(raw_response, dict) else None
|
||||||
|
)
|
||||||
|
rec.human_intervention = None
|
||||||
|
rec.awaiting_deadline = None
|
||||||
rec.status = TaskStatus.succeeded if success else TaskStatus.failed
|
rec.status = TaskStatus.succeeded if success else TaskStatus.failed
|
||||||
return rec
|
return rec
|
||||||
|
|
||||||
|
async def expire_if_needed(self, task_id: str) -> TaskRecord | None:
|
||||||
|
async with self._lock:
|
||||||
|
rec = self._tasks.get(task_id)
|
||||||
|
if rec is None:
|
||||||
|
return None
|
||||||
|
if rec.status != TaskStatus.awaiting_user_captcha:
|
||||||
|
return rec
|
||||||
|
if rec.awaiting_deadline is None or rec.awaiting_deadline > time.time():
|
||||||
|
return rec
|
||||||
|
|
||||||
|
rec.status = TaskStatus.failed
|
||||||
|
rec.finished_at = time.time()
|
||||||
|
rec.error = "CAPTCHA wait expired before the user completed verification."
|
||||||
|
rec.raw_response = {
|
||||||
|
"success": False,
|
||||||
|
"status": TaskStatus.failed.value,
|
||||||
|
"error": rec.error,
|
||||||
|
"error_code": "captcha_wait_expired",
|
||||||
|
}
|
||||||
|
rec.human_intervention = None
|
||||||
|
rec.awaiting_deadline = None
|
||||||
|
return rec
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
from api.contracts.task_schemas import (
|
from api.contracts.task_schemas import (
|
||||||
|
BrowserCaptchaAbortRequest,
|
||||||
|
BrowserCaptchaReadyRequest,
|
||||||
BrowserTaskAcceptedResponse,
|
BrowserTaskAcceptedResponse,
|
||||||
BrowserTaskRequest,
|
BrowserTaskRequest,
|
||||||
BrowserTaskResultResponse,
|
BrowserTaskResultResponse,
|
||||||
|
|
@ -20,8 +22,8 @@ def get_task_service(request: Request) -> TaskService:
|
||||||
|
|
||||||
@router.post("/tasks", response_model=BrowserTaskAcceptedResponse, status_code=202)
|
@router.post("/tasks", response_model=BrowserTaskAcceptedResponse, status_code=202)
|
||||||
async def create_task(
|
async def create_task(
|
||||||
payload: BrowserTaskRequest,
|
payload: BrowserTaskRequest,
|
||||||
service: TaskService = Depends(get_task_service),
|
service: TaskService = Depends(get_task_service),
|
||||||
) -> BrowserTaskAcceptedResponse:
|
) -> BrowserTaskAcceptedResponse:
|
||||||
rec = await service.submit_task(task=payload.task.strip(), timeout=payload.timeout, metadata=payload.metadata)
|
rec = await service.submit_task(task=payload.task.strip(), timeout=payload.timeout, metadata=payload.metadata)
|
||||||
return BrowserTaskAcceptedResponse(task_id=rec.task_id, status=rec.status)
|
return BrowserTaskAcceptedResponse(task_id=rec.task_id, status=rec.status)
|
||||||
|
|
@ -37,8 +39,8 @@ async def get_task_status(task_id: str, service: TaskService = Depends(get_task_
|
||||||
|
|
||||||
@router.get("/tasks/{task_id}/result", response_model=BrowserTaskResultResponse)
|
@router.get("/tasks/{task_id}/result", response_model=BrowserTaskResultResponse)
|
||||||
async def get_task_result(
|
async def get_task_result(
|
||||||
task_id: str,
|
task_id: str,
|
||||||
service: TaskService = Depends(get_task_service),
|
service: TaskService = Depends(get_task_service),
|
||||||
) -> JSONResponse | BrowserTaskResultResponse:
|
) -> JSONResponse | BrowserTaskResultResponse:
|
||||||
rec = await service.get_task(task_id)
|
rec = await service.get_task(task_id)
|
||||||
if rec is None:
|
if rec is None:
|
||||||
|
|
@ -55,18 +57,35 @@ async def get_task_result(
|
||||||
"result": None,
|
"result": None,
|
||||||
"error": None,
|
"error": None,
|
||||||
"raw_response": None,
|
"raw_response": None,
|
||||||
|
"human_intervention": None,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
return BrowserTaskResultResponse(
|
return _to_result_response(rec)
|
||||||
task_id=rec.task_id,
|
|
||||||
status=rec.status,
|
|
||||||
success=(rec.status == TaskStatus.succeeded),
|
@router.post("/tasks/{task_id}/captcha/ready", response_model=BrowserTaskResultResponse)
|
||||||
execution_time=rec.execution_time,
|
async def captcha_ready(
|
||||||
result=rec.result,
|
task_id: str,
|
||||||
error=rec.error,
|
payload: BrowserCaptchaReadyRequest,
|
||||||
raw_response=rec.raw_response,
|
service: TaskService = Depends(get_task_service),
|
||||||
)
|
) -> BrowserTaskResultResponse:
|
||||||
|
rec = await service.resume_captcha(task_id, user_response=payload.user_response)
|
||||||
|
if rec is None:
|
||||||
|
raise HTTPException(status_code=404, detail="Task not found")
|
||||||
|
return _to_result_response(rec)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/tasks/{task_id}/captcha/abort", response_model=BrowserTaskResultResponse)
|
||||||
|
async def captcha_abort(
|
||||||
|
task_id: str,
|
||||||
|
payload: BrowserCaptchaAbortRequest,
|
||||||
|
service: TaskService = Depends(get_task_service),
|
||||||
|
) -> BrowserTaskResultResponse:
|
||||||
|
rec = await service.abort_captcha(task_id, reason=payload.reason)
|
||||||
|
if rec is None:
|
||||||
|
raise HTTPException(status_code=404, detail="Task not found")
|
||||||
|
return _to_result_response(rec)
|
||||||
|
|
||||||
|
|
||||||
def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
|
def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
|
||||||
|
|
@ -77,4 +96,18 @@ def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
|
||||||
started_at=rec.started_at,
|
started_at=rec.started_at,
|
||||||
finished_at=rec.finished_at,
|
finished_at=rec.finished_at,
|
||||||
error=rec.error,
|
error=rec.error,
|
||||||
|
human_intervention=rec.human_intervention,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _to_result_response(rec: TaskRecord) -> BrowserTaskResultResponse:
|
||||||
|
return BrowserTaskResultResponse(
|
||||||
|
task_id=rec.task_id,
|
||||||
|
status=rec.status,
|
||||||
|
success=(rec.status == TaskStatus.succeeded),
|
||||||
|
execution_time=rec.execution_time,
|
||||||
|
result=rec.result,
|
||||||
|
error=rec.error,
|
||||||
|
raw_response=rec.raw_response,
|
||||||
|
human_intervention=rec.human_intervention,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,24 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner
|
from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner
|
||||||
|
from api.domain.task_status import TaskStatus
|
||||||
from api.repositories.task_store import TaskRecord, TaskStore
|
from api.repositories.task_store import TaskRecord, TaskStore
|
||||||
|
|
||||||
|
|
||||||
class TaskService:
|
class TaskService:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
store: TaskStore,
|
store: TaskStore,
|
||||||
rpc_client: BrowserRpcRunner,
|
rpc_client: BrowserRpcRunner,
|
||||||
max_concurrency: int,
|
max_concurrency: int,
|
||||||
rpc_timeout_cap: float | None = None,
|
rpc_timeout_cap: float | None = None,
|
||||||
|
captcha_wait_timeout: int = 900,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._store = store
|
self._store = store
|
||||||
self._rpc_client = rpc_client
|
self._rpc_client = rpc_client
|
||||||
self._semaphore = asyncio.Semaphore(max_concurrency)
|
self._semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
self._rpc_timeout_cap = rpc_timeout_cap
|
self._rpc_timeout_cap = rpc_timeout_cap
|
||||||
|
self._captcha_wait_timeout = captcha_wait_timeout
|
||||||
self._background_tasks: set[asyncio.Task[None]] = set()
|
self._background_tasks: set[asyncio.Task[None]] = set()
|
||||||
|
|
||||||
async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord:
|
async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord:
|
||||||
|
|
@ -26,7 +29,22 @@ class TaskService:
|
||||||
return record
|
return record
|
||||||
|
|
||||||
async def get_task(self, task_id: str) -> TaskRecord | None:
|
async def get_task(self, task_id: str) -> TaskRecord | None:
|
||||||
return await self._store.get(task_id)
|
before = await self._store.get(task_id)
|
||||||
|
was_awaiting = bool(before is not None and before.status == TaskStatus.awaiting_user_captcha)
|
||||||
|
await self._store.expire_if_needed(task_id)
|
||||||
|
after = await self._store.get(task_id)
|
||||||
|
if (
|
||||||
|
was_awaiting
|
||||||
|
and after is not None
|
||||||
|
and after.status == TaskStatus.failed
|
||||||
|
and after.error
|
||||||
|
and "expired" in after.error.lower()
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
await self._rpc_client.abort(task_id, reason=after.error)
|
||||||
|
except BrowserRpcError:
|
||||||
|
pass
|
||||||
|
return after
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
if not self._background_tasks:
|
if not self._background_tasks:
|
||||||
|
|
@ -37,6 +55,86 @@ class TaskService:
|
||||||
await asyncio.gather(*self._background_tasks, return_exceptions=True)
|
await asyncio.gather(*self._background_tasks, return_exceptions=True)
|
||||||
self._background_tasks.clear()
|
self._background_tasks.clear()
|
||||||
|
|
||||||
|
async def resume_captcha(self, task_id: str, user_response: str | None = None) -> TaskRecord | None:
|
||||||
|
rec = await self.get_task(task_id)
|
||||||
|
if rec is None:
|
||||||
|
return None
|
||||||
|
if rec.status == TaskStatus.failed:
|
||||||
|
return rec
|
||||||
|
if rec.status != TaskStatus.awaiting_user_captcha:
|
||||||
|
return rec
|
||||||
|
|
||||||
|
verify_raw = await self._rpc_client.verify_captcha(task_id)
|
||||||
|
if not verify_raw.get("verified"):
|
||||||
|
await self._store.set_awaiting_captcha(
|
||||||
|
task_id=task_id,
|
||||||
|
raw_response={
|
||||||
|
"success": False,
|
||||||
|
"status": TaskStatus.awaiting_user_captcha.value,
|
||||||
|
"error": "CAPTCHA is still present.",
|
||||||
|
"human_intervention": rec.human_intervention,
|
||||||
|
"verification": verify_raw,
|
||||||
|
"user_response": user_response,
|
||||||
|
},
|
||||||
|
max_wait_seconds=(rec.human_intervention or {}).get("verification", {}).get(
|
||||||
|
"max_wait_seconds", self._captcha_wait_timeout
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return await self._store.get(task_id)
|
||||||
|
|
||||||
|
await self._store.set_running(task_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
rpc_timeout = float(rec.timeout)
|
||||||
|
if self._rpc_timeout_cap is not None:
|
||||||
|
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
|
||||||
|
|
||||||
|
raw = await asyncio.wait_for(
|
||||||
|
self._rpc_client.resume(task_id=task_id, timeout_sec=rpc_timeout),
|
||||||
|
timeout=float(rec.timeout) + 5,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
await self._store.set_done(
|
||||||
|
task_id=task_id,
|
||||||
|
success=False,
|
||||||
|
raw_response=None,
|
||||||
|
error="Timeout exceeded after CAPTCHA resume.",
|
||||||
|
)
|
||||||
|
return await self._store.get(task_id)
|
||||||
|
except BrowserRpcError as exc:
|
||||||
|
await self._store.set_done(
|
||||||
|
task_id=task_id,
|
||||||
|
success=False,
|
||||||
|
raw_response=None,
|
||||||
|
error=str(exc),
|
||||||
|
)
|
||||||
|
return await self._store.get(task_id)
|
||||||
|
|
||||||
|
await self._apply_rpc_result(task_id, raw)
|
||||||
|
return await self._store.get(task_id)
|
||||||
|
|
||||||
|
async def abort_captcha(self, task_id: str, reason: str | None = None) -> TaskRecord | None:
|
||||||
|
rec = await self.get_task(task_id)
|
||||||
|
if rec is None:
|
||||||
|
return None
|
||||||
|
if rec.status == TaskStatus.awaiting_user_captcha:
|
||||||
|
try:
|
||||||
|
await self._rpc_client.abort(task_id, reason=reason)
|
||||||
|
except BrowserRpcError:
|
||||||
|
pass
|
||||||
|
await self._store.set_done(
|
||||||
|
task_id=task_id,
|
||||||
|
success=False,
|
||||||
|
raw_response={
|
||||||
|
"success": False,
|
||||||
|
"status": TaskStatus.failed.value,
|
||||||
|
"error_code": "captcha_aborted",
|
||||||
|
"reason": reason,
|
||||||
|
},
|
||||||
|
error=reason or "User aborted CAPTCHA flow.",
|
||||||
|
)
|
||||||
|
return await self._store.get(task_id)
|
||||||
|
|
||||||
async def _worker(self, task_id: str) -> None:
|
async def _worker(self, task_id: str) -> None:
|
||||||
rec = await self._store.set_running(task_id)
|
rec = await self._store.set_running(task_id)
|
||||||
if rec is None:
|
if rec is None:
|
||||||
|
|
@ -49,17 +147,10 @@ class TaskService:
|
||||||
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
|
rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
|
||||||
|
|
||||||
raw = await asyncio.wait_for(
|
raw = await asyncio.wait_for(
|
||||||
self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout),
|
self._rpc_client.run(task_id=task_id, task=rec.task, timeout_sec=rpc_timeout),
|
||||||
timeout=float(rec.timeout) + 5,
|
timeout=float(rec.timeout) + 5,
|
||||||
)
|
)
|
||||||
success = bool(raw.get("success"))
|
await self._apply_rpc_result(task_id, raw)
|
||||||
await self._store.set_done(
|
|
||||||
task_id=task_id,
|
|
||||||
success=success,
|
|
||||||
raw_response=raw,
|
|
||||||
error=None,
|
|
||||||
result=raw.get("result") if isinstance(raw, dict) else None,
|
|
||||||
)
|
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
await self._store.set_done(
|
await self._store.set_done(
|
||||||
task_id=task_id,
|
task_id=task_id,
|
||||||
|
|
@ -81,3 +172,22 @@ class TaskService:
|
||||||
raw_response=None,
|
raw_response=None,
|
||||||
error=f"Internal error: {exc}",
|
error=f"Internal error: {exc}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _apply_rpc_result(self, task_id: str, raw: dict | None) -> None:
|
||||||
|
raw = raw or {}
|
||||||
|
status = raw.get("status")
|
||||||
|
if status == TaskStatus.awaiting_user_captcha.value:
|
||||||
|
human = raw.get("human_intervention") or {}
|
||||||
|
verification = human.get("verification") or {}
|
||||||
|
max_wait_seconds = verification.get("max_wait_seconds", self._captcha_wait_timeout)
|
||||||
|
await self._store.set_awaiting_captcha(task_id, raw_response=raw, max_wait_seconds=max_wait_seconds)
|
||||||
|
return
|
||||||
|
|
||||||
|
success = bool(raw.get("success"))
|
||||||
|
await self._store.set_done(
|
||||||
|
task_id=task_id,
|
||||||
|
success=success,
|
||||||
|
raw_response=raw,
|
||||||
|
error=None,
|
||||||
|
result=raw.get("result") if isinstance(raw, dict) else None,
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,52 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from typing import Any
|
||||||
from urllib import error, request
|
from urllib import error, request
|
||||||
|
|
||||||
from browser_use import Agent, Browser, ChatOpenAI
|
from browser_use import Agent, Browser, ChatOpenAI
|
||||||
|
|
||||||
|
|
||||||
|
CAPTCHA_WAIT_TIMEOUT = int(os.getenv("BROWSER_CAPTCHA_MAX_WAIT_SECONDS", "900"))
|
||||||
|
_RUNNER_TASKS: dict[str, "RunnerTask"] = {}
|
||||||
|
_RUNNER_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
_CF_STRONG = (
|
||||||
|
"just a moment",
|
||||||
|
"attention required",
|
||||||
|
"checking your browser",
|
||||||
|
"cf-challenge",
|
||||||
|
"cdn-cgi/challenge-platform",
|
||||||
|
"__cf_chl",
|
||||||
|
"turnstile",
|
||||||
|
)
|
||||||
|
_RECAPTCHA_STRONG = (
|
||||||
|
"g-recaptcha",
|
||||||
|
"recaptcha/api2",
|
||||||
|
"www.google.com/recaptcha",
|
||||||
|
"grecaptcha",
|
||||||
|
)
|
||||||
|
_HCAPTCHA_STRONG = (
|
||||||
|
"hcaptcha",
|
||||||
|
"newassets.hcaptcha.com",
|
||||||
|
"js.hcaptcha.com/1/api.js",
|
||||||
|
)
|
||||||
|
_GENERIC_CAPTCHA_STRONG = (
|
||||||
|
"captcha",
|
||||||
|
"are you human",
|
||||||
|
"verify you are human",
|
||||||
|
"human verification",
|
||||||
|
"bot detection",
|
||||||
|
"security check",
|
||||||
|
"press and hold",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _json_response(handler, status_code, payload):
|
def _json_response(handler, status_code, payload):
|
||||||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||||||
handler.send_response(status_code)
|
handler.send_response(status_code)
|
||||||
|
|
@ -16,12 +56,198 @@ def _json_response(handler, status_code, payload):
|
||||||
handler.wfile.write(data)
|
handler.wfile.write(data)
|
||||||
|
|
||||||
|
|
||||||
async def run_browser_task(task):
|
@dataclass
|
||||||
|
class RunnerTask:
|
||||||
|
task_id: str
|
||||||
|
task: str
|
||||||
|
browser_view_url: str
|
||||||
|
resume_token: str = field(default_factory=lambda: uuid.uuid4().hex)
|
||||||
|
created_at: float = field(default_factory=time.time)
|
||||||
|
status: str = "starting"
|
||||||
|
payload: dict[str, Any] | None = None
|
||||||
|
error: str | None = None
|
||||||
|
agent: Any = None
|
||||||
|
browser: Any = None
|
||||||
|
loop: asyncio.AbstractEventLoop | None = None
|
||||||
|
thread: threading.Thread | None = None
|
||||||
|
settled_event: threading.Event = field(default_factory=threading.Event)
|
||||||
|
finished: bool = False
|
||||||
|
awaiting: bool = False
|
||||||
|
aborted: bool = False
|
||||||
|
transition_count: int = 0
|
||||||
|
lock: threading.Lock = field(default_factory=threading.Lock)
|
||||||
|
|
||||||
|
def set_payload(self, status: str, payload: dict[str, Any]) -> None:
|
||||||
|
with self.lock:
|
||||||
|
self.status = status
|
||||||
|
self.payload = payload
|
||||||
|
self.transition_count += 1
|
||||||
|
self.awaiting = status == "awaiting_user_captcha"
|
||||||
|
self.finished = status in {"succeeded", "failed"}
|
||||||
|
self.settled_event.set()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_task(task_id: str) -> RunnerTask | None:
|
||||||
|
with _RUNNER_LOCK:
|
||||||
|
return _RUNNER_TASKS.get(task_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _put_task(task: RunnerTask) -> RunnerTask:
|
||||||
|
with _RUNNER_LOCK:
|
||||||
|
_RUNNER_TASKS[task.task_id] = task
|
||||||
|
return task
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_page_html(agent: Agent) -> str:
|
||||||
|
try:
|
||||||
|
cdp_session = await agent.browser_session.get_or_create_cdp_session()
|
||||||
|
doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id)
|
||||||
|
html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML(
|
||||||
|
params={"nodeId": doc["root"]["nodeId"]},
|
||||||
|
session_id=cdp_session.session_id,
|
||||||
|
)
|
||||||
|
return str(html_result.get("outerHTML", ""))
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
async def _capture_page_state(agent: Agent) -> dict[str, Any]:
|
||||||
|
url = ""
|
||||||
|
title = ""
|
||||||
|
summary = None
|
||||||
|
try:
|
||||||
|
summary = await agent.browser_session.get_browser_state_summary()
|
||||||
|
except Exception:
|
||||||
|
summary = None
|
||||||
|
|
||||||
|
if summary is not None:
|
||||||
|
if isinstance(summary, dict):
|
||||||
|
url = str(summary.get("url") or "")
|
||||||
|
title = str(summary.get("title") or "")
|
||||||
|
else:
|
||||||
|
url = str(getattr(summary, "url", "") or "")
|
||||||
|
title = str(getattr(summary, "title", "") or "")
|
||||||
|
|
||||||
|
if not url:
|
||||||
|
try:
|
||||||
|
url = str(await agent.browser_session.get_current_page_url() or "")
|
||||||
|
except Exception:
|
||||||
|
url = ""
|
||||||
|
if not title:
|
||||||
|
try:
|
||||||
|
title = str(await agent.browser_session.get_current_page_title() or "")
|
||||||
|
except Exception:
|
||||||
|
title = ""
|
||||||
|
|
||||||
|
html = await _get_page_html(agent)
|
||||||
|
return {"url": url, "title": title, "html": html}
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_captcha(haystack: str) -> str:
|
||||||
|
if any(token in haystack for token in _CF_STRONG):
|
||||||
|
return "cloudflare"
|
||||||
|
if any(token in haystack for token in _RECAPTCHA_STRONG):
|
||||||
|
return "recaptcha"
|
||||||
|
if any(token in haystack for token in _HCAPTCHA_STRONG):
|
||||||
|
return "hcaptcha"
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_captcha_from_state(page_state: dict[str, Any]) -> tuple[bool, str, list[str]]:
|
||||||
|
url = str(page_state.get("url") or "").lower()
|
||||||
|
title = str(page_state.get("title") or "").lower()
|
||||||
|
html = str(page_state.get("html") or "").lower()
|
||||||
|
haystack = "\n".join([url, title, html[:150000]])
|
||||||
|
|
||||||
|
signals: list[str] = []
|
||||||
|
if any(token in haystack for token in _CF_STRONG):
|
||||||
|
signals.append("cloudflare_challenge")
|
||||||
|
if any(token in haystack for token in _RECAPTCHA_STRONG):
|
||||||
|
signals.append("recaptcha")
|
||||||
|
if any(token in haystack for token in _HCAPTCHA_STRONG):
|
||||||
|
signals.append("hcaptcha")
|
||||||
|
|
||||||
|
generic_hits = [token for token in _GENERIC_CAPTCHA_STRONG if token in haystack]
|
||||||
|
if generic_hits:
|
||||||
|
signals.extend(f"generic:{token}" for token in generic_hits[:3])
|
||||||
|
|
||||||
|
blocked = bool(signals)
|
||||||
|
captcha_type = _classify_captcha(haystack)
|
||||||
|
return blocked, captcha_type, signals
|
||||||
|
|
||||||
|
|
||||||
|
async def _build_captcha_payload(task: RunnerTask, agent: Agent) -> dict[str, Any]:
|
||||||
|
page_state = await _capture_page_state(agent)
|
||||||
|
blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
|
||||||
|
if not blocked:
|
||||||
|
raise RuntimeError("Captcha payload requested without an active challenge")
|
||||||
|
|
||||||
|
verification = {
|
||||||
|
"mode": "dom_url_title",
|
||||||
|
"selectors_absent": [
|
||||||
|
"iframe[src*='recaptcha']",
|
||||||
|
"[class*='hcaptcha']",
|
||||||
|
"[id*='captcha']",
|
||||||
|
"form[action*='challenge']",
|
||||||
|
"input[name='cf-turnstile-response']",
|
||||||
|
],
|
||||||
|
"challenge_signals_absent": signals,
|
||||||
|
"max_wait_seconds": CAPTCHA_WAIT_TIMEOUT,
|
||||||
|
}
|
||||||
|
browser_view_url = task.browser_view_url or None
|
||||||
|
instructions = (
|
||||||
|
"Open the live browser view, complete the verification challenge manually, "
|
||||||
|
"then return and reply 'ready' or 'done'."
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"status": "awaiting_user_captcha",
|
||||||
|
"task_id": task.task_id,
|
||||||
|
"session_id": task.task_id,
|
||||||
|
"resume_token": task.resume_token,
|
||||||
|
"browser_view_url": browser_view_url,
|
||||||
|
"captcha_type": captcha_type,
|
||||||
|
"instructions": instructions,
|
||||||
|
"detected_at": time.time(),
|
||||||
|
"page_url": page_state.get("url"),
|
||||||
|
"page_title": page_state.get("title"),
|
||||||
|
"verification": verification,
|
||||||
|
"human_intervention": {
|
||||||
|
"status": "awaiting_user_captcha",
|
||||||
|
"task_id": task.task_id,
|
||||||
|
"session_id": task.task_id,
|
||||||
|
"resume_token": task.resume_token,
|
||||||
|
"browser_view_url": browser_view_url,
|
||||||
|
"captcha_type": captcha_type,
|
||||||
|
"instructions": instructions,
|
||||||
|
"detected_at": time.time(),
|
||||||
|
"verification": verification,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _verify_captcha_state(task: RunnerTask) -> dict[str, Any]:
|
||||||
|
if not task.agent:
|
||||||
|
return {"success": False, "verified": False, "error": "Task is not attached to an active agent"}
|
||||||
|
|
||||||
|
page_state = await _capture_page_state(task.agent)
|
||||||
|
blocked, captcha_type, signals = _detect_captcha_from_state(page_state)
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"task_id": task.task_id,
|
||||||
|
"verified": not blocked,
|
||||||
|
"captcha_type": captcha_type if blocked else None,
|
||||||
|
"page_url": page_state.get("url"),
|
||||||
|
"page_title": page_state.get("title"),
|
||||||
|
"signals": signals,
|
||||||
|
"verification_mode": "dom_url_title",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_browser_task(task: RunnerTask):
|
||||||
cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
|
cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
|
||||||
browser_view_url = os.getenv("BROWSER_VIEW_URL", "")
|
|
||||||
|
|
||||||
browser = Browser(cdp_url=cdp_url)
|
browser = Browser(cdp_url=cdp_url)
|
||||||
|
|
||||||
llm = ChatOpenAI(
|
llm = ChatOpenAI(
|
||||||
model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
|
model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
|
||||||
api_key=os.getenv("OPENAI_API_KEY"),
|
api_key=os.getenv("OPENAI_API_KEY"),
|
||||||
|
|
@ -29,22 +255,131 @@ async def run_browser_task(task):
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
agent = Agent(task=task, llm=llm, browser=browser)
|
agent = Agent(task=task.task, llm=llm, browser=browser)
|
||||||
|
task.browser = browser
|
||||||
|
task.agent = agent
|
||||||
|
|
||||||
|
async def on_step_end(current_agent: Agent):
|
||||||
|
if task.awaiting or task.finished or task.aborted:
|
||||||
|
return
|
||||||
|
page_state = await _capture_page_state(current_agent)
|
||||||
|
blocked, _, _ = _detect_captcha_from_state(page_state)
|
||||||
|
if not blocked:
|
||||||
|
return
|
||||||
|
payload = await _build_captcha_payload(task, current_agent)
|
||||||
|
task.set_payload("awaiting_user_captcha", payload)
|
||||||
|
current_agent.pause()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
history = await agent.run()
|
history = await agent.run(on_step_end=on_step_end)
|
||||||
return {
|
if task.aborted:
|
||||||
"success": True,
|
task.set_payload(
|
||||||
"result": history.final_result(),
|
"failed",
|
||||||
"browser_view": browser_view_url,
|
{"success": False, "status": "failed", "error": task.error or "Task aborted during CAPTCHA flow."},
|
||||||
}
|
)
|
||||||
|
return
|
||||||
|
if task.awaiting:
|
||||||
|
return
|
||||||
|
task.set_payload(
|
||||||
|
"succeeded",
|
||||||
|
{
|
||||||
|
"success": True,
|
||||||
|
"status": "succeeded",
|
||||||
|
"result": history.final_result(),
|
||||||
|
"browser_view_url": task.browser_view_url or None,
|
||||||
|
},
|
||||||
|
)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return {"success": False, "error": f"Browser automation failed: {err}"}
|
if not task.awaiting and not task.finished:
|
||||||
|
task.set_payload(
|
||||||
|
"failed",
|
||||||
|
{"success": False, "status": "failed", "error": f"Browser automation failed: {err}"},
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
try:
|
if not task.awaiting:
|
||||||
await browser.close()
|
try:
|
||||||
except Exception:
|
await browser.close()
|
||||||
pass
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _runner_thread_main(task: RunnerTask) -> None:
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
task.loop = loop
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
try:
|
||||||
|
loop.run_until_complete(_run_browser_task(task))
|
||||||
|
finally:
|
||||||
|
pending = asyncio.all_tasks(loop=loop)
|
||||||
|
for pending_task in pending:
|
||||||
|
pending_task.cancel()
|
||||||
|
if pending:
|
||||||
|
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _start_task(task_id: str, task_text: str) -> dict[str, Any]:
|
||||||
|
existing = _get_task(task_id)
|
||||||
|
if existing:
|
||||||
|
return existing.payload or {
|
||||||
|
"success": False,
|
||||||
|
"status": existing.status,
|
||||||
|
"error": "Task already exists",
|
||||||
|
"task_id": task_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
state = _put_task(
|
||||||
|
RunnerTask(
|
||||||
|
task_id=task_id,
|
||||||
|
task=task_text,
|
||||||
|
browser_view_url=os.getenv("BROWSER_VIEW_URL", ""),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
thread = threading.Thread(target=_runner_thread_main, args=(state,), daemon=True, name=f"browser-task-{task_id[:8]}")
|
||||||
|
state.thread = thread
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
state.settled_event.wait()
|
||||||
|
return state.payload or {"success": False, "status": "failed", "error": "Task exited without payload", "task_id": task_id}
|
||||||
|
|
||||||
|
|
||||||
|
def _resume_task(task_id: str) -> dict[str, Any]:
|
||||||
|
state = _get_task(task_id)
|
||||||
|
if not state:
|
||||||
|
return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}
|
||||||
|
if not state.loop or not state.agent:
|
||||||
|
return {"success": False, "status": "failed", "error": "Task cannot be resumed", "task_id": task_id}
|
||||||
|
|
||||||
|
state.awaiting = False
|
||||||
|
state.settled_event.clear()
|
||||||
|
state.loop.call_soon_threadsafe(state.agent.resume)
|
||||||
|
state.settled_event.wait()
|
||||||
|
return state.payload or {"success": False, "status": "failed", "error": "Resume did not produce a payload", "task_id": task_id}
|
||||||
|
|
||||||
|
|
||||||
|
def _verify_task(task_id: str) -> dict[str, Any]:
|
||||||
|
state = _get_task(task_id)
|
||||||
|
if not state:
|
||||||
|
return {"success": False, "verified": False, "error": "Task not found", "task_id": task_id}
|
||||||
|
if not state.loop:
|
||||||
|
return {"success": False, "verified": False, "error": "Task has no active event loop", "task_id": task_id}
|
||||||
|
future = asyncio.run_coroutine_threadsafe(_verify_captcha_state(state), state.loop)
|
||||||
|
return future.result(timeout=20)
|
||||||
|
|
||||||
|
|
||||||
|
def _abort_task(task_id: str, reason: str | None = None) -> dict[str, Any]:
|
||||||
|
state = _get_task(task_id)
|
||||||
|
if not state:
|
||||||
|
return {"success": False, "status": "failed", "error": "Task not found", "task_id": task_id}
|
||||||
|
state.aborted = True
|
||||||
|
state.error = reason or "CAPTCHA flow aborted by user."
|
||||||
|
if state.loop and state.agent:
|
||||||
|
state.loop.call_soon_threadsafe(state.agent.resume)
|
||||||
|
state.set_payload(
|
||||||
|
"failed",
|
||||||
|
{"success": False, "status": "failed", "task_id": task_id, "error": state.error, "error_code": "captcha_aborted"},
|
||||||
|
)
|
||||||
|
return state.payload
|
||||||
|
|
||||||
|
|
||||||
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
|
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
|
||||||
|
|
@ -62,28 +397,49 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler):
|
||||||
_json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})
|
_json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})
|
||||||
|
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
if self.path != "/run":
|
|
||||||
_json_response(self, 404, {"success": False, "error": "Not found"})
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
content_length = int(self.headers.get("Content-Length", "0"))
|
content_length = int(self.headers.get("Content-Length", "0"))
|
||||||
raw = self.rfile.read(content_length)
|
raw = self.rfile.read(content_length)
|
||||||
payload = json.loads(raw.decode("utf-8") if raw else "{}")
|
payload = json.loads(raw.decode("utf-8") if raw else "{}")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
_json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
|
||||||
|
return
|
||||||
|
|
||||||
|
if self.path == "/run":
|
||||||
task = payload.get("task", "")
|
task = payload.get("task", "")
|
||||||
|
task_id = str(payload.get("task_id") or uuid.uuid4().hex)
|
||||||
if not isinstance(task, str) or not task.strip():
|
if not isinstance(task, str) or not task.strip():
|
||||||
_json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
|
_json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
|
||||||
return
|
return
|
||||||
|
result = _start_task(task_id=task_id, task_text=task.strip())
|
||||||
|
_json_response(self, 200, result)
|
||||||
|
return
|
||||||
|
|
||||||
result = asyncio.run(run_browser_task(task.strip()))
|
if self.path == "/verify":
|
||||||
code = 200 if result.get("success") else 500
|
task_id = str(payload.get("task_id") or "")
|
||||||
_json_response(self, code, result)
|
if not task_id:
|
||||||
except json.JSONDecodeError:
|
_json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
|
||||||
_json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
|
return
|
||||||
except error.URLError as err:
|
_json_response(self, 200, _verify_task(task_id))
|
||||||
_json_response(self, 503, {"success": False, "error": f"Transport error: {err}"})
|
return
|
||||||
except Exception as err:
|
|
||||||
_json_response(self, 500, {"success": False, "error": f"Internal error: {err}"})
|
if self.path == "/resume":
|
||||||
|
task_id = str(payload.get("task_id") or "")
|
||||||
|
if not task_id:
|
||||||
|
_json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
|
||||||
|
return
|
||||||
|
_json_response(self, 200, _resume_task(task_id))
|
||||||
|
return
|
||||||
|
|
||||||
|
if self.path == "/abort":
|
||||||
|
task_id = str(payload.get("task_id") or "")
|
||||||
|
if not task_id:
|
||||||
|
_json_response(self, 400, {"success": False, "error": "Field 'task_id' is required"})
|
||||||
|
return
|
||||||
|
_json_response(self, 200, _abort_task(task_id, reason=payload.get("reason")))
|
||||||
|
return
|
||||||
|
|
||||||
|
_json_response(self, 404, {"success": False, "error": "Not found"})
|
||||||
|
|
||||||
def log_message(self, format_str, *args):
|
def log_message(self, format_str, *args):
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,7 @@ if [ ! -f /var/lib/dbus/machine-id ]; then
|
||||||
dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true
|
dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
|
# УдалÑем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
|
||||||
rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true
|
rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true
|
||||||
|
|
||||||
log "starting X stack on DISPLAY=${DISPLAY}"
|
log "starting X stack on DISPLAY=${DISPLAY}"
|
||||||
|
|
|
||||||
|
|
@ -154,6 +154,15 @@ SKILLS_GUIDANCE = (
|
||||||
"Skills that aren't maintained become liabilities."
|
"Skills that aren't maintained become liabilities."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
BROWSER_CAPTCHA_GUIDANCE = (
|
||||||
|
"For browser tasks, do not pre-emptively refuse just because CAPTCHA may appear. "
|
||||||
|
"Start the task with internet_browser. "
|
||||||
|
"If the browser runtime reports status='awaiting_user_captcha', immediately use to_captcha "
|
||||||
|
"to hand control to the user for manual verification and then resume. "
|
||||||
|
"Important: you must never claim that you solved CAPTCHA yourself and you must not attempt bypass methods. "
|
||||||
|
"Your role is orchestration: run browser steps, request manual CAPTCHA completion from the user, verify, and continue."
|
||||||
|
)
|
||||||
|
|
||||||
PLATFORM_HINTS = {
|
PLATFORM_HINTS = {
|
||||||
"whatsapp": (
|
"whatsapp": (
|
||||||
"You are on a text messaging communication platform, WhatsApp. "
|
"You are on a text messaging communication platform, WhatsApp. "
|
||||||
|
|
|
||||||
|
|
@ -1922,6 +1922,7 @@ class HermesCLI:
|
||||||
platform="cli",
|
platform="cli",
|
||||||
session_db=self._session_db,
|
session_db=self._session_db,
|
||||||
clarify_callback=self._clarify_callback,
|
clarify_callback=self._clarify_callback,
|
||||||
|
captcha_callback=self._captcha_callback,
|
||||||
reasoning_callback=(
|
reasoning_callback=(
|
||||||
self._stream_reasoning_delta if (self.streaming_enabled and self.show_reasoning)
|
self._stream_reasoning_delta if (self.streaming_enabled and self.show_reasoning)
|
||||||
else self._on_reasoning if (self.show_reasoning or self.verbose)
|
else self._on_reasoning if (self.show_reasoning or self.verbose)
|
||||||
|
|
@ -5113,6 +5114,40 @@ class HermesCLI:
|
||||||
"Use your best judgement to make the choice and proceed."
|
"Use your best judgement to make the choice and proceed."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _captcha_callback(self, payload):
|
||||||
|
"""Prompt the user to complete a paused CAPTCHA flow in the live browser."""
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
timeout = int((payload.get("verification") or {}).get("max_wait_seconds", 900))
|
||||||
|
response_queue = queue.Queue()
|
||||||
|
self._captcha_state = {
|
||||||
|
"payload": payload,
|
||||||
|
"response_queue": response_queue,
|
||||||
|
}
|
||||||
|
self._captcha_deadline = _time.monotonic() + timeout
|
||||||
|
self._invalidate()
|
||||||
|
|
||||||
|
last_refresh = _time.monotonic()
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
result = response_queue.get(timeout=1)
|
||||||
|
self._captcha_deadline = 0
|
||||||
|
return result
|
||||||
|
except queue.Empty:
|
||||||
|
remaining = self._captcha_deadline - _time.monotonic()
|
||||||
|
if remaining <= 0:
|
||||||
|
break
|
||||||
|
now = _time.monotonic()
|
||||||
|
if now - last_refresh >= 5.0:
|
||||||
|
last_refresh = now
|
||||||
|
self._invalidate()
|
||||||
|
|
||||||
|
self._captcha_state = None
|
||||||
|
self._captcha_deadline = 0
|
||||||
|
self._invalidate()
|
||||||
|
_cprint(f"\n{_DIM}(captcha wait timed out after {timeout}s){_RST}")
|
||||||
|
return {"action": "timeout", "user_response": ""}
|
||||||
|
|
||||||
def _sudo_password_callback(self) -> str:
|
def _sudo_password_callback(self) -> str:
|
||||||
"""
|
"""
|
||||||
Prompt for sudo password through the prompt_toolkit UI.
|
Prompt for sudo password through the prompt_toolkit UI.
|
||||||
|
|
@ -5812,6 +5847,8 @@ class HermesCLI:
|
||||||
return [("class:sudo-prompt", f"🔑 {state_suffix}")]
|
return [("class:sudo-prompt", f"🔑 {state_suffix}")]
|
||||||
if self._approval_state:
|
if self._approval_state:
|
||||||
return [("class:prompt-working", f"⚠ {state_suffix}")]
|
return [("class:prompt-working", f"⚠ {state_suffix}")]
|
||||||
|
if self._captcha_state:
|
||||||
|
return [("class:prompt-working", f"🧩 {state_suffix}")]
|
||||||
if self._clarify_freetext:
|
if self._clarify_freetext:
|
||||||
return [("class:clarify-selected", f"✎ {state_suffix}")]
|
return [("class:clarify-selected", f"✎ {state_suffix}")]
|
||||||
if self._clarify_state:
|
if self._clarify_state:
|
||||||
|
|
@ -5878,6 +5915,7 @@ class HermesCLI:
|
||||||
sudo_widget,
|
sudo_widget,
|
||||||
secret_widget,
|
secret_widget,
|
||||||
approval_widget,
|
approval_widget,
|
||||||
|
captcha_widget,
|
||||||
clarify_widget,
|
clarify_widget,
|
||||||
spinner_widget,
|
spinner_widget,
|
||||||
spacer,
|
spacer,
|
||||||
|
|
@ -5900,6 +5938,7 @@ class HermesCLI:
|
||||||
sudo_widget,
|
sudo_widget,
|
||||||
secret_widget,
|
secret_widget,
|
||||||
approval_widget,
|
approval_widget,
|
||||||
|
captcha_widget,
|
||||||
clarify_widget,
|
clarify_widget,
|
||||||
spinner_widget,
|
spinner_widget,
|
||||||
spacer,
|
spacer,
|
||||||
|
|
@ -5983,6 +6022,10 @@ class HermesCLI:
|
||||||
self._approval_deadline = 0
|
self._approval_deadline = 0
|
||||||
self._approval_lock = threading.Lock() # serialize concurrent approval prompts (delegation race fix)
|
self._approval_lock = threading.Lock() # serialize concurrent approval prompts (delegation race fix)
|
||||||
|
|
||||||
|
# CAPTCHA / human verification prompt state
|
||||||
|
self._captcha_state = None # dict with payload + response_queue
|
||||||
|
self._captcha_deadline = 0
|
||||||
|
|
||||||
# Slash command loading state
|
# Slash command loading state
|
||||||
self._command_running = False
|
self._command_running = False
|
||||||
self._command_status = ""
|
self._command_status = ""
|
||||||
|
|
@ -6058,6 +6101,23 @@ class HermesCLI:
|
||||||
event.app.invalidate()
|
event.app.invalidate()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# --- CAPTCHA prompt: accept ready/done/abort style input ---
|
||||||
|
if self._captcha_state:
|
||||||
|
text = event.app.current_buffer.text.strip()
|
||||||
|
normalized = text.lower()
|
||||||
|
if normalized in {"abort", "cancel", "stop"}:
|
||||||
|
action = "abort"
|
||||||
|
elif text:
|
||||||
|
action = "ready"
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
self._captcha_state["response_queue"].put({"action": action, "user_response": text})
|
||||||
|
self._captcha_state = None
|
||||||
|
self._captcha_deadline = 0
|
||||||
|
event.app.current_buffer.reset()
|
||||||
|
event.app.invalidate()
|
||||||
|
return
|
||||||
|
|
||||||
# --- Clarify freetext mode: user typed their own answer ---
|
# --- Clarify freetext mode: user typed their own answer ---
|
||||||
if self._clarify_freetext and self._clarify_state:
|
if self._clarify_freetext and self._clarify_state:
|
||||||
text = event.app.current_buffer.text.strip()
|
text = event.app.current_buffer.text.strip()
|
||||||
|
|
@ -6194,7 +6254,7 @@ class HermesCLI:
|
||||||
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
|
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
|
||||||
# history browsing when on the first/last line (or single-line input).
|
# history browsing when on the first/last line (or single-line input).
|
||||||
_normal_input = Condition(
|
_normal_input = Condition(
|
||||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
lambda: not self._clarify_state and not self._approval_state and not self._captcha_state and not self._sudo_state and not self._secret_state
|
||||||
)
|
)
|
||||||
|
|
||||||
@kb.add('up', filter=_normal_input)
|
@kb.add('up', filter=_normal_input)
|
||||||
|
|
@ -6261,6 +6321,14 @@ class HermesCLI:
|
||||||
event.app.invalidate()
|
event.app.invalidate()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if self._captcha_state:
|
||||||
|
self._captcha_state["response_queue"].put({"action": "timeout", "user_response": ""})
|
||||||
|
self._captcha_state = None
|
||||||
|
self._captcha_deadline = 0
|
||||||
|
event.app.current_buffer.reset()
|
||||||
|
event.app.invalidate()
|
||||||
|
return
|
||||||
|
|
||||||
# Cancel clarify prompt
|
# Cancel clarify prompt
|
||||||
if self._clarify_state:
|
if self._clarify_state:
|
||||||
self._clarify_state["response_queue"].put(
|
self._clarify_state["response_queue"].put(
|
||||||
|
|
@ -6334,7 +6402,7 @@ class HermesCLI:
|
||||||
# Guard: don't START recording during agent run or interactive prompts
|
# Guard: don't START recording during agent run or interactive prompts
|
||||||
if cli_ref._agent_running:
|
if cli_ref._agent_running:
|
||||||
return
|
return
|
||||||
if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state:
|
if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state or cli_ref._captcha_state:
|
||||||
return
|
return
|
||||||
# Guard: don't start while a previous stop/transcribe cycle is
|
# Guard: don't start while a previous stop/transcribe cycle is
|
||||||
# still running — recorder.stop() holds AudioRecorder._lock and
|
# still running — recorder.stop() holds AudioRecorder._lock and
|
||||||
|
|
@ -6554,6 +6622,8 @@ class HermesCLI:
|
||||||
return "type secret (hidden), Enter to skip"
|
return "type secret (hidden), Enter to skip"
|
||||||
if cli_ref._approval_state:
|
if cli_ref._approval_state:
|
||||||
return ""
|
return ""
|
||||||
|
if cli_ref._captcha_state:
|
||||||
|
return "type ready/done after you solve the challenge, or abort to cancel"
|
||||||
if cli_ref._clarify_freetext:
|
if cli_ref._clarify_freetext:
|
||||||
return "type your answer here and press Enter"
|
return "type your answer here and press Enter"
|
||||||
if cli_ref._clarify_state:
|
if cli_ref._clarify_state:
|
||||||
|
|
@ -6597,6 +6667,13 @@ class HermesCLI:
|
||||||
('class:clarify-countdown', f' ({remaining}s)'),
|
('class:clarify-countdown', f' ({remaining}s)'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if cli_ref._captcha_state:
|
||||||
|
remaining = max(0, int(cli_ref._captcha_deadline - _time.monotonic()))
|
||||||
|
return [
|
||||||
|
('class:hint', " complete the challenge in the browser, then type 'ready'"),
|
||||||
|
('class:clarify-countdown', f' ({remaining}s)'),
|
||||||
|
]
|
||||||
|
|
||||||
if cli_ref._clarify_state:
|
if cli_ref._clarify_state:
|
||||||
remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
|
remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
|
||||||
countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else ''
|
countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else ''
|
||||||
|
|
@ -6619,7 +6696,7 @@ class HermesCLI:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_hint_height():
|
def get_hint_height():
|
||||||
if cli_ref._sudo_state or cli_ref._secret_state or cli_ref._approval_state or cli_ref._clarify_state or cli_ref._command_running:
|
if cli_ref._sudo_state or cli_ref._secret_state or cli_ref._approval_state or cli_ref._captcha_state or cli_ref._clarify_state or cli_ref._command_running:
|
||||||
return 1
|
return 1
|
||||||
# Keep a 1-line spacer while agent runs so output doesn't push
|
# Keep a 1-line spacer while agent runs so output doesn't push
|
||||||
# right up against the top rule of the input area
|
# right up against the top rule of the input area
|
||||||
|
|
@ -6644,7 +6721,7 @@ class HermesCLI:
|
||||||
height=get_hint_height,
|
height=get_hint_height,
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- Clarify tool: dynamic display widget for questions + choices ---
|
# --- Interactive panels: CAPTCHA + clarify ---
|
||||||
|
|
||||||
def _panel_box_width(title: str, content_lines: list[str], min_width: int = 46, max_width: int = 76) -> int:
|
def _panel_box_width(title: str, content_lines: list[str], min_width: int = 46, max_width: int = 76) -> int:
|
||||||
"""Choose a stable panel width wide enough for the title and content."""
|
"""Choose a stable panel width wide enough for the title and content."""
|
||||||
|
|
@ -6672,6 +6749,45 @@ class HermesCLI:
|
||||||
def _append_blank_panel_line(lines, border_style: str, box_width: int) -> None:
|
def _append_blank_panel_line(lines, border_style: str, box_width: int) -> None:
|
||||||
lines.append((border_style, "│" + (" " * box_width) + "│\n"))
|
lines.append((border_style, "│" + (" " * box_width) + "│\n"))
|
||||||
|
|
||||||
|
def _get_captcha_display():
|
||||||
|
state = cli_ref._captcha_state
|
||||||
|
if not state:
|
||||||
|
return []
|
||||||
|
|
||||||
|
payload = state.get("payload") or {}
|
||||||
|
title = "Manual CAPTCHA Required"
|
||||||
|
browser_view_url = payload.get("browser_view_url") or "Browser view URL is not configured."
|
||||||
|
body_lines = [
|
||||||
|
payload.get("instructions") or "Open the live browser and complete the verification challenge.",
|
||||||
|
f"Type: {payload.get('captcha_type', 'unknown')}",
|
||||||
|
f"Task ID: {payload.get('task_id', '')}",
|
||||||
|
f"Browser: {browser_view_url}",
|
||||||
|
"When the challenge disappears, type 'ready' or 'done' and press Enter.",
|
||||||
|
"Type 'abort' to stop this browser task.",
|
||||||
|
]
|
||||||
|
box_width = _panel_box_width(title, body_lines, min_width=56, max_width=94)
|
||||||
|
inner_text_width = max(8, box_width - 2)
|
||||||
|
lines = []
|
||||||
|
lines.append(('class:captcha-border', '╭─ '))
|
||||||
|
lines.append(('class:captcha-title', title))
|
||||||
|
lines.append(('class:captcha-border', ' ' + ('─' * max(0, box_width - len(title) - 3)) + '╮\n'))
|
||||||
|
_append_blank_panel_line(lines, 'class:captcha-border', box_width)
|
||||||
|
for text in body_lines:
|
||||||
|
style = 'class:captcha-link' if text.startswith("Browser: ") else 'class:captcha-text'
|
||||||
|
for wrapped in _wrap_panel_text(text, inner_text_width):
|
||||||
|
_append_panel_line(lines, 'class:captcha-border', style, wrapped, box_width)
|
||||||
|
_append_blank_panel_line(lines, 'class:captcha-border', box_width)
|
||||||
|
lines.append(('class:captcha-border', '╰' + ('─' * box_width) + '╯\n'))
|
||||||
|
return lines
|
||||||
|
|
||||||
|
captcha_widget = ConditionalContainer(
|
||||||
|
Window(
|
||||||
|
FormattedTextControl(_get_captcha_display),
|
||||||
|
wrap_lines=True,
|
||||||
|
),
|
||||||
|
filter=Condition(lambda: cli_ref._captcha_state is not None),
|
||||||
|
)
|
||||||
|
|
||||||
def _get_clarify_display():
|
def _get_clarify_display():
|
||||||
"""Build styled text for the clarify question/choices panel."""
|
"""Build styled text for the clarify question/choices panel."""
|
||||||
state = cli_ref._clarify_state
|
state = cli_ref._clarify_state
|
||||||
|
|
@ -6897,6 +7013,7 @@ class HermesCLI:
|
||||||
sudo_widget=sudo_widget,
|
sudo_widget=sudo_widget,
|
||||||
secret_widget=secret_widget,
|
secret_widget=secret_widget,
|
||||||
approval_widget=approval_widget,
|
approval_widget=approval_widget,
|
||||||
|
captcha_widget=captcha_widget,
|
||||||
clarify_widget=clarify_widget,
|
clarify_widget=clarify_widget,
|
||||||
spinner_widget=spinner_widget,
|
spinner_widget=spinner_widget,
|
||||||
spacer=spacer,
|
spacer=spacer,
|
||||||
|
|
@ -6954,6 +7071,11 @@ class HermesCLI:
|
||||||
'approval-cmd': '#AAAAAA italic',
|
'approval-cmd': '#AAAAAA italic',
|
||||||
'approval-choice': '#AAAAAA',
|
'approval-choice': '#AAAAAA',
|
||||||
'approval-selected': '#FFD700 bold',
|
'approval-selected': '#FFD700 bold',
|
||||||
|
# CAPTCHA panel
|
||||||
|
'captcha-border': '#CD7F32',
|
||||||
|
'captcha-title': '#FFBF00 bold',
|
||||||
|
'captcha-text': '#FFF8DC',
|
||||||
|
'captcha-link': '#87CEEB underline',
|
||||||
# Voice mode
|
# Voice mode
|
||||||
'voice-prompt': '#87CEEB',
|
'voice-prompt': '#87CEEB',
|
||||||
'voice-recording': '#FF4444 bold',
|
'voice-recording': '#FF4444 bold',
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,7 @@ def _discover_tools():
|
||||||
"tools.memory_tool",
|
"tools.memory_tool",
|
||||||
"tools.session_search_tool",
|
"tools.session_search_tool",
|
||||||
"tools.clarify_tool",
|
"tools.clarify_tool",
|
||||||
|
"tools.to_captcha_tool",
|
||||||
"tools.code_execution_tool",
|
"tools.code_execution_tool",
|
||||||
"tools.delegate_tool",
|
"tools.delegate_tool",
|
||||||
"tools.process_registry",
|
"tools.process_registry",
|
||||||
|
|
@ -362,7 +363,7 @@ def get_tool_definitions(
|
||||||
# because they need agent-level state (TodoStore, MemoryStore, etc.).
|
# because they need agent-level state (TodoStore, MemoryStore, etc.).
|
||||||
# The registry still holds their schemas; dispatch just returns a stub error
|
# The registry still holds their schemas; dispatch just returns a stub error
|
||||||
# so if something slips through, the LLM sees a sensible message.
|
# so if something slips through, the LLM sees a sensible message.
|
||||||
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
|
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task", "to_captcha"}
|
||||||
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
|
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -73,6 +73,7 @@ from hermes_constants import OPENROUTER_BASE_URL
|
||||||
from agent.prompt_builder import (
|
from agent.prompt_builder import (
|
||||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||||
|
BROWSER_CAPTCHA_GUIDANCE,
|
||||||
)
|
)
|
||||||
from agent.model_metadata import (
|
from agent.model_metadata import (
|
||||||
fetch_model_metadata,
|
fetch_model_metadata,
|
||||||
|
|
@ -400,6 +401,7 @@ class AIAgent:
|
||||||
thinking_callback: callable = None,
|
thinking_callback: callable = None,
|
||||||
reasoning_callback: callable = None,
|
reasoning_callback: callable = None,
|
||||||
clarify_callback: callable = None,
|
clarify_callback: callable = None,
|
||||||
|
captcha_callback: callable = None,
|
||||||
step_callback: callable = None,
|
step_callback: callable = None,
|
||||||
stream_delta_callback: callable = None,
|
stream_delta_callback: callable = None,
|
||||||
tool_gen_callback: callable = None,
|
tool_gen_callback: callable = None,
|
||||||
|
|
@ -447,6 +449,7 @@ class AIAgent:
|
||||||
tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
|
tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
|
||||||
clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
|
clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
|
||||||
Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
|
Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
|
||||||
|
captcha_callback (callable): Callback function(payload_dict) -> dict for manual CAPTCHA completion flows.
|
||||||
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
|
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
|
||||||
reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
|
reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
|
||||||
If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
|
If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
|
||||||
|
|
@ -529,6 +532,7 @@ class AIAgent:
|
||||||
self.thinking_callback = thinking_callback
|
self.thinking_callback = thinking_callback
|
||||||
self.reasoning_callback = reasoning_callback
|
self.reasoning_callback = reasoning_callback
|
||||||
self.clarify_callback = clarify_callback
|
self.clarify_callback = clarify_callback
|
||||||
|
self.captcha_callback = captcha_callback
|
||||||
self.step_callback = step_callback
|
self.step_callback = step_callback
|
||||||
self.stream_delta_callback = stream_delta_callback
|
self.stream_delta_callback = stream_delta_callback
|
||||||
self.status_callback = status_callback
|
self.status_callback = status_callback
|
||||||
|
|
@ -2276,6 +2280,8 @@ class AIAgent:
|
||||||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||||
if "skill_manage" in self.valid_tool_names:
|
if "skill_manage" in self.valid_tool_names:
|
||||||
tool_guidance.append(SKILLS_GUIDANCE)
|
tool_guidance.append(SKILLS_GUIDANCE)
|
||||||
|
if "internet_browser" in self.valid_tool_names and "to_captcha" in self.valid_tool_names:
|
||||||
|
tool_guidance.append(BROWSER_CAPTCHA_GUIDANCE)
|
||||||
if tool_guidance:
|
if tool_guidance:
|
||||||
prompt_parts.append(" ".join(tool_guidance))
|
prompt_parts.append(" ".join(tool_guidance))
|
||||||
|
|
||||||
|
|
@ -4693,6 +4699,18 @@ class AIAgent:
|
||||||
choices=function_args.get("choices"),
|
choices=function_args.get("choices"),
|
||||||
callback=self.clarify_callback,
|
callback=self.clarify_callback,
|
||||||
)
|
)
|
||||||
|
elif function_name == "to_captcha":
|
||||||
|
from tools.to_captcha_tool import to_captcha_tool as _to_captcha_tool
|
||||||
|
return _to_captcha_tool(
|
||||||
|
task_id=function_args.get("task_id", ""),
|
||||||
|
browser_view_url=function_args.get("browser_view_url"),
|
||||||
|
captcha_type=function_args.get("captcha_type"),
|
||||||
|
instructions=function_args.get("instructions"),
|
||||||
|
detected_at=function_args.get("detected_at"),
|
||||||
|
verification=function_args.get("verification"),
|
||||||
|
resume_token=function_args.get("resume_token"),
|
||||||
|
callback=self.captcha_callback,
|
||||||
|
)
|
||||||
elif function_name == "delegate_task":
|
elif function_name == "delegate_task":
|
||||||
from tools.delegate_tool import delegate_task as _delegate_task
|
from tools.delegate_tool import delegate_task as _delegate_task
|
||||||
return _delegate_task(
|
return _delegate_task(
|
||||||
|
|
@ -4711,6 +4729,53 @@ class AIAgent:
|
||||||
honcho_session_key=self._honcho_session_key,
|
honcho_session_key=self._honcho_session_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _maybe_resolve_captcha(self, function_name: str, function_result: str, effective_task_id: str) -> str:
|
||||||
|
"""Bridge paused browser tasks into the dedicated CAPTCHA orchestration flow."""
|
||||||
|
if function_name != "internet_browser":
|
||||||
|
return function_result
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload = json.loads(function_result)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
return function_result
|
||||||
|
if not isinstance(payload, dict):
|
||||||
|
return function_result
|
||||||
|
if payload.get("status") != "awaiting_user_captcha":
|
||||||
|
return function_result
|
||||||
|
|
||||||
|
human = payload.get("human_intervention") or {}
|
||||||
|
captcha_args = {
|
||||||
|
"task_id": payload.get("task_id") or human.get("task_id") or effective_task_id,
|
||||||
|
"browser_view_url": human.get("browser_view_url"),
|
||||||
|
"captcha_type": human.get("captcha_type"),
|
||||||
|
"instructions": human.get("instructions"),
|
||||||
|
"detected_at": human.get("detected_at"),
|
||||||
|
"verification": human.get("verification"),
|
||||||
|
"resume_token": human.get("resume_token"),
|
||||||
|
}
|
||||||
|
captcha_result = self._invoke_tool("to_captcha", captcha_args, effective_task_id)
|
||||||
|
try:
|
||||||
|
captcha_payload = json.loads(captcha_result)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
return captcha_result
|
||||||
|
if not isinstance(captcha_payload, dict):
|
||||||
|
return captcha_result
|
||||||
|
|
||||||
|
if captcha_payload.get("status") == "resumed":
|
||||||
|
task_result = captcha_payload.get("task_result")
|
||||||
|
if isinstance(task_result, dict):
|
||||||
|
return json.dumps(task_result, ensure_ascii=False)
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
"success": False,
|
||||||
|
"status": captcha_payload.get("status", "still_blocked"),
|
||||||
|
"task_id": captcha_args["task_id"],
|
||||||
|
"human_intervention": human,
|
||||||
|
"captcha_flow": captcha_payload,
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
|
||||||
def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||||
"""Execute multiple tool calls concurrently using a thread pool.
|
"""Execute multiple tool calls concurrently using a thread pool.
|
||||||
|
|
||||||
|
|
@ -4843,6 +4908,8 @@ class AIAgent:
|
||||||
tool_duration = 0.0
|
tool_duration = 0.0
|
||||||
else:
|
else:
|
||||||
function_name, function_args, function_result, tool_duration, is_error = r
|
function_name, function_args, function_result, tool_duration, is_error = r
|
||||||
|
function_result = self._maybe_resolve_captcha(function_name, function_result, effective_task_id)
|
||||||
|
is_error, _ = _detect_tool_failure(function_name, function_result)
|
||||||
|
|
||||||
if is_error:
|
if is_error:
|
||||||
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
||||||
|
|
@ -5029,6 +5096,21 @@ class AIAgent:
|
||||||
tool_duration = time.time() - tool_start_time
|
tool_duration = time.time() - tool_start_time
|
||||||
if self.quiet_mode:
|
if self.quiet_mode:
|
||||||
self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
||||||
|
elif function_name == "to_captcha":
|
||||||
|
from tools.to_captcha_tool import to_captcha_tool as _to_captcha_tool
|
||||||
|
function_result = _to_captcha_tool(
|
||||||
|
task_id=function_args.get("task_id", ""),
|
||||||
|
browser_view_url=function_args.get("browser_view_url"),
|
||||||
|
captcha_type=function_args.get("captcha_type"),
|
||||||
|
instructions=function_args.get("instructions"),
|
||||||
|
detected_at=function_args.get("detected_at"),
|
||||||
|
verification=function_args.get("verification"),
|
||||||
|
resume_token=function_args.get("resume_token"),
|
||||||
|
callback=self.captcha_callback,
|
||||||
|
)
|
||||||
|
tool_duration = time.time() - tool_start_time
|
||||||
|
if self.quiet_mode:
|
||||||
|
self._vprint(f" {_get_cute_tool_message_impl('to_captcha', function_args, tool_duration, result=function_result)}")
|
||||||
elif function_name == "delegate_task":
|
elif function_name == "delegate_task":
|
||||||
from tools.delegate_tool import delegate_task as _delegate_task
|
from tools.delegate_tool import delegate_task as _delegate_task
|
||||||
tasks_arg = function_args.get("tasks")
|
tasks_arg = function_args.get("tasks")
|
||||||
|
|
@ -5099,6 +5181,7 @@ class AIAgent:
|
||||||
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||||
tool_duration = time.time() - tool_start_time
|
tool_duration = time.time() - tool_start_time
|
||||||
|
|
||||||
|
function_result = self._maybe_resolve_captcha(function_name, function_result, effective_task_id)
|
||||||
result_preview = function_result if self.verbose_logging else (
|
result_preview = function_result if self.verbose_logging else (
|
||||||
function_result[:200] if len(function_result) > 200 else function_result
|
function_result[:200] if len(function_result) > 200 else function_result
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,65 +1,130 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
from urllib import error, request
|
from urllib import error, request
|
||||||
|
|
||||||
from tools.registry import registry
|
from tools.registry import registry
|
||||||
|
|
||||||
|
|
||||||
def run_browser_task(task):
|
def _browser_api_base_url() -> str:
|
||||||
|
return os.getenv("BROWSER_API_URL", "http://browser-api:8088/api/browser").rstrip("/")
|
||||||
|
|
||||||
|
|
||||||
|
def _http_json(url: str, method: str = "GET", payload: dict | None = None, timeout_sec: int = 30) -> dict:
|
||||||
|
body = None
|
||||||
|
headers = {"Content-Type": "application/json"}
|
||||||
|
if payload is not None:
|
||||||
|
body = json.dumps(payload).encode("utf-8")
|
||||||
|
req = request.Request(url, data=body, headers=headers, method=method)
|
||||||
|
try:
|
||||||
|
with request.urlopen(req, timeout=timeout_sec) as resp:
|
||||||
|
raw = resp.read().decode("utf-8")
|
||||||
|
return json.loads(raw) if raw else {}
|
||||||
|
except error.HTTPError as http_err:
|
||||||
|
raw = http_err.read().decode("utf-8", errors="replace")
|
||||||
|
try:
|
||||||
|
data = json.loads(raw) if raw else {}
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
data = {"details": raw}
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Browser API returned HTTP {http_err.code}",
|
||||||
|
"details": data,
|
||||||
|
}
|
||||||
|
except Exception as err:
|
||||||
|
return {"success": False, "error": f"Browser API request failed: {err}"}
|
||||||
|
|
||||||
|
|
||||||
|
def run_browser_task(task: str):
|
||||||
if not task or not str(task).strip():
|
if not task or not str(task).strip():
|
||||||
return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
|
return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
|
||||||
|
|
||||||
rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
|
|
||||||
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
||||||
payload = json.dumps({"task": task}).encode("utf-8")
|
poll_interval = float(os.getenv("BROWSER_API_POLL_INTERVAL", "1.5"))
|
||||||
req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
|
api_base = _browser_api_base_url()
|
||||||
|
|
||||||
try:
|
accepted = _http_json(
|
||||||
with request.urlopen(req, timeout=timeout_sec) as resp:
|
f"{api_base}/tasks",
|
||||||
body = resp.read().decode("utf-8")
|
method="POST",
|
||||||
return body
|
payload={"task": task, "timeout": timeout_sec, "metadata": {"source": "internet_browser"}},
|
||||||
except error.HTTPError as http_err:
|
timeout_sec=30,
|
||||||
body = http_err.read().decode("utf-8", errors="replace")
|
)
|
||||||
|
task_id = accepted.get("task_id")
|
||||||
|
if not task_id:
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
{
|
{
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": f"browser-use RPC returned HTTP {http_err.code}",
|
"error": accepted.get("error", "Browser task was not accepted"),
|
||||||
"details": body,
|
"details": accepted,
|
||||||
},
|
|
||||||
ensure_ascii=False,
|
|
||||||
)
|
|
||||||
except Exception as err:
|
|
||||||
return json.dumps(
|
|
||||||
{
|
|
||||||
"success": False,
|
|
||||||
"error": f"browser-use RPC request failed: {err}",
|
|
||||||
},
|
},
|
||||||
ensure_ascii=False,
|
ensure_ascii=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
deadline = time.time() + timeout_sec + 10
|
||||||
|
status_url = f"{api_base}/tasks/{task_id}"
|
||||||
|
result_url = f"{api_base}/tasks/{task_id}/result"
|
||||||
|
|
||||||
|
while time.time() < deadline:
|
||||||
|
status_payload = _http_json(status_url, timeout_sec=15)
|
||||||
|
status = status_payload.get("status")
|
||||||
|
if not status and status_payload.get("error"):
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
"success": False,
|
||||||
|
"status": "failed",
|
||||||
|
"task_id": task_id,
|
||||||
|
"error": status_payload.get("error"),
|
||||||
|
"details": status_payload.get("details"),
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
if status == "awaiting_user_captcha":
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
"success": False,
|
||||||
|
"status": status,
|
||||||
|
"task_id": task_id,
|
||||||
|
"human_intervention": status_payload.get("human_intervention"),
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
if status in {"succeeded", "failed"}:
|
||||||
|
result_payload = _http_json(result_url, timeout_sec=30)
|
||||||
|
result_payload.setdefault("task_id", task_id)
|
||||||
|
return json.dumps(result_payload, ensure_ascii=False)
|
||||||
|
time.sleep(poll_interval)
|
||||||
|
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
"success": False,
|
||||||
|
"status": "failed",
|
||||||
|
"task_id": task_id,
|
||||||
|
"error": "Timed out while waiting for browser task result",
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
name="internet_browser",
|
name="internet_browser",
|
||||||
toolset="browse_cmd",
|
toolset="browse_cmd",
|
||||||
schema={
|
schema={
|
||||||
"name": "internet_browser",
|
"name": "internet_browser",
|
||||||
"description": (
|
"description": (
|
||||||
"ГЛАВНЫЙ ИНСТРУМЕНТ ДЛЯ ВЕБ-СЕРФИНГА. Вызывай этот инструмент НАПРЯМУЮ (через стандартный tool call/function call). "
|
"Main browser automation tool for internet tasks. Call it directly via a normal tool/function call. "
|
||||||
"КАТЕГОРИЧЕСКИ ЗАПРЕЩЕНО использовать `execute_code` или `delegate_task` для работы с браузером. "
|
"Do not use execute_code or delegate_task for browser work. Pass the task in natural language."
|
||||||
"Не пиши Python-скрипты! Просто передай в этот инструмент параметр `task`. "
|
|
||||||
"Используй для любых задач в интернете: поиск товаров (Wildberries, Ozon), чтение статей, клики, навигация."
|
|
||||||
),
|
),
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"task": {
|
"task": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Подробная задача на естественном языке. Например: 'Зайди на wildberries.ru, найди черную футболку и верни цену'."
|
"description": "Detailed natural-language browser task."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["task"]
|
"required": ["task"]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
handler=lambda args, **kw: run_browser_task(args.get("task")),
|
handler=lambda args, **kw: run_browser_task(args.get("task")),
|
||||||
emoji="🌐",
|
emoji="🌐",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue