add new tool: to_captcha

This commit is contained in:
VladislavIlin7 2026-05-17 01:03:55 +03:00
parent 8f86dbbdac
commit 4852345bf6
12 changed files with 716 additions and 35 deletions

View file

@ -9,6 +9,8 @@ from urllib import error, request
from browser_use import Agent, Browser, ChatOpenAI
from pydantic import BaseModel, Field, ValidationError, field_validator
from browser_env.tools import captcha_tool
SPEED_OPTIMIZATION_PROMPT = """
Speed optimization instructions:
- Be extremely concise and direct in your responses
@ -16,11 +18,21 @@ Speed optimization instructions:
- Use multi-action sequences whenever possible to reduce steps
"""
# System-prompt fragment passed to the agent through `extend_system_message`.
# It instructs the LLM to hand captcha challenges over to a human by calling
# the custom `to_captcha` action instead of interacting with the challenge
# itself. The text is sent to the model verbatim, so edit it with care.
CAPTCHA_PROMPT = """
CAPTCHA handling:
- If the current page is blocked by reCAPTCHA, hCaptcha, or Cloudflare Turnstile,
call the `to_captcha` action ONCE with a short `reason` argument and WAIT for its result.
- Do not click on captcha challenges yourself; the human will solve them via the live browser view.
- After `to_captcha` returns success=true, continue the original task from the same step.
- If `to_captcha` returns success=false, report the error and stop.
"""
class RunTaskRequest(BaseModel):
"""RPC payload для запуска browser-use задачи."""
task: str = Field(..., min_length=1)
task_id: str | None = Field(default=None, description="ID задачи из browser-api (используется to_captcha tool)")
@field_validator("task")
@classmethod
@ -69,10 +81,14 @@ def _json_response(handler, status_code: int, payload: dict[str, Any] | BaseMode
handler.wfile.write(data)
async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorResponse:
async def run_browser_task(task: str, task_id: str | None = None) -> RunTaskSuccessResponse | RunTaskErrorResponse:
cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
browser_view_url = os.getenv("BROWSER_VIEW_URL", "")
if task_id:
    # Expose task_id through the environment so the to_captcha tool knows
    # which task to POST to.
    os.environ["CURRENT_TASK_ID"] = task_id
else:
    # The environment is process-wide and outlives a single request: drop any
    # ID left over from a previous run so to_captcha never reports to a stale
    # task.
    # NOTE(review): this is still shared global state; if requests can run
    # concurrently in one process, pass task_id to the tool explicitly instead.
    os.environ.pop("CURRENT_TASK_ID", None)
browser = Browser(cdp_url=cdp_url)
llm = ChatOpenAI(
@ -82,13 +98,27 @@ async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorRe
temperature=0.0,
)
agent = Agent(task=task,
llm=llm,
browser=browser,
flash_mode=True,
use_vision=False,
extend_system_message=SPEED_OPTIMIZATION_PROMPT,
)
# Register the custom `to_captcha` action on a best-effort basis: if the
# installed browser-use version has no `Controller` (or registration fails),
# the task still runs, just without the captcha hand-off.
controller = None
try:
    from browser_use import Controller  # type: ignore

    controller = Controller()
    captcha_tool.register(controller)
except Exception as exc:
    import logging

    # Keep the fallback, but leave a trace: a silent failure here is very hard
    # to diagnose once the agent gets stuck on a captcha.
    logging.getLogger(__name__).warning(
        "to_captcha action unavailable, continuing without it: %r", exc
    )
    controller = None

# Advertise `to_captcha` in the system prompt only when the action is really
# registered; otherwise the agent would be told to call a non-existent action.
system_message = SPEED_OPTIMIZATION_PROMPT
if controller is not None:
    system_message += CAPTCHA_PROMPT

agent_kwargs: dict[str, Any] = {
    "task": task,
    "llm": llm,
    "browser": browser,
    "flash_mode": True,
    "use_vision": False,
    "extend_system_message": system_message,
}
if controller is not None:
    agent_kwargs["controller"] = controller
agent = Agent(**agent_kwargs)
try:
history = await agent.run()
@ -219,7 +249,7 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler):
payload = json.loads(raw.decode("utf-8") if raw else "{}")
request_model = RunTaskRequest.model_validate(payload)
result_model = asyncio.run(run_browser_task(request_model.task))
result_model = asyncio.run(run_browser_task(request_model.task, task_id=request_model.task_id))
code = 200 if result_model.success else 500
_json_response(self, code, result_model)
except ValidationError as err: