add new tool: to_captcha
This commit is contained in:
parent
8f86dbbdac
commit
4852345bf6
12 changed files with 716 additions and 35 deletions
|
|
@ -9,6 +9,8 @@ from urllib import error, request
|
|||
from browser_use import Agent, Browser, ChatOpenAI
|
||||
from pydantic import BaseModel, Field, ValidationError, field_validator
|
||||
|
||||
from browser_env.tools import captcha_tool
|
||||
|
||||
SPEED_OPTIMIZATION_PROMPT = """
|
||||
Speed optimization instructions:
|
||||
- Be extremely concise and direct in your responses
|
||||
|
|
@ -16,11 +18,21 @@ Speed optimization instructions:
|
|||
- Use multi-action sequences whenever possible to reduce steps
|
||||
"""
|
||||
|
||||
CAPTCHA_PROMPT = """
|
||||
CAPTCHA handling:
|
||||
- If the current page is blocked by reCAPTCHA, hCaptcha, or Cloudflare Turnstile,
|
||||
call the `to_captcha` action ONCE with a short `reason` argument and WAIT for its result.
|
||||
- Do not click on captcha challenges yourself; the human will solve them via the live browser view.
|
||||
- After `to_captcha` returns success=true, continue the original task from the same step.
|
||||
- If `to_captcha` returns success=false, report the error and stop.
|
||||
"""
|
||||
|
||||
|
||||
class RunTaskRequest(BaseModel):
|
||||
"""RPC payload для запуска browser-use задачи."""
|
||||
|
||||
task: str = Field(..., min_length=1)
|
||||
task_id: str | None = Field(default=None, description="ID задачи из browser-api (используется to_captcha tool)")
|
||||
|
||||
@field_validator("task")
|
||||
@classmethod
|
||||
|
|
@ -69,10 +81,14 @@ def _json_response(handler, status_code: int, payload: dict[str, Any] | BaseMode
|
|||
handler.wfile.write(data)
|
||||
|
||||
|
||||
async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorResponse:
|
||||
async def run_browser_task(task: str, task_id: str | None = None) -> RunTaskSuccessResponse | RunTaskErrorResponse:
|
||||
cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
|
||||
browser_view_url = os.getenv("BROWSER_VIEW_URL", "")
|
||||
|
||||
if task_id:
|
||||
# Прокидываем task_id в окружение, чтобы to_captcha tool знал, куда POST'ить.
|
||||
os.environ["CURRENT_TASK_ID"] = task_id
|
||||
|
||||
browser = Browser(cdp_url=cdp_url)
|
||||
|
||||
llm = ChatOpenAI(
|
||||
|
|
@ -82,13 +98,27 @@ async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorRe
|
|||
temperature=0.0,
|
||||
)
|
||||
|
||||
agent = Agent(task=task,
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
flash_mode=True,
|
||||
use_vision=False,
|
||||
extend_system_message=SPEED_OPTIMIZATION_PROMPT,
|
||||
)
|
||||
controller = None
|
||||
try:
|
||||
from browser_use import Controller # type: ignore
|
||||
controller = Controller()
|
||||
captcha_tool.register(controller)
|
||||
except Exception:
|
||||
# Если у установленной версии browser-use нет Controller — продолжаем без custom action
|
||||
controller = None
|
||||
|
||||
agent_kwargs = dict(
|
||||
task=task,
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
flash_mode=True,
|
||||
use_vision=False,
|
||||
extend_system_message=SPEED_OPTIMIZATION_PROMPT + CAPTCHA_PROMPT,
|
||||
)
|
||||
if controller is not None:
|
||||
agent_kwargs["controller"] = controller
|
||||
|
||||
agent = Agent(**agent_kwargs)
|
||||
|
||||
try:
|
||||
history = await agent.run()
|
||||
|
|
@ -219,7 +249,7 @@ class BrowserUseRPCHandler(BaseHTTPRequestHandler):
|
|||
payload = json.loads(raw.decode("utf-8") if raw else "{}")
|
||||
request_model = RunTaskRequest.model_validate(payload)
|
||||
|
||||
result_model = asyncio.run(run_browser_task(request_model.task))
|
||||
result_model = asyncio.run(run_browser_task(request_model.task, task_id=request_model.task_id))
|
||||
code = 200 if result_model.success else 500
|
||||
_json_response(self, code, result_model)
|
||||
except ValidationError as err:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue