# Source: BrowserUse_and_ComputerUse_.../hermes_code/tools/browser_use_tool.py
# (130 lines, 4.4 KiB, Python)

import json
import os
import time
from urllib import error, request
from tools.registry import registry
def _browser_api_base_url() -> str:
    """Return the base URL of the browser API, without a trailing slash.

    The value is read from the ``BROWSER_API_URL`` environment variable.
    When the variable is unset, empty, or contains only whitespace, the
    built-in default ``http://browser-api:8088/api/browser`` is used, so a
    blank override can never produce scheme-less request URLs such as
    ``/tasks``.
    """
    default_url = "http://browser-api:8088/api/browser"
    configured = (os.getenv("BROWSER_API_URL") or "").strip()
    return (configured or default_url).rstrip("/")
def _http_json(url: str, method: str = "GET", payload: dict | None = None, timeout_sec: int = 30) -> dict:
body = None
headers = {"Content-Type": "application/json"}
if payload is not None:
body = json.dumps(payload).encode("utf-8")
req = request.Request(url, data=body, headers=headers, method=method)
try:
with request.urlopen(req, timeout=timeout_sec) as resp:
raw = resp.read().decode("utf-8")
return json.loads(raw) if raw else {}
except error.HTTPError as http_err:
raw = http_err.read().decode("utf-8", errors="replace")
try:
data = json.loads(raw) if raw else {}
except json.JSONDecodeError:
data = {"details": raw}
return {
"success": False,
"error": f"Browser API returned HTTP {http_err.code}",
"details": data,
}
except Exception as err:
return {"success": False, "error": f"Browser API request failed: {err}"}
def _env_number(name: str, default, cast):
    """Read a finite, non-negative number from env var *name*, else *default*.

    *cast* is ``int`` or ``float``. Unset, unparseable, negative, NaN and
    infinite values all fall back to *default*.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        value = cast(raw.strip())
    except ValueError:
        return default
    # NaN fails the comparison as well, so it is rejected here too.
    return value if 0 <= value < float("inf") else default


def run_browser_task(task: str) -> str:
    """Submit a browser task to the browser API and wait for its outcome.

    The task is POSTed to ``<api_base>/tasks``; the returned ``task_id`` is
    then polled at ``<api_base>/tasks/<task_id>`` until one of the following
    happens:

    * the status endpoint reports an error without a status -> failure;
    * status ``"awaiting_user_captcha"`` -> returned immediately together
      with the ``human_intervention`` payload from the status reply;
    * status ``"succeeded"`` or ``"failed"`` -> the payload from
      ``<api_base>/tasks/<task_id>/result`` is returned (with ``task_id``
      filled in when absent);
    * the deadline (task timeout plus 10 seconds) passes -> timeout failure.

    Environment variables:
        BROWSER_USE_RPC_TIMEOUT: task timeout in seconds (default 900).
        BROWSER_API_POLL_INTERVAL: seconds between polls (default 1.5).
        Invalid or negative values fall back to the defaults.

    Args:
        task: Natural-language description of the browser task.

    Returns:
        A JSON string describing the outcome. Failures detected here carry
        ``"success": false`` and an ``"error"`` message.
    """
    if not task or not str(task).strip():
        return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)

    timeout_sec = _env_number("BROWSER_USE_RPC_TIMEOUT", 900, int)
    poll_interval = _env_number("BROWSER_API_POLL_INTERVAL", 1.5, float)
    api_base = _browser_api_base_url()

    accepted = _http_json(
        f"{api_base}/tasks",
        method="POST",
        payload={"task": task, "timeout": timeout_sec, "metadata": {"source": "internet_browser"}},
        timeout_sec=30,
    )
    task_id = accepted.get("task_id")
    if not task_id:
        return json.dumps(
            {
                "success": False,
                "error": accepted.get("error", "Browser task was not accepted"),
                "details": accepted,
            },
            ensure_ascii=False,
        )

    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted while a long-running task is being polled.
    deadline = time.monotonic() + timeout_sec + 10
    status_url = f"{api_base}/tasks/{task_id}"
    result_url = f"{api_base}/tasks/{task_id}/result"

    while time.monotonic() < deadline:
        status_payload = _http_json(status_url, timeout_sec=15)
        status = status_payload.get("status")

        # No status but an error: the status request itself failed.
        if not status and status_payload.get("error"):
            return json.dumps(
                {
                    "success": False,
                    "status": "failed",
                    "task_id": task_id,
                    "error": status_payload.get("error"),
                    "details": status_payload.get("details"),
                },
                ensure_ascii=False,
            )

        # Hand control back to the caller so a human can solve the captcha.
        if status == "awaiting_user_captcha":
            return json.dumps(
                {
                    "success": False,
                    "status": status,
                    "task_id": task_id,
                    "human_intervention": status_payload.get("human_intervention"),
                },
                ensure_ascii=False,
            )

        # Terminal states: fetch and return the final result payload.
        if status in {"succeeded", "failed"}:
            result_payload = _http_json(result_url, timeout_sec=30)
            result_payload.setdefault("task_id", task_id)
            return json.dumps(result_payload, ensure_ascii=False)

        time.sleep(poll_interval)

    return json.dumps(
        {
            "success": False,
            "status": "failed",
            "task_id": task_id,
            "error": "Timed out while waiting for browser task result",
        },
        ensure_ascii=False,
    )
# Expose run_browser_task to the tool registry as "internet_browser" in the
# "browse_cmd" toolset. The handler extracts the "task" argument from the
# call arguments; any extra keyword arguments it receives are ignored.
registry.register(
    name="internet_browser",
    toolset="browse_cmd",
    schema={
        "name": "internet_browser",
        "description": (
            "Main browser automation tool for internet tasks. Call it directly via a normal tool/function call. "
            "Do not use execute_code or delegate_task for browser work. Pass the task in natural language."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "task": {
                    "type": "string",
                    "description": "Detailed natural-language browser task.",
                },
            },
            "required": ["task"],
        },
    },
    handler=lambda args, **_unused: run_browser_task(args.get("task")),
    emoji="🌐",
)