"""Run a small, fixed batch of Mind2Web tasks and record one JSONL row per task."""

import json
import time
import urllib.request
from pathlib import Path

from mind2web_runner import run_task

# Task definition files, opened relative to the current working directory.
TASK_FILES = [
    "test_1_task_0.json",
    "test_1_task_1.json",
    "test_1_task_2.json",
]

# JSONL file that receives one result row per task; recreated on every run.
OUTPUT_PATH = "../results_small.jsonl"

# Value passed to run_task() as ``timeout_sec`` for every task.
TIMEOUT_SEC = 600


def wait_browser_ready(
    url: str = "http://localhost:9222/json/version",
    timeout: int = 120,
) -> bool:
    """Poll *url* until it answers like a ready browser or *timeout* elapses.

    The endpoint counts as ready when it responds with HTTP 200 and the body
    contains the text ``webSocketDebuggerUrl``.

    Args:
        url: Endpoint to poll.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as the endpoint is ready, False if the deadline passes
        first (the last error seen, if any, is printed).
    """
    # A monotonic clock keeps the deadline correct even if the wall clock
    # is adjusted while we are waiting.
    started = time.monotonic()
    last_error = None

    while time.monotonic() - started < timeout:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                body = resp.read().decode("utf-8", errors="ignore")
                if resp.status == 200 and "webSocketDebuggerUrl" in body:
                    print("browser ready")
                    return True
        except Exception as e:
            # Deliberately broad: any failure here just means "not ready
            # yet"; the error is kept so it can be reported on timeout.
            last_error = e
        time.sleep(2)

    print("browser not ready in time")
    print("last error:", last_error)
    return False


# NOTE(review): reset_browser is disabled. Re-enabling it needs
# `import subprocess` (not among the imports above) and the hard-coded
# `cwd` below points at one developer's machine.
#
# def reset_browser() -> None:
#     print("resetting browser container...")
#     subprocess.run(
#         ["docker", "compose", "restart", "browser"],
#         check=True,
#         cwd="/Users/aleksandr/Desktop/Quality_evaluation/BrowserUse_and_ComputerUse_skills",
#     )
#
#     ready = wait_browser_ready()
#     if not ready:
#         raise RuntimeError("Browser did not become ready after restart")
#
#     time.sleep(3)
#     print("browser restarted")


def load_single_task(path: str) -> dict:
    """Read the UTF-8 JSON file at *path* and return its parsed content."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def append_jsonl(path: str, row: dict) -> None:
    """Append *row* to the file at *path* as a single JSON line.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).
    """
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(row, ensure_ascii=False) + "\n")


def main() -> None:
    """Run every task in TASK_FILES, write the results, and print a summary.

    Any existing output file is removed first so the JSONL file only holds
    rows from the current run. Each result is tagged with its source file
    and the elapsed time before being appended.
    """
    out_path = Path(OUTPUT_PATH)
    if out_path.exists():
        out_path.unlink()

    summary = {"success": 0, "timeout": 0, "error": 0, "failed": 0}

    for i, task_file in enumerate(TASK_FILES, start=1):
        print(f"\n===== TASK {i}/{len(TASK_FILES)} =====")
        print("file:", task_file)

        task = load_single_task(task_file)
        print("annotation_id:", task.get("annotation_id"))
        print("instruction:", task.get("confirmed_task"))

        # reset_browser()

        started = time.monotonic()
        result = run_task(task, timeout_sec=TIMEOUT_SEC)
        elapsed = round(time.monotonic() - started, 2)

        result["source_file"] = task_file
        result["elapsed_sec"] = elapsed
        append_jsonl(OUTPUT_PATH, result)

        # The row is already persisted at this point, so a missing key must
        # not abort the remaining tasks: read reporting fields with .get()
        # and count a missing/unknown status in the "failed" bucket.
        status = result.get("status")
        if status in summary:
            summary[status] += 1
        else:
            summary["failed"] += 1

        print("status:", status)
        print("success:", result.get("success"))
        print("elapsed:", elapsed)
        print("error:", result.get("error"))
        print("browser_view:", result.get("browser_view"))

    print("\n===== SUMMARY =====")
    print(summary)


if __name__ == "__main__":
    main()