# File-viewer residue (not part of the program): 106 lines, 2.8 KiB, Python
import json
|
|
import time
|
|
from pathlib import Path
|
|
import urllib.request
|
|
from mind2web_runner import run_task
|
|
|
|
# Task description files processed by main(), in this order.
# The names are opened as given, i.e. relative to the current working directory.
TASK_FILES = [
    "test_1_task_0.json",
    "test_1_task_1.json",
    "test_1_task_2.json",
]

# JSONL file that receives one result row per task.
OUTPUT_PATH = "../results_small.jsonl"

# Per-task time budget handed to run_task as ``timeout_sec`` (10 minutes).
TIMEOUT_SEC = 10 * 60
|
|
|
|
|
|
def wait_browser_ready(url: str = "http://localhost:9222/json/version", timeout: int = 120) -> bool:
    """Poll *url* until it responds like a ready browser debugging endpoint.

    A response counts as "ready" when its HTTP status is 200 and its body
    contains the text ``webSocketDebuggerUrl``.

    Args:
        url: Endpoint to poll.
        timeout: Maximum number of seconds to keep polling. A value of zero
            or less skips polling entirely.

    Returns:
        True as soon as a ready response is seen. False if the timeout
        elapses first; in that case the last failure observed is printed.
    """
    poll_interval = 2  # seconds between attempts
    request_timeout = 5  # seconds allowed for a single HTTP request

    # Use the monotonic clock: a wall-clock adjustment (NTP, DST, manual
    # change) must not stretch or cut short the waiting period.
    deadline = time.monotonic() + timeout
    last_error = None

    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=request_timeout) as resp:
                body = resp.read().decode("utf-8", errors="ignore")
                status = resp.status
        except Exception as e:
            # Deliberately broad: this is a best-effort probe, and any
            # failure simply means "not ready yet, try again".
            last_error = e
        else:
            if status == 200 and "webSocketDebuggerUrl" in body:
                print("browser ready")
                return True
            # The endpoint answered but not in the expected way; keep that
            # for the final diagnostic instead of reporting "None".
            last_error = f"unexpected response (HTTP {status})"

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))

    print("browser not ready in time")
    print("last error:", last_error)
    return False
|
|
|
|
|
|
# NOTE: reset_browser() is currently disabled (kept for reference only).
# Re-enabling it also requires `import subprocess`, which is not among the
# imports visible in this file.
#
# def reset_browser() -> None:
#     print("resetting browser container...")
#     subprocess.run(
#         ["docker", "compose", "restart", "browser"],
#         check=True,
#         cwd="/Users/aleksandr/Desktop/Quality_evaluation/BrowserUse_and_ComputerUse_skills",
#     )
#
#     ready = wait_browser_ready()
#     if not ready:
#         raise RuntimeError("Browser did not become ready after restart")
#
#     time.sleep(3)
#     print("browser restarted")
|
|
|
|
|
|
def load_single_task(path: str) -> dict:
    """Read the UTF-8 encoded JSON file at *path* and return the parsed value.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document (annotated as a dict by this function).
    """
    with open(path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
|
|
|
|
|
|
def append_jsonl(path: str, row: dict) -> None:
    """Append *row* to the file at *path* as one JSON line.

    The file is opened in append mode with UTF-8 encoding; non-ASCII
    characters are written as-is rather than as ``\\u`` escapes.

    Args:
        path: File to append to.
        row: JSON-serialisable record to write.
    """
    with open(path, mode="a", encoding="utf-8") as sink:
        # Serialise after opening, so the file is touched even if encoding fails.
        serialized = json.dumps(row, ensure_ascii=False)
        sink.write(f"{serialized}\n")
|
|
|
|
|
|
def main():
    """Run every file in TASK_FILES through run_task and record the outcomes.

    Steps:
      1. Remove any existing file at OUTPUT_PATH.
      2. For each entry in TASK_FILES: load the task JSON, call ``run_task``
         with ``timeout_sec=TIMEOUT_SEC``, add ``source_file`` and
         ``elapsed_sec`` to the returned result, and append it to
         OUTPUT_PATH as one JSONL row.
      3. Tally results by their ``status`` value; any status that is not
         one of the summary keys is counted as ``"failed"``.
      4. Print the summary.

    NOTE(review): ``run_task`` is assumed to return a mutable dict — confirm
    against mind2web_runner.
    """
    out_path = Path(OUTPUT_PATH)
    # Start from a clean results file. EAFP avoids the check-then-delete race.
    try:
        out_path.unlink()
    except FileNotFoundError:
        pass

    summary = {"success": 0, "timeout": 0, "error": 0, "failed": 0}
    total = len(TASK_FILES)

    for i, task_file in enumerate(TASK_FILES, start=1):
        print(f"\n===== TASK {i}/{total} =====")
        print("file:", task_file)

        task = load_single_task(task_file)
        print("annotation_id:", task.get("annotation_id"))
        print("instruction:", task.get("confirmed_task"))

        # reset_browser()  # disabled: the helper is commented out in this file

        # Monotonic clock so the measured duration is immune to clock changes.
        started = time.monotonic()
        result = run_task(task, timeout_sec=TIMEOUT_SEC)
        elapsed = round(time.monotonic() - started, 2)

        result["source_file"] = task_file
        result["elapsed_sec"] = elapsed
        append_jsonl(OUTPUT_PATH, result)

        # Use .get() from here on: the row is already persisted, so a result
        # lacking an expected key must not abort the remaining tasks.
        status = result.get("status")
        bucket = status if status in summary else "failed"
        summary[bucket] += 1

        print("status:", status)
        print("success:", result.get("success"))
        print("elapsed:", elapsed)
        print("error:", result.get("error"))
        print("browser_view:", result.get("browser_view"))

    print("\n===== SUMMARY =====")
    print(summary)
|
|
|
|
|
|
# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|