mind2web
This commit is contained in:
parent
2b5d923f63
commit
98d5e90894
754 changed files with 1175740 additions and 142424 deletions
106
Mind2Web/run_dataset.py
Normal file
106
Mind2Web/run_dataset.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
import urllib.request
|
||||
from mind2web_runner import run_task
|
||||
|
||||
# Task JSON files to run, in order. main() passes each entry unchanged to
# load_single_task(), so relative names resolve against the working directory.
TASK_FILES = [
    "test_1_task_0.json",
    "test_1_task_1.json",
    "test_1_task_2.json",
]

# JSONL results file; main() deletes it at start-up and appends one row per task.
OUTPUT_PATH = "../results_small.jsonl"

# Handed to run_task() as ``timeout_sec`` (presumably seconds, i.e. 10 minutes).
TIMEOUT_SEC = 10 * 60
|
||||
|
||||
|
||||
def wait_browser_ready(
    url: str = "http://localhost:9222/json/version",
    timeout: int = 120,
    poll_interval: float = 2.0,
) -> bool:
    """Poll *url* until it answers like a ready browser debugging endpoint.

    An attempt succeeds when the response has HTTP status 200 and its body
    contains the text ``webSocketDebuggerUrl``.

    Args:
        url: Endpoint to poll.
        timeout: Overall polling budget in seconds. Each individual request
            additionally has its own fixed 5-second timeout.
        poll_interval: Pause between attempts in seconds. The pause is
            shortened to the time remaining in the budget, so a short
            *timeout* is not overrun by a full-length sleep.

    Returns:
        True as soon as one attempt succeeds; False once the budget is spent.
        On failure the last exception seen (or None) is printed.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    last_error = None

    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                body = resp.read().decode("utf-8", errors="ignore")
                if resp.status == 200 and "webSocketDebuggerUrl" in body:
                    print("browser ready")
                    return True
        except Exception as e:
            # Best-effort polling: any failure (connection refused, timeout,
            # malformed URL, ...) just means "not ready yet"; keep the most
            # recent one for the final report.
            last_error = e

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))

    print("browser not ready in time")
    print("last error:", last_error)
    return False
|
||||
|
||||
|
||||
# NOTE: reset_browser() is disabled. Re-enabling it also requires
# `import subprocess`, which is not among the imports at the top of this file.
#
# def reset_browser() -> None:
#     print("resetting browser container...")
#     subprocess.run(
#         ["docker", "compose", "restart", "browser"],
#         check=True,
#         cwd="/Users/aleksandr/Desktop/Quality_evaluation/BrowserUse_and_ComputerUse_skills",
#     )
#
#     ready = wait_browser_ready()
#     if not ready:
#         raise RuntimeError("Browser did not become ready after restart")
#
#     time.sleep(3)
#     print("browser restarted")
|
||||
|
||||
|
||||
def load_single_task(path: str) -> dict:
    """Read the UTF-8 file at *path* and return its parsed JSON content."""
    with open(path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
|
||||
|
||||
|
||||
def append_jsonl(path: str, row: dict) -> None:
    """Append *row* to the file at *path* as one JSON document on its own line.

    The file is opened in append mode as UTF-8, and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.
    """
    with open(path, mode="a", encoding="utf-8") as sink:
        serialized = json.dumps(row, ensure_ascii=False)
        sink.write(f"{serialized}\n")
|
||||
|
||||
|
||||
def main():
    """Run every file in TASK_FILES through run_task() and record the results.

    Any existing file at OUTPUT_PATH is removed first. For each task file the
    task is loaded, run with ``timeout_sec=TIMEOUT_SEC``, tagged with its
    source file name and elapsed time, and appended to OUTPUT_PATH as one
    JSONL row. A per-status tally is printed at the end; a status that is not
    one of the tally's keys is counted under ``"failed"``.
    """
    out_path = Path(OUTPUT_PATH)
    # Rows are appended one at a time below, so start from a clean file.
    if out_path.exists():
        out_path.unlink()

    summary = {"success": 0, "timeout": 0, "error": 0, "failed": 0}
    total = len(TASK_FILES)

    for i, task_file in enumerate(TASK_FILES, start=1):
        print(f"\n===== TASK {i}/{total} =====")
        print("file:", task_file)

        task = load_single_task(task_file)
        print("annotation_id:", task.get("annotation_id"))
        print("instruction:", task.get("confirmed_task"))

        # reset_browser()  # per-task browser restart is currently disabled

        # Monotonic clock so the measured duration is immune to clock changes.
        started = time.monotonic()
        result = run_task(task, timeout_sec=TIMEOUT_SEC)
        elapsed = round(time.monotonic() - started, 2)

        result["source_file"] = task_file
        result["elapsed_sec"] = elapsed
        append_jsonl(OUTPUT_PATH, result)

        # The row is already on disk at this point, so read the reporting
        # fields with .get(): a result missing one of them must not abort the
        # remaining tasks or lose the summary.
        status = result.get("status")
        bucket = status if status in summary else "failed"
        summary[bucket] += 1

        print("status:", status)
        print("success:", result.get("success"))
        print("elapsed:", elapsed)
        print("error:", result.get("error"))
        print("browser_view:", result.get("browser_view"))

    print("\n===== SUMMARY =====")
    print(summary)
|
||||
|
||||
|
||||
# Run the batch only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue