Quality_evaluation/Mind2Web/run_one_task.py
Aleksandr Dubchak 98d5e90894 mind2web
2026-04-23 00:04:11 +03:00

47 lines
No EOL
1.3 KiB
Python

from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from mind2web_runner import run_task
# Load variables from a local .env file into the process environment so that
# main() can report BROWSER_URL / BROWSER_VIEW_URL via os.getenv.
# NOTE(review): this runs after `mind2web_runner` has been imported; if that
# module reads environment variables at import time it will not see values
# coming from .env — confirm, and move this call above the import if needed.
load_dotenv()
def main() -> None:
    """Run one task through ``run_task`` and persist what the agent returned.

    Positional command-line arguments (both optional):
        1. Path to the task JSON file; defaults to ``test_1_task_0.json``.
        2. Output directory; defaults to ``eval_v2/tmp_single_run``. It is
           created, together with any missing parents, before the task runs.

    Side effects:
        * Prints the ``BROWSER_URL`` / ``BROWSER_VIEW_URL`` environment
          values, the full result as JSON, and the final answer.
        * Writes ``agent_final.txt`` (the final answer text) and
          ``agent_result.json`` (the full result) into the output directory.
    """
    cli_args = sys.argv[1:]
    task_file = Path(cli_args[0]) if cli_args else Path("test_1_task_0.json")
    if len(cli_args) > 1:
        output_dir = Path(cli_args[1])
    else:
        output_dir = Path("eval_v2/tmp_single_run")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Echo the browser endpoints so a misconfigured environment is visible
    # in the run log.
    print("RUNNER BROWSER_URL =", os.environ.get("BROWSER_URL"))
    print("RUNNER BROWSER_VIEW_URL =", os.environ.get("BROWSER_VIEW_URL"))

    task = json.loads(task_file.read_text(encoding="utf-8"))
    result = run_task(task)

    # Serialize once; the same text goes to stdout and to agent_result.json.
    result_json = json.dumps(result, ensure_ascii=False, indent=2)
    print("==== RESULT ====")
    print(result_json)

    # A missing or None "result" entry is reported as a failed task.
    answer = result.get("result")
    final_answer = (
        "Task failed: agent did not complete the task."
        if answer is None
        else str(answer)
    )

    (output_dir / "agent_final.txt").write_text(final_answer, encoding="utf-8")
    (output_dir / "agent_result.json").write_text(result_json, encoding="utf-8")

    print("\n==== FINAL ANSWER SAVED ====")
    print(final_answer)
# Script entry point: run the task only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()