Quality_evaluation/stuff/loaders.py
Aleksandr Dubchak 98d5e90894 mind2web
2026-04-23 00:04:11 +03:00

42 lines
1.1 KiB
Python

import json
from typing import List
from one_Task_class import Task
def load_mind2web_tasks(path: str, limit: int | None = None) -> List[Task]:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, list):
raise ValueError(f"Expected list of tasks in {path}, got {type(data).__name__}")
tasks: List[Task] = []
for item in data:
if not isinstance(item, dict):
continue
annotation_id = item.get("annotation_id")
confirmed_task = item.get("confirmed_task")
website = item.get("website")
# пропускаем битые записи
if not annotation_id or not confirmed_task:
continue
task = Task(
id=annotation_id,
dataset="mind2web",
website=website,
instruction=confirmed_task,
start_url=None,
expected=None,
raw=item,
)
tasks.append(task)
if limit is not None and len(tasks) >= limit:
break
return tasks