import json
from typing import List

from one_Task_class import Task


def load_mind2web_tasks(path: str, limit: int | None = None) -> List[Task]:
    """Load tasks from a Mind2Web-style JSON file.

    The file must contain a JSON array at the top level. Each element that
    is a dict with truthy ``annotation_id`` and ``confirmed_task`` values
    is converted into a ``Task``; every other element is silently skipped.

    Args:
        path: Path to the JSON file; it is read as UTF-8.
        limit: Maximum number of tasks to return. ``None`` means no limit.
            Zero or a negative value yields an empty list.

    Returns:
        The tasks in file order, at most ``limit`` of them.

    Raises:
        ValueError: If the top-level JSON value is not a list.
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError(f"Expected list of tasks in {path}, got {type(data).__name__}")

    tasks: List[Task] = []

    for item in data:
        # Check the cap *before* appending so that limit <= 0 really
        # returns nothing instead of leaking the first valid task.
        if limit is not None and len(tasks) >= limit:
            break

        if not isinstance(item, dict):
            continue

        annotation_id = item.get("annotation_id")
        confirmed_task = item.get("confirmed_task")
        website = item.get("website")

        # Skip broken records: both the id and the instruction are required.
        if not annotation_id or not confirmed_task:
            continue

        tasks.append(
            Task(
                id=annotation_id,
                dataset="mind2web",
                website=website,  # may be None when the record has no "website" key
                instruction=confirmed_task,
                start_url=None,
                expected=None,
                raw=item,  # keep the original record alongside the parsed fields
            )
        )

    return tasks