This commit is contained in:
Aleksandr Dubchak 2026-04-23 00:04:11 +03:00
parent 2b5d923f63
commit 98d5e90894
754 changed files with 1175740 additions and 142424 deletions

42
stuff/loaders.py Normal file
View file

@ -0,0 +1,42 @@
import json
from typing import List
from one_Task_class import Task
def load_mind2web_tasks(path: str, limit: int | None = None) -> List[Task]:
    """Load tasks from a JSON file and convert them to ``Task`` objects.

    The file must contain a JSON array. Every element that is a dict with
    non-empty ``annotation_id`` and ``confirmed_task`` values becomes one
    ``Task`` with ``dataset="mind2web"``; the original dict is kept in
    ``raw``. Elements that are not dicts, or that lack either required
    field, are silently skipped.

    Args:
        path: Path to the JSON file, read as UTF-8.
        limit: Maximum number of tasks to return. ``None`` means no cap.
            A value of zero or less yields an empty list.

    Returns:
        The converted tasks, in file order, at most ``limit`` of them.

    Raises:
        ValueError: If the top-level JSON value is not a list.
        OSError: Propagated from ``open`` if the file cannot be read.
        json.JSONDecodeError: Propagated from ``json.load`` on invalid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError(f"Expected list of tasks in {path}, got {type(data).__name__}")

    tasks: List[Task] = []
    for item in data:
        # Check the cap *before* building the next task: checking only after
        # an append would let limit=0 (or a negative limit) return one task.
        if limit is not None and len(tasks) >= limit:
            break

        if not isinstance(item, dict):
            continue

        annotation_id = item.get("annotation_id")
        confirmed_task = item.get("confirmed_task")
        website = item.get("website")

        # Skip broken records: both the id and the instruction are required.
        if not annotation_id or not confirmed_task:
            continue

        tasks.append(
            Task(
                id=annotation_id,
                dataset="mind2web",
                website=website,
                instruction=confirmed_task,
                start_url=None,
                expected=None,
                raw=item,
            )
        )

    return tasks