Quality_evaluation/Mind2Web/eval_v2/dataset_loader.py
Aleksandr Dubchak 98d5e90894 mind2web
2026-04-23 00:04:11 +03:00

120 lines
3.1 KiB
Python

# This module should:
# • read the task JSON
# • extract the instruction
# • extract the gold actions
# • return everything in a normalized form
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
def load_task_json(path: str | Path) -> dict[str, Any]:
    """Read a UTF-8 JSON file and return its top-level object.

    Args:
        path: Location of the JSON file, given as a string or ``Path``.

    Returns:
        The decoded top-level JSON value, which must be a dict.

    Raises:
        ValueError: If the decoded top-level value is not a dict.
    """
    file_path = Path(path)
    data = json.loads(file_path.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return data
    # Anything other than a JSON object at the top level is rejected.
    raise ValueError(f"Expected dict in {file_path}, got {type(data).__name__}")
def extract_instruction(task: dict[str, Any]) -> str:
    """Return the instruction text stored in *task*.

    The keys ``confirmed_task``, ``task``, ``instruction`` and ``intent`` are
    probed in that order. The first value that is a string containing
    something other than whitespace is returned with surrounding whitespace
    stripped. An empty string is returned when no key qualifies.
    """
    for key in ("confirmed_task", "task", "instruction", "intent"):
        text = task.get(key)
        if not isinstance(text, str):
            continue
        stripped = text.strip()
        if stripped:
            return stripped
    return ""
def extract_annotation_id(task: dict[str, Any]) -> str:
    """Return the identifier stored in *task*.

    The keys ``annotation_id``, ``id`` and ``task_id`` are probed in that
    order. Only string values are considered; the first one that is
    non-empty after stripping whitespace is returned in stripped form.
    An empty string is returned when no key qualifies.
    """
    raw_values = (task.get(key) for key in ("annotation_id", "id", "task_id"))
    stripped_values = (value.strip() for value in raw_values if isinstance(value, str))
    return next((value for value in stripped_values if value), "")
def extract_gold_actions(task: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the first list-valued action field found in *task*.

    Mind2Web variants may store actions under different keys, so the keys
    below are probed in a fixed order. The first value that is a list is
    returned as-is (the same list object, not a copy), even if it is empty.
    A new empty list is returned when none of the keys holds a list.
    """
    probe_order = (
        "actions",
        "action_reprs",
        "operation",
        "operations",
        "action_uid",
        # Fallback keys, consulted only after the common ones above.
        "trace",
        "gold_actions",
    )
    for key in probe_order:
        found = task.get(key)
        if isinstance(found, list):
            return found
    return []
def summarize_task(task: dict[str, Any]) -> dict[str, Any]:
    """Build a compact overview of *task*.

    Returns:
        A dict with four entries: ``annotation_id`` and ``instruction`` (as
        produced by ``extract_annotation_id`` and ``extract_instruction``),
        ``gold_actions_count`` (length of the list returned by
        ``extract_gold_actions``) and ``gold_actions_preview`` (at most the
        first two items of that list).
    """
    actions = extract_gold_actions(task)
    return dict(
        annotation_id=extract_annotation_id(task),
        instruction=extract_instruction(task),
        gold_actions_count=len(actions),
        gold_actions_preview=actions[:2],
    )
def normalize_gold_actions(gold_actions: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert each gold action into a ``{"type": ..., "raw": ...}`` record.

    For every entry of *gold_actions*, in order:

    * an entry that is not a dict yields type ``"unknown"`` with the entry
      itself as ``raw``;
    * otherwise its ``"operation"`` value (``{}`` when the key is absent)
      becomes ``raw``. When that value is a dict, the type is the first
      truthy value among its ``"op"``, ``"operation"`` and ``"type"`` keys,
      falling back to ``"unknown"``. When it is not a dict, the type is
      ``"unknown"``.

    Returns:
        A new list with one record per input entry.
    """

    def _normalize_one(entry: Any) -> dict[str, Any]:
        # Entries that are not dicts are passed through untouched as "raw".
        if not isinstance(entry, dict):
            return {"type": "unknown", "raw": entry}

        operation = entry.get("operation", {})
        kind = "unknown"
        if isinstance(operation, dict):
            for key in ("op", "operation", "type"):
                value = operation.get(key)
                if value:
                    kind = value
                    break
        return {"type": kind, "raw": operation}

    return [_normalize_one(entry) for entry in gold_actions]