# Quality_evaluation/Mind2Web/eval_v2/dataset_loader.py

# This module should:
#   - read the task JSON
#   - extract the instruction
#   - extract the gold actions
#   - return everything in a normalized form

from __future__ import annotations

import json
from pathlib import Path
from typing import Any


def load_task_json(path: str | Path) -> dict[str, Any]:
    """Read a UTF-8 JSON file and return its top-level object.

    Args:
        path: Location of the JSON file, as a string or ``Path``.

    Returns:
        The parsed top-level JSON object.

    Raises:
        ValueError: If the file parses but its top-level value is not a dict.
    """
    file_path = Path(path)
    payload = json.loads(file_path.read_text(encoding="utf-8"))

    if isinstance(payload, dict):
        return payload

    # Lists, strings, numbers etc. are valid JSON but not a valid task record.
    raise ValueError(f"Expected dict in {file_path}, got {type(payload).__name__}")


def extract_instruction(task: dict[str, Any]) -> str:
    """Return the task's instruction text, or ``""`` when none is present.

    The keys ``confirmed_task``, ``task``, ``instruction`` and ``intent`` are
    checked in that order. The first value that is a string containing
    non-whitespace characters is returned with surrounding whitespace removed.
    """
    for key in ("confirmed_task", "task", "instruction", "intent"):
        text = task.get(key)
        if not isinstance(text, str):
            continue
        stripped = text.strip()
        if stripped:
            return stripped
    return ""


def extract_annotation_id(task: dict[str, Any]) -> str:
    """Return the task's identifier, or ``""`` when none is present.

    The keys ``annotation_id``, ``id`` and ``task_id`` are checked in that
    order. Only string values with non-whitespace content qualify; the first
    such value is returned stripped. Non-string values are skipped.
    """
    lookup_order = ("annotation_id", "id", "task_id")
    raw_values = (task.get(key) for key in lookup_order)
    usable = (
        raw.strip()
        for raw in raw_values
        if isinstance(raw, str) and raw.strip()
    )
    return next(usable, "")


def extract_gold_actions(task: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the first list-valued action field found on the task.

    Mind2Web variants may store actions under different keys, so the keys
    below are probed in a fixed priority order. The first value that is a
    list is returned as-is (the same object, not a copy; its elements are not
    validated). An empty list is returned when no key holds a list.
    """
    lookup_order = (
        "actions",
        "action_reprs",
        "operation",
        "operations",
        "action_uid",
        # Alternative names, consulted only after the common ones above.
        "trace",
        "gold_actions",
    )

    for key in lookup_order:
        found = task.get(key)
        if isinstance(found, list):
            return found

    return []


def summarize_task(task: dict[str, Any]) -> dict[str, Any]:
    """Build a compact overview of a task record.

    Returns a dict with the task's annotation id, its instruction text, the
    number of gold actions, and a preview holding at most the first two gold
    actions.
    """
    actions = extract_gold_actions(task)

    return {
        "annotation_id": extract_annotation_id(task),
        "instruction": extract_instruction(task),
        "gold_actions_count": len(actions),
        # Slicing keeps the preview small regardless of how long the list is.
        "gold_actions_preview": actions[:2],
    }


def normalize_gold_actions(gold_actions: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert raw gold actions into uniform ``{"type": ..., "raw": ...}`` records.

    Every input item yields exactly one output record, in input order:

    * A non-dict item becomes ``{"type": "unknown", "raw": item}``.
    * A dict whose ``"operation"`` value is a dict takes its type from the
      first truthy value among that dict's ``"op"``, ``"operation"`` and
      ``"type"`` keys (``"unknown"`` if none), with the operation dict as
      ``raw``.
    * A dict whose ``"operation"`` value is present but not a dict becomes
      ``{"type": "unknown", "raw": <that value>}``.
    * A dict with no ``"operation"`` key is treated as the operation itself,
      so a flat record such as ``{"op": "CLICK"}`` keeps its type and payload
      instead of collapsing to an empty ``raw``.

    Args:
        gold_actions: Raw action items; elements need not all be dicts.

    Returns:
        A new list of normalized records; the input list is not modified.
    """
    normalized: list[dict[str, Any]] = []

    for action in gold_actions:
        if not isinstance(action, dict):
            normalized.append({"type": "unknown", "raw": action})
            continue

        # Without an "operation" wrapper the record is already the operation;
        # defaulting to {} here would throw its contents away.
        op = action["operation"] if "operation" in action else action

        if not isinstance(op, dict):
            normalized.append({"type": "unknown", "raw": op})
            continue

        action_type = (
            op.get("op")
            or op.get("operation")
            or op.get("type")
            or "unknown"
        )
        normalized.append({"type": action_type, "raw": op})

    return normalized