mind2web

2026-04-23 00:04:11 +03:00 · 2026-04-23 00:04:11 +03:00 · 98d5e90894
commit 98d5e90894
parent 2b5d923f63
754 changed files with 1175740 additions and 142424 deletions
--- a/Mind2Web/eval_v2/comparator.py
+++ b/Mind2Web/eval_v2/comparator.py
@ -0,0 +1,91 @@
+from __future__ import annotations
+
+
+def canonical_action_type(action_type: str) -> str:
+    """Return the canonical lower-case name for an action type.
+
+    The label is stripped of surrounding whitespace and lower-cased.
+    ``"enter"`` is folded into ``"type"``; every other name (``"click"``,
+    ``"select"``, ``"navigate"``, ... as well as unrecognized names) is
+    returned in its normalized form unchanged.
+
+    Args:
+        action_type: Raw action type label. ``None``, non-string values,
+            and empty or whitespace-only strings are tolerated.
+
+    Returns:
+        The canonical action type, or ``"unknown"`` when no usable label
+        was supplied.
+    """
+    # Non-string values have no .strip(); treat them like a missing label
+    # instead of raising AttributeError.
+    if not isinstance(action_type, str):
+        return "unknown"
+
+    normalized = action_type.strip().lower()
+
+    # Test emptiness *after* stripping so "   " does not leak out as "".
+    if not normalized:
+        return "unknown"
+
+    # Only non-identity mappings need to be listed; any other name maps to
+    # itself through the .get() default.
+    aliases = {
+        "enter": "type",
+    }
+
+    return aliases.get(normalized, normalized)
+
+
+def normalize_for_compare(actions: list[dict]) -> list[dict]:
+    """Reduce each action to a ``{"type", "raw"}`` record for comparison.
+
+    Dict entries have their ``"type"`` value (``"unknown"`` when the key is
+    absent) passed through ``canonical_action_type`` and keep their
+    ``"raw"`` value (``None`` when the key is absent). Entries that are not
+    dicts become ``"unknown"`` records carrying the entry itself as
+    ``"raw"``.
+
+    Args:
+        actions: Sequence of action records.
+
+    Returns:
+        A new list with one normalized record per input entry, in order.
+    """
+
+    def _to_record(entry):
+        # Anything that is not a dict cannot be inspected; keep it as raw.
+        if isinstance(entry, dict):
+            return {
+                "type": canonical_action_type(entry.get("type", "unknown")),
+                "raw": entry.get("raw"),
+            }
+        return {"type": "unknown", "raw": entry}
+
+    return [_to_record(entry) for entry in actions]
+
+
+def compare_action_sequences(gold_actions: list[dict], agent_actions: list[dict]) -> dict:
+    """Compare two action sequences step by step on their action type.
+
+    Both sequences are normalized with ``normalize_for_compare`` and then
+    paired positionally: step ``i`` of the gold sequence is compared with
+    step ``i`` of the agent sequence. A step matches when the two
+    canonical types are equal.
+
+    Args:
+        gold_actions: Reference action sequence.
+        agent_actions: Action sequence produced by the agent.
+
+    Returns:
+        A dict with the sequence lengths, the number of matching steps,
+        ``precision`` (matches / agent length), ``recall`` (matches / gold
+        length) and their ``f1``, each rounded to three decimals; the
+        per-step ``aligned`` rows (1-based ``step``); and the unpaired
+        tails ``missing_gold`` / ``extra_agent`` as normalized records.
+        A ratio whose denominator is zero is reported as ``0.0``.
+    """
+    gold = normalize_for_compare(gold_actions)
+    agent = normalize_for_compare(agent_actions)
+
+    # zip() stops at the shorter sequence, so only the overlapping prefix
+    # is paired up.
+    aligned = []
+    for step, (gold_item, agent_item) in enumerate(zip(gold, agent), start=1):
+        gold_type = gold_item["type"]
+        agent_type = agent_item["type"]
+        aligned.append({
+            "step": step,
+            "gold_type": gold_type,
+            "agent_type": agent_type,
+            "match": gold_type == agent_type,
+        })
+
+    exact_matches = sum(1 for row in aligned if row["match"])
+    paired = len(aligned)
+
+    gold_len = len(gold)
+    agent_len = len(agent)
+
+    def _ratio(total):
+        # An empty sequence yields 0.0 instead of dividing by zero.
+        if not total:
+            return 0.0
+        return exact_matches / total
+
+    precision = _ratio(agent_len)
+    recall = _ratio(gold_len)
+
+    if (precision + recall) > 0:
+        f1 = 2 * precision * recall / (precision + recall)
+    else:
+        f1 = 0.0
+
+    return {
+        "gold_len": gold_len,
+        "agent_len": agent_len,
+        "exact_matches": exact_matches,
+        "precision": round(precision, 3),
+        "recall": round(recall, 3),
+        "f1": round(f1, 3),
+        "aligned": aligned,
+        # Whatever lies beyond the paired prefix had no counterpart.
+        "missing_gold": gold[paired:],
+        "extra_agent": agent[paired:],
+    }