mind2web
This commit is contained in:
parent
2b5d923f63
commit
98d5e90894
754 changed files with 1175740 additions and 142424 deletions
91
Mind2Web/eval_v2/comparator.py
Normal file
91
Mind2Web/eval_v2/comparator.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
|
||||
|
||||
def canonical_action_type(action_type: str) -> str:
    """Return the canonical, lower-cased name for an action type label.

    Surrounding whitespace is stripped and the label is lower-cased before
    lookup. ``"enter"`` is folded into ``"type"``; every other listed name
    maps to itself. Labels that are not listed are returned in their
    normalised (stripped, lower-cased) form.

    Args:
        action_type: Raw action type label.

    Returns:
        The canonical action type, or ``"unknown"`` when the input is
        missing, is not a string, or is blank after stripping.
    """
    # Non-string values (None, numbers, containers) carry no usable label;
    # report them as unknown rather than raising on ``.strip()``.
    if not isinstance(action_type, str):
        return "unknown"

    normalized = action_type.strip().lower()
    # Whitespace-only input would otherwise leak out as an empty type name.
    if not normalized:
        return "unknown"

    mapping = {
        "click": "click",
        "type": "type",
        "select": "select",
        "hover": "hover",
        "enter": "type",
        "navigate": "navigate",
        "scroll": "scroll",
        "search_page": "search_page",
        "extract": "extract",
        "done": "done",
    }

    # Unlisted labels pass through in normalised form.
    return mapping.get(normalized, normalized)
|
||||
|
||||
|
||||
def normalize_for_compare(actions: list[dict]) -> list[dict]:
    """Reduce each action to a ``{"type", "raw"}`` record for comparison.

    Dict entries get their ``"type"`` value (defaulting to ``"unknown"``)
    passed through ``canonical_action_type`` and keep whatever is stored
    under their ``"raw"`` key (``None`` when the key is absent). Entries
    that are not dicts become ``{"type": "unknown", "raw": <entry>}``.

    Args:
        actions: Sequence of action records.

    Returns:
        A new list with one reduced record per input entry, in input order.
    """

    def _reduce(entry):
        # Non-dict entries have no "type" field to read; keep the entry
        # itself as the raw payload.
        if not isinstance(entry, dict):
            return {"type": "unknown", "raw": entry}
        return {
            "type": canonical_action_type(entry.get("type", "unknown")),
            "raw": entry.get("raw"),
        }

    return [_reduce(entry) for entry in actions]
|
||||
|
||||
|
||||
def compare_action_sequences(gold_actions: list[dict], agent_actions: list[dict]) -> dict:
    """Compare two action sequences position by position on action type.

    Both sequences are first passed through ``normalize_for_compare``.
    Step ``i`` of the gold sequence is then compared with step ``i`` of the
    agent sequence over their common prefix length; a step matches when the
    two normalised ``"type"`` values are equal.

    Args:
        gold_actions: Reference action records.
        agent_actions: Action records to compare against the reference.

    Returns:
        A dict with:
          * ``gold_len`` / ``agent_len``: lengths of the normalised sequences.
          * ``exact_matches``: number of positions whose types are equal.
          * ``precision``: matches divided by ``agent_len`` (0.0 if empty).
          * ``recall``: matches divided by ``gold_len`` (0.0 if empty).
          * ``f1``: harmonic mean of precision and recall (0.0 if both are 0).
          * ``aligned``: per-step records with 1-based ``step``,
            ``gold_type``, ``agent_type`` and ``match``.
          * ``missing_gold``: normalised gold steps beyond the common prefix.
          * ``extra_agent``: normalised agent steps beyond the common prefix.
        The three scores are rounded to three decimal places.
    """
    gold_seq = normalize_for_compare(gold_actions)
    agent_seq = normalize_for_compare(agent_actions)

    gold_total = len(gold_seq)
    agent_total = len(agent_seq)
    overlap = min(gold_total, agent_total)

    # zip() stops at the shorter sequence, i.e. exactly the common prefix.
    aligned = [
        {
            "step": position,
            "gold_type": gold_step["type"],
            "agent_type": agent_step["type"],
            "match": gold_step["type"] == agent_step["type"],
        }
        for position, (gold_step, agent_step) in enumerate(
            zip(gold_seq, agent_seq), start=1
        )
    ]
    hits = sum(1 for row in aligned if row["match"])

    # Guard each ratio against an empty denominator.
    if agent_total:
        precision = hits / agent_total
    else:
        precision = 0.0

    if gold_total:
        recall = hits / gold_total
    else:
        recall = 0.0

    score_sum = precision + recall
    if score_sum > 0:
        f1 = 2 * precision * recall / score_sum
    else:
        f1 = 0.0

    return {
        "gold_len": gold_total,
        "agent_len": agent_total,
        "exact_matches": hits,
        "precision": round(precision, 3),
        "recall": round(recall, 3),
        "f1": round(f1, 3),
        "aligned": aligned,
        # Whatever lies past the common prefix on either side is unpaired.
        "missing_gold": gold_seq[overlap:],
        "extra_agent": agent_seq[overlap:],
    }
|
||||
Loading…
Add table
Add a link
Reference in a new issue