91 lines
2.2 KiB
Python
91 lines
2.2 KiB
Python
from __future__ import annotations
|
|
|
|
|
|
def canonical_action_type(action_type: str) -> str:
    """Return the canonical lower-case name for an action type.

    The input is stripped of surrounding whitespace and lower-cased, then
    known aliases are collapsed onto their canonical name (currently only
    ``"enter"`` -> ``"type"``). Any other name -- including ``click``,
    ``type``, ``select``, ``hover``, ``navigate``, ``scroll``,
    ``search_page``, ``extract`` and ``done`` -- is returned as normalized.

    Args:
        action_type: Raw action type label.

    Returns:
        The canonical action type, or ``"unknown"`` when the input is not a
        string, is empty, or contains only whitespace.
    """
    # Non-string values (None, numbers, ...) cannot name an action; report
    # them as "unknown" rather than failing on ``.strip()``.
    if not isinstance(action_type, str):
        return "unknown"

    normalized = action_type.strip().lower()
    # Check after stripping so that whitespace-only input is treated like "".
    if not normalized:
        return "unknown"

    # Only genuine aliases need an entry; everything else passes through.
    aliases = {
        "enter": "type",
    }
    return aliases.get(normalized, normalized)
|
|
|
|
|
|
def normalize_for_compare(actions: list[dict]) -> list[dict]:
    """Reduce each action to a ``{"type", "raw"}`` record for comparison.

    Args:
        actions: Sequence of action entries; entries are expected to be
            dicts but other values are tolerated.

    Returns:
        A new list with one record per input entry, in the same order.
        For a dict entry, ``"type"`` is the canonicalized value of its
        ``"type"`` key (defaulting to ``"unknown"``) and ``"raw"`` is the
        value of its ``"raw"`` key (``None`` if absent). For any non-dict
        entry, ``"type"`` is ``"unknown"`` and ``"raw"`` is the entry itself.
    """

    def _to_record(entry):
        # Anything that is not a dict is kept verbatim under "raw".
        if isinstance(entry, dict):
            return {
                "type": canonical_action_type(entry.get("type", "unknown")),
                "raw": entry.get("raw"),
            }
        return {"type": "unknown", "raw": entry}

    return [_to_record(entry) for entry in actions]
|
|
|
|
|
|
def compare_action_sequences(gold_actions: list[dict], agent_actions: list[dict]) -> dict:
    """Compare two action sequences position by position on action type.

    Both sequences are normalized with ``normalize_for_compare`` and then
    compared step by step over their common prefix length. Steps are not
    re-aligned: step ``i`` of the gold sequence is only ever compared with
    step ``i`` of the agent sequence.

    Args:
        gold_actions: Reference action sequence.
        agent_actions: Action sequence to evaluate against the reference.

    Returns:
        A dict with:
          * ``gold_len`` / ``agent_len``: lengths of the normalized sequences.
          * ``exact_matches``: number of positions whose types are equal.
          * ``precision``: ``exact_matches / agent_len`` (0.0 if empty).
          * ``recall``: ``exact_matches / gold_len`` (0.0 if empty).
          * ``f1``: harmonic mean of precision and recall (0.0 if both are 0).
          * ``aligned``: per-step records with 1-based ``step``,
            ``gold_type``, ``agent_type`` and ``match``.
          * ``missing_gold``: normalized gold entries beyond the common length.
          * ``extra_agent``: normalized agent entries beyond the common length.
        The three scores are rounded to three decimal places.
    """
    gold = normalize_for_compare(gold_actions)
    agent = normalize_for_compare(agent_actions)

    gold_len, agent_len = len(gold), len(agent)
    overlap = min(gold_len, agent_len)

    # zip() stops at the shorter sequence, i.e. after ``overlap`` steps.
    aligned = []
    for step, (gold_item, agent_item) in enumerate(zip(gold, agent), start=1):
        gold_type = gold_item["type"]
        agent_type = agent_item["type"]
        aligned.append({
            "step": step,
            "gold_type": gold_type,
            "agent_type": agent_type,
            "match": gold_type == agent_type,
        })

    exact_matches = sum(1 for row in aligned if row["match"])

    # Guard against division by zero for empty sequences.
    precision = exact_matches / agent_len if agent_len else 0.0
    recall = exact_matches / gold_len if gold_len else 0.0

    score_sum = precision + recall
    if score_sum > 0:
        f1 = 2 * precision * recall / score_sum
    else:
        f1 = 0.0

    return {
        "gold_len": gold_len,
        "agent_len": agent_len,
        "exact_matches": exact_matches,
        "precision": round(precision, 3),
        "recall": round(recall, 3),
        "f1": round(f1, 3),
        "aligned": aligned,
        "missing_gold": gold[overlap:],
        "extra_agent": agent[overlap:],
    }
|