# Quality_evaluation/Mind2Web/eval_v2/agent_parser.py

from __future__ import annotations

import re
from typing import Any


# Matches terminal escape sequences of the form ESC [ <digits and semicolons> m
# (the colour/style codes that show up in captured console output).
ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")


def _clean(text: str) -> str:
    """Return *text* with ANSI colour codes removed and trailing whitespace trimmed.

    Every substring matched by ``ANSI_RE`` is deleted, then whitespace on the
    right-hand side is stripped. Leading whitespace is left untouched.
    """
    return ANSI_RE.sub("", text).rstrip()


def parse_agent_log(log_text: str) -> list[dict[str, Any]]:
    """Extract the recognised agent actions from a raw log dump.

    The log is processed line by line. A line is considered only if, after
    cleaning with ``_clean``, it contains the "▶️" marker; everything up to
    and including the first marker is discarded, as is an optional leading
    step counter such as ``[1/2]``. If what remains starts with one of the
    known action prefixes (``navigate:``, ``click:``, ``input:``, ``scroll:``,
    ``wait:``, ``switch:``, ``done:``, ``search_page:``, ``extract:``), an
    entry is recorded.

    Args:
        log_text: Full text of the agent log.

    Returns:
        A list of ``{"type": <action name>, "raw": <action text>}`` dicts in
        the order the actions appear in the log. Lines without the marker or
        with an unknown prefix are skipped.
    """
    known_types = (
        "navigate",
        "click",
        "input",
        "scroll",
        "wait",
        "switch",
        "done",
        "search_page",
        "extract",
    )
    marker = "▶️"
    parsed: list[dict[str, Any]] = []

    for raw_line in log_text.splitlines():
        # Keep only the part of the line AFTER the first arrow marker.
        _, found, tail = _clean(raw_line).partition(marker)
        if not found:
            continue

        # Drop a leading step counter of the form "[1/2]".
        body = re.sub(r"^\[\d+/\d+\]\s*", "", tail.strip()).strip()

        # The action name is whatever precedes the first colon; none of the
        # known names contains a colon, so this equals a startswith check.
        kind, colon, _ = body.partition(":")
        if colon and kind in known_types:
            parsed.append({"type": kind, "raw": body})

    return parsed


def extract_final_answer(agent_actions: list[dict[str, Any]]) -> str:
    """Return the answer text carried by the last ``done`` action.

    Args:
        agent_actions: Action dicts with at least ``"type"`` and ``"raw"``
            keys (the shape produced by ``parse_agent_log``).

    Returns:
        The text following ``done: text:`` in the most recent ``done``
        action, cut off before a trailing ``, success:`` part when present
        and stripped of surrounding whitespace. If that action's raw text
        does not have the ``done: text:`` form, the raw text is returned
        unchanged. An empty string is returned when there is no ``done``
        action at all.
    """
    # Walk backwards so the most recent "done" wins.
    last_done = next(
        (item for item in reversed(agent_actions) if item["type"] == "done"),
        None,
    )
    if last_done is None:
        return ""

    payload = last_done["raw"]
    found = re.search(
        r"done:\s*text:\s*(.*?)(?:,\s*success:|$)", payload, flags=re.DOTALL
    )
    return found.group(1).strip() if found else payload