Quality_evaluation/Mind2Web/run_agent_task.py
Aleksandr Dubchak 98d5e90894 mind2web
2026-04-23 00:04:11 +03:00

96 lines
No EOL
2.8 KiB
Python

from __future__ import annotations
import io
import logging
import traceback
from contextlib import redirect_stdout, redirect_stderr
from typing import Any
from dotenv import load_dotenv
from mind2web_runner import run_task
# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): this runs after `mind2web_runner` has been imported, so anything
# that module reads from the environment at import time will not see .env
# values — confirm this ordering is intended.
load_dotenv()
def build_fallback_final_answer(task: dict[str, Any], result: dict[str, Any] | None = None) -> str:
    """Compose a failure message to use when the agent produced no answer.

    The instruction text is taken from the first non-empty value among the
    ``confirmed_task``, ``task`` and ``instruction`` keys of *task*; it is an
    empty string when none of them holds a truthy value.

    Args:
        task: Task record; only the three keys listed above are read.
        result: Optional run result. Its ``error`` value, or failing that its
            ``status`` value, is embedded in the message when truthy.

    Returns:
        A sentence describing the failure, always ending with the instruction.
    """
    # Pick the instruction by key priority; falsy values fall through.
    instruction = ""
    for key in ("confirmed_task", "task", "instruction"):
        candidate = task.get(key)
        if candidate:
            instruction = candidate
            break

    error = status = None
    if result is not None:
        error, status = result.get("error"), result.get("status")

    # An explicit error wins over a bare status; otherwise use the generic text.
    if error:
        return f"Task failed: {error}. Instruction was: {instruction}"
    if status:
        return f"Task failed with status={status}. Instruction was: {instruction}"
    return f"Task failed: agent did not complete the task. Instruction was: {instruction}"
def run_agent_on_task(task: dict[str, Any]) -> dict[str, Any]:
    """Run ``run_task`` on *task* while capturing its logs and console output.

    For the duration of the call the root logger's handlers are replaced by a
    single INFO-level handler writing to an in-memory buffer, and
    ``sys.stdout`` / ``sys.stderr`` are redirected to the same buffer. The
    root logger's previous handlers and level are restored afterwards, whether
    or not the run succeeded.

    Args:
        task: Task record passed unchanged to ``run_task``.

    Returns:
        A dict with three keys:

        * ``"result"`` - the dict returned by ``run_task``, or a synthetic
          failure record when it raised ``Exception`` or returned a non-dict;
        * ``"log_text"`` - everything captured in the buffer;
        * ``"final_answer"`` - ``str(result["result"])`` when that value is
          not ``None``, otherwise a fallback failure message.
    """
    captured = io.StringIO()

    def failure_record(status: str, error: str) -> dict[str, Any]:
        # Same shape for every synthetic failure produced in this function.
        return {
            "success": False,
            "status": status,
            "result": None,
            "error": error,
            "raw": None,
        }

    # Remember the root logger's configuration so it can be put back later.
    root = logging.getLogger()
    saved_handlers = root.handlers[:]
    saved_level = root.level

    # The only handler active during the run; it writes into the buffer.
    capture_handler = logging.StreamHandler(captured)
    capture_handler.setLevel(logging.INFO)
    capture_handler.setFormatter(
        logging.Formatter("%(levelname)-8s [%(name)s] %(message)s")
    )

    outcome: dict[str, Any] | None = None
    try:
        # Drop existing handlers and install only the capturing one.
        root.handlers = []
        root.addHandler(capture_handler)
        root.setLevel(logging.INFO)

        # Capture print() output and anything written to stderr as well.
        with redirect_stdout(captured), redirect_stderr(captured):
            outcome = run_task(task)

        if not isinstance(outcome, dict):
            outcome = failure_record(
                "invalid_result",
                f"run_task returned {type(outcome).__name__}",
            )
    except Exception as exc:
        # Keep the traceback alongside the rest of the captured output.
        traceback.print_exc(file=captured)
        outcome = failure_record("exception", str(exc))
    finally:
        # Put the root logger back exactly as it was found.
        root.handlers = saved_handlers
        root.setLevel(saved_level)

    # Final answer: the agent's own result if present, else a fallback message.
    answer_value = outcome.get("result")
    final_answer = (
        str(answer_value)
        if answer_value is not None
        else build_fallback_final_answer(task, outcome)
    )

    return {
        "result": outcome,
        "log_text": captured.getvalue(),
        "final_answer": final_answer,
    }