Quality_evaluation/stuff/run_task.py
Aleksandr Dubchak 98d5e90894 mind2web
2026-04-23 00:04:11 +03:00

78 lines
2.5 KiB
Python

import time
import uuid
from one_Task_class import Task
from RunTrace import RunTrace
from StepTrace import StepTrace
from run_agent_task import run_agent_task
def _build_step_trace(step_no: int, step) -> StepTrace:
    """Convert one raw step mapping from the agent result into a ``StepTrace``.

    Keys absent from ``step`` become ``None``, except ``timestamp`` (falls
    back to the current wall-clock time), ``action_type`` (``"unknown"``)
    and ``success`` (``True``).
    """
    return StepTrace(
        step_no=step_no,
        timestamp=step.get("timestamp", time.time()),
        thought=step.get("thought"),
        action_type=step.get("action_type", "unknown"),
        action_target=step.get("action_target"),
        action_value=step.get("action_value"),
        url_before=step.get("url_before"),
        url_after=step.get("url_after"),
        screenshot_before=step.get("screenshot_before"),
        screenshot_after=step.get("screenshot_after"),
        success=step.get("success", True),
        error=step.get("error"),
    )


async def run_task(task: Task, agent, model_name: str = None) -> RunTrace:
    """Run one evaluation task through the agent and record it as a ``RunTrace``.

    The agent is invoked via ``run_agent_task`` with the task instruction, an
    empty history and a session id of the form ``eval-<dataset>-<task id>``.
    The returned result is read with ``.get`` (so it is expected to be
    dict-like) and copied field by field into the trace.

    Args:
        task: Task to execute; ``id``, ``dataset`` and ``instruction`` are read.
        agent: Agent object, passed through to ``run_agent_task`` untouched.
        model_name: Optional label stored on the trace; may be ``None``.

    Returns:
        The populated ``RunTrace``. Any ``Exception`` raised while running the
        agent or converting its result is captured on the trace
        (``success=False``, ``fail_reason="runtime_exception"``) instead of
        being propagated. ``finished_at`` and ``total_latency_sec`` are always
        filled in.
    """
    started_at = time.time()
    trace = RunTrace(
        run_id=str(uuid.uuid4()),
        task_id=task.id,
        dataset=task.dataset,
        instruction=task.instruction,
        model_name=model_name,
        started_at=started_at,
        finished_at=None,
        success=False,
        final_url=None,
        final_answer=None,
        error=None,
        fail_reason=None,
        total_steps=0,
        total_tokens=None,
        total_latency_sec=None,
        screenshots_dir=None,
        steps=[],
    )
    try:
        result = await run_agent_task(
            agent=agent,
            instruction=task.instruction,
            session_id=f"eval-{task.dataset}-{task.id}",
            history=[],
        )
        trace.success = bool(result.get("success", False))
        trace.final_url = result.get("final_url")
        trace.final_answer = result.get("final_answer")
        trace.error = result.get("error")
        trace.fail_reason = result.get("fail_reason")
        trace.total_tokens = result.get("total_tokens")
        trace.screenshots_dir = result.get("screenshots_dir")

        # ``or []`` also covers a result that carries an explicit
        # ``"steps": None``, which ``.get("steps", [])`` would pass through.
        raw_steps = result.get("steps") or []
        for i, step in enumerate(raw_steps):
            trace.steps.append(_build_step_trace(i, step))
        trace.total_steps = len(trace.steps)
    except Exception as e:
        # A failure after ``success`` was copied from the result (e.g. a
        # malformed step) must not leave the run marked as successful.
        trace.success = False
        # Some exceptions (a bare ``TimeoutError()`` for instance) have an
        # empty ``str()``; fall back to ``repr`` so the error is never blank.
        trace.error = str(e) or repr(e)
        trace.fail_reason = "runtime_exception"
        # Keep the counter in sync with the steps actually recorded.
        trace.total_steps = len(trace.steps)
    finally:
        finished_at = time.time()
        trace.finished_at = finished_at
        trace.total_latency_sec = finished_at - started_at
    # Returned outside ``finally`` so that non-``Exception`` interruptions
    # (such as task cancellation) still propagate to the caller.
    return trace