78 lines
2.5 KiB
Python
78 lines
2.5 KiB
Python
import time
|
|
import uuid
|
|
from one_Task_class import Task
|
|
from RunTrace import RunTrace
|
|
from StepTrace import StepTrace
|
|
from run_agent_task import run_agent_task
|
|
|
|
|
|
async def run_task(task: Task, agent, model_name: str = None) -> RunTrace:
    """Run one task through the agent and return a populated ``RunTrace``.

    The trace is created up front with failure defaults, so a trace is
    returned even when the agent call raises. Timing fields are always
    filled in, whether the run succeeded or not.

    Args:
        task: The task to run. Its ``id``, ``dataset`` and ``instruction``
            attributes are read.
        agent: Passed through unchanged to ``run_agent_task``.
        model_name: Optional label stored on the trace; not otherwise used
            here.

    Returns:
        The ``RunTrace`` for this run. If any ``Exception`` is raised while
        running the agent or converting its result, ``success`` is ``False``,
        ``fail_reason`` is ``"runtime_exception"`` and ``error`` holds the
        exception text.
    """
    started_at = time.time()

    trace = RunTrace(
        run_id=str(uuid.uuid4()),
        task_id=task.id,
        dataset=task.dataset,
        instruction=task.instruction,
        model_name=model_name,
        started_at=started_at,
        finished_at=None,
        success=False,
        final_url=None,
        final_answer=None,
        error=None,
        fail_reason=None,
        total_steps=0,
        total_tokens=None,
        total_latency_sec=None,
        screenshots_dir=None,
        steps=[],
    )

    try:
        # NOTE(review): the result is only accessed through ``.get``, so it is
        # assumed to be a dict-like mapping -- confirm against run_agent_task.
        result = await run_agent_task(
            agent=agent,
            instruction=task.instruction,
            session_id=f"eval-{task.dataset}-{task.id}",
            history=[],
        )

        trace.success = bool(result.get("success", False))
        trace.final_url = result.get("final_url")
        trace.final_answer = result.get("final_answer")
        trace.error = result.get("error")
        trace.fail_reason = result.get("fail_reason")
        trace.total_tokens = result.get("total_tokens")
        trace.screenshots_dir = result.get("screenshots_dir")

        # ``or []`` also covers a "steps" key that is present but None, which
        # ``.get("steps", [])`` would pass through and crash ``len()``.
        raw_steps = result.get("steps") or []

        # Build every StepTrace before touching the trace so that a malformed
        # step cannot leave ``total_steps`` out of sync with ``steps``.
        step_traces = [
            StepTrace(
                step_no=i,
                timestamp=step.get("timestamp", time.time()),
                thought=step.get("thought"),
                action_type=step.get("action_type", "unknown"),
                action_target=step.get("action_target"),
                action_value=step.get("action_value"),
                url_before=step.get("url_before"),
                url_after=step.get("url_after"),
                screenshot_before=step.get("screenshot_before"),
                screenshot_after=step.get("screenshot_after"),
                success=step.get("success", True),
                error=step.get("error"),
            )
            for i, step in enumerate(raw_steps)
        ]
        trace.total_steps = len(step_traces)
        trace.steps.extend(step_traces)

    except Exception as e:
        # The failure may happen after ``success`` was copied from the result;
        # reset it so the trace never reports success alongside a runtime
        # exception.
        trace.success = False
        # Some exceptions carry no message and stringify to ""; fall back to
        # repr() so the exception type is still recorded.
        trace.error = str(e) or repr(e)
        trace.fail_reason = "runtime_exception"

    finally:
        finished_at = time.time()
        trace.finished_at = finished_at
        trace.total_latency_sec = finished_at - started_at

    return trace
|