99 lines
3.4 KiB
Python
99 lines
3.4 KiB
Python
import json
|
|
import time
|
|
from typing import Any, Dict, Optional
|
|
|
|
|
|
async def run_agent_task(
    agent,
    instruction: str,
    session_id: str,
    history: Optional[list] = None,
) -> Dict[str, Any]:
    """Run one agent conversation and flatten its result into a report dict.

    Calls ``agent.run_conversation(instruction, conversation_history=...,
    task_id=...)`` and, if that call returns an awaitable, awaits it. The
    returned mapping is then scanned for ``role == "tool"`` messages whose
    ``content`` is a JSON object; each such message becomes one entry in
    ``steps``.

    Args:
        agent: Object exposing ``run_conversation``; it may be a plain
            function or a coroutine function.
        instruction: Text passed to the agent as the first positional argument.
        session_id: Forwarded to the agent as ``task_id``.
        history: Prior conversation passed as ``conversation_history``;
            ``None`` (or any falsy value) is replaced by an empty list.

    Returns:
        A dict with the keys ``success``, ``final_url``, ``final_answer``,
        ``fail_reason``, ``error``, ``total_tokens``, ``input_tokens``,
        ``output_tokens``, ``model_name``, ``screenshots_dir``, ``steps``,
        ``started_at``, ``finished_at`` and ``raw_result``.

        ``success`` and ``final_url`` reflect the LAST parsed tool message.
        ``fail_reason`` is one of ``None``, ``"not_completed"``,
        ``"interrupted"``, ``"tool_failed"``, ``"no_tool_output"`` or
        ``"unknown_failure"``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import inspect

    history = history or []

    started_at = time.time()
    result = agent.run_conversation(
        instruction,
        conversation_history=history,
        task_id=session_id,
    )
    # Support async agents: without this a coroutine object would reach the
    # ``result.get`` calls below and fail.
    if inspect.isawaitable(result):
        result = await result
    finished_at = time.time()

    normalized: Dict[str, Any] = {
        "success": False,
        "final_url": None,
        "final_answer": result.get("final_response"),
        "fail_reason": None,
        "error": None,
        "total_tokens": result.get("total_tokens"),
        "input_tokens": result.get("input_tokens"),
        "output_tokens": result.get("output_tokens"),
        "model_name": result.get("model"),
        "screenshots_dir": None,
        "steps": [],
        "started_at": started_at,
        "finished_at": finished_at,
        "raw_result": result,
    }

    completed = bool(result.get("completed", False))

    # Top-level diagnostics; "interrupted" takes precedence over
    # "not_completed" because it is assigned last.
    if not completed:
        normalized["fail_reason"] = "not_completed"
    if result.get("interrupted", False):
        normalized["fail_reason"] = "interrupted"

    # Look for tool output. ``messages`` may be missing or explicitly null.
    messages = result.get("messages") or []
    for msg in messages:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
            continue

        content = msg.get("content")
        if not content:
            continue

        try:
            tool_payload = json.loads(content)
        except (TypeError, ValueError, RecursionError):
            # Non-string content, malformed JSON, or pathologically nested
            # JSON: this message carries no structured payload, skip it.
            continue

        # Valid JSON that is not an object (list, string, number, ...) has
        # no "success" / "browser_view" fields to read.
        if not isinstance(tool_payload, dict):
            continue

        step_ok = bool(tool_payload.get("success", False))
        browser_view = tool_payload.get("browser_view")
        tool_result = tool_payload.get("result")

        # Primary success flag of the browser step (last tool message wins).
        normalized["success"] = step_ok

        # Public link to the browser view (last tool message wins).
        normalized["final_url"] = browser_view

        # If the tool itself returned result text, keep it as the answer
        # when the agent did not provide one.
        if not normalized["final_answer"]:
            normalized["final_answer"] = tool_result

        if not step_ok and normalized["fail_reason"] is None:
            normalized["fail_reason"] = "tool_failed"

        # Record the step at least at tool-call granularity. Individual
        # step times are not available here, so every step carries the
        # conversation's finish time.
        normalized["steps"].append(
            {
                "timestamp": finished_at,
                "thought": None,
                "action_type": "TOOL_CALL",
                "action_target": msg.get("tool_call_id"),
                "action_value": None,
                "url_before": None,
                "url_after": browser_view,
                "screenshot_before": None,
                "screenshot_after": None,
                "success": step_ok,
                "error": None if step_ok else tool_result,
            }
        )

    # No tool output found although the agent reports completion: this is
    # a separate failure class.
    if not normalized["steps"] and completed:
        normalized["fail_reason"] = normalized["fail_reason"] or "no_tool_output"

    # Catch-all when nothing succeeded and no more specific reason was set.
    if (
        not normalized["success"]
        and normalized["error"] is None
        and normalized["fail_reason"] is None
    ):
        normalized["fail_reason"] = "unknown_failure"

    return normalized
|