Quality_evaluation/Mind2Web/mind2web_runner.py
Aleksandr Dubchak 98d5e90894 mind2web
2026-04-23 00:04:11 +03:00

140 lines
No EOL
4.6 KiB
Python

import os
import sys
import json
import asyncio
from dotenv import load_dotenv
# Directories this runner depends on, resolved relative to this file.
# NOTE(review): PROJECT_ROOT is the parent of this file's directory; confirm
# that BrowserUse_and_ComputerUse_skills really sits directly under it.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
SKILLS_DIR = os.path.join(PROJECT_ROOT, "BrowserUse_and_ComputerUse_skills")
HERMES_CODE_DIR = os.path.join(SKILLS_DIR, "hermes_code")
ENV_PATH = os.path.join(SKILLS_DIR, ".env")

# Append each directory to sys.path once so the project import that follows
# this section can be resolved.
for path in (PROJECT_ROOT, SKILLS_DIR, HERMES_CODE_DIR):
    if path in sys.path:
        continue
    sys.path.append(path)

# Values from the .env file replace anything already set in the environment.
load_dotenv(ENV_PATH, override=True)

# Local overrides: these always win over the .env file.
os.environ.update(
    {
        "MODEL": "qwen3.5-122b",
        "MODEL_DEFAULT": "qwen3.5-122b",
        "BASE_URL": "https://llm.lambda.coredump.ru/v1",
        "OPENAI_BASE_URL": "https://llm.lambda.coredump.ru/v1",
        "PROVIDER": "custom",
        # Important: endpoints used when running locally.
        "BROWSER_URL": "http://localhost:9222",
        "BROWSER_VIEW_URL": "http://localhost:6080",
    }
)

# API keys keep whatever the environment / .env provided; the placeholder is
# only used when nothing is set.
os.environ.setdefault("API_KEY", "sk-...")
os.environ.setdefault("OPENAI_API_KEY", "sk-...")

# Echo the effective browser endpoints for debugging.
print("RUNNER BROWSER_URL =", os.environ.get("BROWSER_URL"))
print("RUNNER BROWSER_VIEW_URL =", os.environ.get("BROWSER_VIEW_URL"))
from BrowserUse_and_ComputerUse_skills.hermes_code.tools.browser_use_tool import run_browser_task
def get_task_text(task: dict) -> str:
    """Return the task description stored in *task*, stripped of whitespace.

    The keys ``confirmed_task``, ``task``, ``instruction`` and ``intent`` are
    tried in that order; the first one holding a non-blank string wins.
    Returns an empty string when none of them qualifies.
    """
    candidate_keys = ("confirmed_task", "task", "instruction", "intent")
    candidates = (task.get(field) for field in candidate_keys)
    return next(
        (
            text.strip()
            for text in candidates
            if isinstance(text, str) and text.strip()
        ),
        "",
    )
# Lower-case substrings that mark an agent answer as unsuccessful even when
# the tool itself reported ``success``.
_FAILURE_MARKERS = (
    "status: incomplete",
    "judge verdict: ❌ fail",
    "judge verdict: fail",
    "task not completed",
    "could not",
    "unable to",
    "manual filtering needed",
    "recommendation:",
    "issue:",
    "incomplete -",
)


def _build_record(
    task: dict,
    instruction: str,
    *,
    status: str,
    success: bool,
    result=None,
    browser_view=None,
    error=None,
    raw=None,
) -> dict:
    """Assemble the per-task result record shared by every exit path."""
    return {
        "task_id": task.get("annotation_id"),
        "instruction": instruction,
        "website": task.get("website"),
        "domain": task.get("domain"),
        "subdomain": task.get("subdomain"),
        "status": status,
        "success": success,
        "result": result,
        "browser_view": browser_view,
        "error": error,
        "raw": raw,
    }


def run_task(task: dict, timeout_sec: int = 300) -> dict:
    """Run one benchmark task through the browser tool and summarise the outcome.

    The task text is wrapped in a fixed set of browsing rules, passed to
    ``run_browser_task`` inside a fresh event loop, and bounded by
    *timeout_sec*. Timeouts and any other exception raised while running or
    interpreting the tool are converted into a record instead of propagating.

    Args:
        task: Task entry; read for its text (via ``get_task_text``) and the
            ``annotation_id``, ``website``, ``domain`` and ``subdomain`` keys.
        timeout_sec: Maximum number of seconds to wait for the browser tool.

    Returns:
        A dict with the keys ``task_id``, ``instruction``, ``website``,
        ``domain``, ``subdomain``, ``status`` (``"success"``, ``"failed"``,
        ``"timeout"`` or ``"error"``), ``success``, ``result``,
        ``browser_view``, ``error`` and ``raw``.
    """
    task_text = get_task_text(task)
    # The literal's lines stay at column 0 so no indentation leaks into the
    # prompt text.
    instruction = f"""
Task: {task_text}
Rules:
- Do NOT use Google search.
- Go directly to relevant websites.
- Keep reasoning short.
- Avoid repeating the same scroll or search action many times.
- Prefer fast completion over exhaustive browsing.
- If a page already shows relevant results, do not keep exploring unnecessarily.
- If filters are available, use them directly.
- Do not get stuck searching for perfect filters forever.
""".strip()

    try:
        raw_result = asyncio.run(
            asyncio.wait_for(
                run_browser_task(instruction),
                timeout=timeout_sec,
            )
        )

        # The tool may hand back a JSON string or an already-decoded object.
        parsed = json.loads(raw_result) if isinstance(raw_result, str) else raw_result
        if not isinstance(parsed, dict):
            # Covers both a non-str/non-dict return value and a JSON string
            # that decodes to something other than an object.
            parsed = {
                "success": False,
                "result": None,
                "error": f"unexpected result type: {type(parsed).__name__}",
            }

        result = parsed.get("result")
        # Coerce to text only for marker scanning so a structured (non-string)
        # result cannot crash the check; the record keeps the original value.
        result_text = str(result).lower() if result else ""
        looks_failed = any(marker in result_text for marker in _FAILURE_MARKERS)
        task_success = bool(parsed.get("success", False)) and not looks_failed

        return _build_record(
            task,
            instruction,
            status="success" if task_success else "failed",
            success=task_success,
            result=result,
            browser_view=parsed.get("browser_view"),
            error=parsed.get("error"),
            raw=parsed,
        )
    except asyncio.TimeoutError:
        return _build_record(
            task,
            instruction,
            status="timeout",
            success=False,
            browser_view=os.getenv("BROWSER_VIEW_URL"),
            error=f"timeout after {timeout_sec} seconds",
        )
    except Exception as e:
        # Deliberate top-level boundary: one failing task must not abort the
        # caller, so the failure is reported in the record instead.
        return _build_record(
            task,
            instruction,
            status="error",
            success=False,
            browser_view=os.getenv("BROWSER_VIEW_URL"),
            error=str(e),
        )