diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index c1a5bb48..00000000 --- a/.dockerignore +++ /dev/null @@ -1,17 +0,0 @@ -venv/ -.venv/ -node_modules/ - -__pycache__/ -*.pyc -*.pyo -*.pyd - -.git/ -.github/ - -.env -config.yaml -sessions/ -logs/ -state.db diff --git a/.env.example b/.env.example index ddd72975..6aa5d4cf 100644 --- a/.env.example +++ b/.env.example @@ -1,29 +1,5 @@ OPENAI_BASE_URL= OPENAI_API_KEY= -MODEL_DEFAULT= - -TERMINAL_DOCKER_IMAGE=python:3.12-slim -TERMINAL_ENV=docker -HERMES_MAX_ITERATIONS=90 -HERMES_HOME=/app/hermes_data -HERMES_WORKSPACE_PATH=app/workspace - +HERMES_MAX_ITERATIONS= TELEGRAM_BOT_TOKEN= -TELEGRAM_ALLOWED_USERS= -TELEGRAM_HOME_CHANNEL= - -BROWSER_URL=http://browser:9222 -BROWSER_VIEW_URL=http://localhost:6080 -BROWSER_VIEW_BASE_URL=http://localhost:6081 - -BROWSER_API_HOST=0.0.0.0 -BROWSER_API_PORT=8088 -BROWSER_USE_RPC_URL=http://browser:8787/run -BROWSER_USE_RPC_TIMEOUT=900 -BROWSER_API_MAX_CONCURRENCY=2 -BROWSER_USE_ISOLATION_MODE=docker-per-principal -BROWSER_RUNTIME_IMAGE=browser-use-browser-runtime:latest -BROWSER_RUNTIME_NETWORK=browser-net -BROWSER_RUNTIME_TTL_SECONDS=900 -BROWSER_RUNTIME_START_TIMEOUT=45 -BROWSER_RUNTIME_ENABLE_UI=true +TERMINAL_ENV= diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml deleted file mode 100644 index b505db49..00000000 --- a/.gitea/workflows/deploy.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Deploy to BrowserUse VPS - -on: - push: - branches: - - main - - develop - workflow_dispatch: - -jobs: - deploy: - runs-on: deploy-vps - env: - DEPLOY_DIR: /home/BrowserUse-vps/apps/BrowserUse_and_ComputerUse_skills - DEPLOY_BRANCH: feature/api-for-subagent - HEALTH_URL: http://127.0.0.1:8088/health - steps: - - name: Deploy Docker Compose stack - shell: bash - run: | - set -Eeuo pipefail - cd "$DEPLOY_DIR" - bash scripts/deploy_vps.sh diff --git a/.gitignore b/.gitignore index 56299679..acebf673 100644 --- a/.gitignore +++ b/.gitignore @@ -1,64 +1,67 @@ 
-/venv/ -/_pycache/ -*.pyc* -__pycache__/ -.venv/ -.vscode/ +# ---> macOS +# General +.DS_Store +.AppleDouble +.LSOverride +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# ---> Windows +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk .env -.env.local -.env.development.local -.env.test.local -.env.production.local -.env.development -.env.test -docker-compose.override.yml -hermes_code/test_browser.py -.git -.github -.idea -hermes_data -workspace +hermes_data/* +workspace/* -export* -__pycache__/model_tools.cpython-310.pyc -__pycache__/web_tools.cpython-310.pyc -logs/ -data/ -.pytest_cache/ -tmp/ -temp_vision_images/ -hermes-*/* -examples/ -tests/quick_test_dataset.jsonl -tests/sample_dataset.jsonl -run_datagen_kimik2-thinking.sh -run_datagen_megascience_glm4-6.sh -run_datagen_sonnet.sh -source-data/* -run_datagen_megascience_glm4-6.sh -data/* -node_modules/ -browser-use/ -agent-browser/ -# Private keys -*.ppk -*.pem -privvy* -images/ -__pycache__/ -hermes_agent.egg-info/ -wandb/ -testlogs +SOLUTION_SUMMARY.md +BROWSER_USE_QUICKSTART.md +BROWSER_USE_SETUP.md +START_HERE.md +GUI_BROWSER_SETUP.md -# CLI config (may contain sensitive SSH paths) -cli-config.yaml +*/config.yaml -# Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case) -skills/.hub/ -ignored/ -.worktrees/ -environments/benchmarks/evals/ - -# Release script temp files -.release_notes.md +*.idea \ 
No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..bc788f96
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,42 @@
+FROM python:3.11
+
+# Toolchain and headers (presumably for building native Python dependencies) plus
+# git and curl. Update, install and list cleanup share one layer so the package
+# index is never cached stale and never ships in the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git \
+    libffi-dev \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# NOTE(review): clones whatever the default branch holds at build time, so
+# rebuilds are not reproducible; consider pinning a tag or commit.
+RUN git clone https://github.com/NousResearch/hermes-agent.git /opt/hermes-agent
+
+WORKDIR /opt/hermes-agent
+RUN pip install --no-cache-dir -e .
+RUN pip install --no-cache-dir python-telegram-bot
+
+# Isolated runtime for browser-use to avoid dependency conflicts with hermes-agent.
+RUN python -m venv /opt/browser-use-venv \
+    && /opt/browser-use-venv/bin/pip install --no-cache-dir --upgrade pip \
+    && /opt/browser-use-venv/bin/pip install --no-cache-dir browser-use
+
+# hermes-agent is installed above with the image's own pip, and nothing in this
+# file creates /opt/hermes-agent/venv, so there is no venv binary to link to.
+# CMD needs `hermes` on PATH: fail the build here if it is missing.
+RUN command -v hermes
+
+# Stable name for the browser-use interpreter; any failure stops the build.
+# NOTE(review): confirm the venv is still picked up when the interpreter is
+# started through this symlink; if not, use a wrapper script that execs it.
+RUN ln -sf /opt/browser-use-venv/bin/python /usr/local/bin/python-browser-use
+
+# Pre-create the directories under /root/.hermes.
+RUN mkdir -p /root/.hermes/skills /root/.hermes/memories /root/.hermes/sessions
+
+WORKDIR /workspace
+
+CMD ["hermes", "gateway"]
\ No newline at end of file
diff --git a/GUI_BROWSER_FIX.md b/GUI_BROWSER_FIX.md
new file mode 100644
index 00000000..9d8b67f2
--- /dev/null
+++ b/GUI_BROWSER_FIX.md
@@ -0,0 +1,106 @@
+# 🎯 Решение: browser-use с GUI браузером
+
+## ✅ Проблема решена!
+
+Теперь все действия через hermes-agent **ТРАНСЛИРУЮТСЯ** на GUI браузер в реальном времени.
+ +## 🚀 Как использовать + +### 1️⃣ Запустите стек + +```bash +docker compose --profile gui up -d +``` + +### 2️⃣ Откройте VNC в браузере + +```bash +open http://localhost:6080/vnc.html +``` + +### 3️⃣ Дайте задачу агенту + +Напишите что-нибудь типа: +``` +"Откройте example.com и найдите заголовок страницы" +``` + +**Или** запустите напрямую: +```bash +docker compose exec -T hermes-agent python \ + /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ + --task "Open google.com and search for 'hello world'" \ + --max-steps 5 +``` + +### 4️⃣ Смотрите действия в VNC окне 🎬 + +Вы видите, как агент: +- 🔍 Навигирует по сайтам +- 🖱️ Кликает по кнопкам +- ⌨️ Вводит текст +- 📜 Скроллит страницу + +## 🔧 Что было исправлено + +### Проблема: "Host header is specified and is not an IP address" + +Chromium CDP API **проверяет Host заголовок в HTTP запросах** и отвергает имена хостов. + +**Решение:** Используем IP адрес контейнера вместо имени: +- ❌ ~~`BROWSER_USE_CDP_URL=http://chromium-gui:9223`~~ +- ✅ `BROWSER_USE_CDP_URL=http://172.25.0.3:9223` + +### Файлы, которые были обновлены: + +1. **`docker-compose.yml`** + - Изменена `BROWSER_USE_CDP_URL` на `http://172.25.0.3:9223` + - Добавлена зависимость от `chromium-gui` в hermes-agent + +2. 
**`docker/chromium-gui/start.sh`** + - Добавлена socat для проксирования TCP через IPv6 + - Chromium слушает на `::1:9223` (IPv6 localhost) + - socat пробрасывает `9223` на все интерфейсы + +## 📊 Архитектура + +``` +hermes-agent (контейнер) + │ + ├─ BROWSER_USE_CDP_URL=http://172.25.0.3:9223 + │ + └─→ chromium-gui (контейнер) + │ + ├─ Chromium слушает на ::1:9223 (IPv6) + │ + ├─ socat (TCP-LISTEN:9223 → TCP6:[::1]:9223) + │ + ├─ x11vnc (захватывает Xvfb) + │ + └─ websockify (VNC → WebSocket) + │ + └─→ http://localhost:6080/vnc.html (ваш браузер) +``` + +## 🎮 Протестировано + +```bash +# ✅ CDP доступен +docker compose exec -T hermes-agent bash -c 'curl -s http://172.25.0.3:9223/json/version' + +# ✅ VNC доступен +open http://localhost:6080/vnc.html + +# ✅ socat проксирует +docker compose exec chromium-gui netstat -tlnp | grep 9223 +``` + +## 📚 Дополнительно + +- Полная документация: [`GUI_BROWSER_SETUP.md`](./GUI_BROWSER_SETUP.md) +- Диагностика проблем: смотрите раздел "Диагностика" в [`GUI_BROWSER_SETUP.md`](./GUI_BROWSER_SETUP.md) + +--- + +**Теперь browser-use полностью интегрирован с GUI браузером! 
🎉** + diff --git a/README.md b/README.md index 1b962a07..cea2664b 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,2 @@ # BrowserUse_and_ComputerUse_skills -Чтобы запустить tool browser-use вместе с hermes agent тебе нужно выполнить следующие действия -```commandline -git clone https://git.lambda.coredump.ru/APEX/BrowserUse_and_ComputerUse_skills.git -git switch feature/telegram-browser-integration -touch .env -``` -В создавшемся .env файле заполните переменные в соответствии с шаблоном, расположенном в .env.example -```commandline -docker compose up -d --build -``` - -```commandline -docker compose down -docker compose up -d -``` -## Удачного пользования - diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 00000000..15d354c9 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,50 @@ +--- +name: browser-use +version: "1.0.0" +description: | + Автоматизация браузера с помощью Playwright и библиотеки browser_use. + Выполняет навигацию, клики, заполнение форм, скриншоты, извлечение данных. + Подходит для тестирования веб-приложений, парсинга, автоматизации рутинных задач. +triggers: + - "открой сайт" + - "нажми на кнопку" + - "заполни форму" + - "сделай скриншот" + - "спарси данные" + - "автоматизируй браузер" + - "browser use" + - "playwright" +license: MIT +compatibility: + - hermes + - claude +allowed-tools: + - bash + - python + - read_file + - write_file +--- + +# BrowserUse Skill + +Автоматизация браузера с использованием Playwright и browser_use. 
+ +## 🎯 Описание + +Этот скилл позволяет Hermes-агенту управлять браузером: +- Открывать URL и навигировать +- Кликать по элементам +- Заполнять формы +- Извлекать данные (текст, атрибуты, HTML) +- Делать скриншоты +- Ждать загрузки элементов +- Выполнять кастомный JavaScript +- Работать с выпадающими списками + +## 📦 Установка зависимостей + +Перед первым использованием выполни: +```bash +cd ~/.hermes/skills/browser-use/scripts +chmod +x setup.sh +./setup.sh \ No newline at end of file diff --git a/api/Dockerfile b/api/Dockerfile deleted file mode 100644 index 670bdb59..00000000 --- a/api/Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -FROM python:3.12-slim - -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONUNBUFFERED=1 - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y --no-install-recommends docker.io \ - && rm -rf /var/lib/apt/lists/* - -COPY requirements.txt /app/requirements.txt -RUN pip install --no-cache-dir uv \ - && uv pip install --system --no-cache-dir -r /app/requirements.txt - -COPY . /app/api - -CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8088"] diff --git a/api/README.md b/api/README.md deleted file mode 100644 index 8ffa9a3b..00000000 --- a/api/README.md +++ /dev/null @@ -1,172 +0,0 @@ -# Browser REST API - -REST API-обертка над `browser-use` RPC (`POST /run` в контейнере браузера). - -Сервис принимает задачу, ставит ее в in-memory очередь, выполняет через `browser-use` и отдает статус/результат по `task_id`. 
- -## Актуальный статус - -Проверено smoke-тестом: -- `GET /health` отвечает `200` с `{"ok": true}` -- `POST /api/browser/tasks` возвращает `202` и `task_id` -- `GET /api/browser/tasks/{task_id}` возвращает `queued/running/...` -- `GET /api/browser/tasks/{task_id}/result` возвращает `202`, пока задача не завершена -- `GET /api/browser/tasks/{task_id}/history` возвращает историю шагов агента - -## Архитектура - -Слои сейчас разделены и выглядят нормально для MVP: - -- `api/main.py` — точка входа ASGI (`uvicorn api.main:app`), сборка `FastAPI` и lifespan -- `api/routes/tasks.py` — HTTP-слой (валидация входа/выхода, status codes) -- `api/services/task_service.py` — orchestration (фоновые задачи, timeout, обработка ошибок) -- `api/repositories/task_store.py` — in-memory хранилище задач -- `api/clients/browser_rpc_client.py` — aiohttp-клиент к browser RPC -- `api/clients/browser_rpc_contracts.py` — protocol + исключения RPC-слоя -- `api/contracts/task_schemas.py` — Pydantic request/response DTO -- `api/domain/task_status.py` — доменный enum статусов -- `api/core/settings.py` — конфигурация из env - -## Ограничения текущей реализации - -- хранилище in-memory: после рестарта контейнера задачи теряются -- нет ретраев RPC при транспортных ошибках -- один инстанс процесса хранит задачи только локально (без shared state) - -## Переменные окружения - -- `BROWSER_API_HOST` (default: `0.0.0.0`) -- `BROWSER_API_PORT` (default: `8080`) -- `BROWSER_USE_RPC_URL` (default: `http://browser:8787/run`) -- `BROWSER_USE_RPC_TIMEOUT` (default: `900`) -- `BROWSER_API_MAX_CONCURRENCY` (default: `2`) - -## Локальный запуск - -```zsh -cd "/Users/fedorkobylkevic/PycharmProjects/BrowserUse_and_ComputerUse_skills" -source .venv/bin/activate -uvicorn api.main:app --host 0.0.0.0 --port 8088 -``` - -## Запуск через Docker Compose - -```zsh -cd "/Users/fedorkobylkevic/PycharmProjects/BrowserUse_and_ComputerUse_skills" -docker compose build browser-api -docker compose up -d browser browser-api -docker 
compose logs -f browser-api -``` - -## REST API - -### `GET /health` - -Проверка доступности API. - -Пример ответа: - -```json -{"ok": true} -``` - -### `POST /api/browser/tasks` - -Создать задачу. - -Request: - -```json -{ - "task": "Открой example.com и верни title", - "timeout": 300, - "metadata": {"source": "manual"} -} -``` - -Response `202`: - -```json -{ - "task_id": "53f54fa4c1f24219b3949d56b0457875", - "status": "queued" -} -``` - -### `GET /api/browser/tasks/{task_id}` - -Текущий статус и таймстемпы. - -### `GET /api/browser/tasks/{task_id}/result` - -- `202` если задача еще `queued/running` -- `200` с финальным payload после завершения - -### `GET /api/browser/tasks/{task_id}/history` - -- `202` если задача еще `queued/running` -- `200` с финальной историей шагов после завершения - -Пример ответа `200`: - -```json -{ - "task_id": "53f54fa4c1f24219b3949d56b0457875", - "status": "succeeded", - "history": [ - { - "step": 1, - "kind": "thought", - "content": "Open target page", - "data": {"value": "Open target page"} - }, - { - "step": 2, - "kind": "action", - "content": "Click login", - "data": {"value": "Click login"} - } - ] -} -``` - -## Runs API (background runs) - -Новый набор endpoint-ов для фоновых запусков: - -- `POST /runs` — создать run в фоне -- `GET /runs/{run_id}` — получить run и его статус -- `POST /runs/{run_id}/cancel` — отменить pending/running run -- `DELETE /runs/{run_id}` — удалить завершенный run -- `GET /runs/{run_id}/wait` — дождаться завершения и вернуть финальный output -- `GET /runs/{run_id}/stream` — подключиться к live-потоку новых событий run (SSE) -- `GET /threads/{thread_id}/runs` — список run-ов в треде - -Пример создания run: - -```json -{ - "thread_id": "thread-demo", - "input": "Открой example.com и верни title", - "timeout": 60, - "metadata": {"source": "manual"} -} -``` - -## Быстрый end-to-end пример - -```zsh -curl -sS http://localhost:8088/health - -RESP=$(curl -sS -X POST http://localhost:8088/api/browser/tasks \ - 
-H "Content-Type: application/json" \ - -d '{"task":"Открой example.com и верни title","timeout":30}') - -echo "$RESP" - -TASK_ID=$(python -c "import json,sys;print(json.loads(sys.argv[1])['task_id'])" "$RESP") - -curl -sS "http://localhost:8088/api/browser/tasks/$TASK_ID" -curl -sS "http://localhost:8088/api/browser/tasks/$TASK_ID/result" -curl -sS "http://localhost:8088/api/browser/tasks/$TASK_ID/history" -``` diff --git a/api/clients/__init__.py b/api/clients/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/clients/browser_rpc_client.py b/api/clients/browser_rpc_client.py deleted file mode 100644 index ce227d43..00000000 --- a/api/clients/browser_rpc_client.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Any - -import aiohttp - -from api.clients.browser_rpc_contracts import BrowserRpcError - - -class BrowserRpcClient: - def __init__(self, rpc_url: str, session: aiohttp.ClientSession) -> None: - self._rpc_url = rpc_url - self._session = session - - async def run(self, task: str, timeout_sec: float, rpc_url: str | None = None) -> dict[str, Any]: - payload = {"task": task} - timeout = aiohttp.ClientTimeout(total=timeout_sec) - target_url = rpc_url or self._rpc_url - - try: - async with self._session.post(target_url, json=payload, timeout=timeout) as response: - if response.status >= 400: - body = await response.text() - raise BrowserRpcError(f"RPC HTTP: {response.status}: {body}") - - try: - data = await response.json(content_type=None) - except aiohttp.ContentTypeError as exc: - raise BrowserRpcError("RPC returned non-JSON response") from exc - except aiohttp.ClientError as exc: - raise BrowserRpcError(f"Transport error: {exc}") from exc - - if not isinstance(data, dict): - raise BrowserRpcError("RPC returned invalid payload type") - - return data - - -async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]: - async with aiohttp.ClientSession() as session: - return await 
BrowserRpcClient(rpc_url, session=session).run(task=task, timeout_sec=timeout_sec) diff --git a/api/clients/browser_rpc_contracts.py b/api/clients/browser_rpc_contracts.py deleted file mode 100644 index bec7c968..00000000 --- a/api/clients/browser_rpc_contracts.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import Any, Protocol - - -class BrowserRpcError(RuntimeError): ... - - -class BrowserRpcRunner(Protocol): - async def run(self, task: str, timeout_sec: float, rpc_url: str | None = None) -> dict[str, Any]: ... diff --git a/api/contracts/__init__.py b/api/contracts/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/contracts/task_schemas.py b/api/contracts/task_schemas.py deleted file mode 100644 index 11da4213..00000000 --- a/api/contracts/task_schemas.py +++ /dev/null @@ -1,115 +0,0 @@ -from typing import Any - -from pydantic import BaseModel, Field - -from api.domain.task_status import TaskStatus - - -class BrowserTaskRequest(BaseModel): - """Запрос на запуск задачи в browser-use агенте.""" - - task: str = Field(..., description="Текстовая задача для browser-use агента") - timeout: int = Field(300, description="Максимальное время выполнения задачи в секундах") - metadata: dict[str, Any] | None = Field(default=None, description="Дополнительные метаданные клиента") - - -class BrowserTaskAcceptedResponse(BaseModel): - """Ответ о том, что задача принята в обработку.""" - - task_id: str - status: TaskStatus - - -class BrowserTaskStatusResponse(BaseModel): - """Текущий статус задачи и временные отметки ее выполнения.""" - - task_id: str - status: TaskStatus - create_at: float = Field(..., description="Время создания задачи в Unix timestamp") - started_at: float | None = Field(default=None, description="Время начала выполнения в Unix timestamp") - finished_at: float | None = Field(default=None, description="Время завершения выполнения в Unix timestamp") - error: str | None = Field(default=None, description="Текст ошибки, если задача 
завершилась с ошибкой") - - -class BrowserTaskResultResponse(BaseModel): - """Финальный результат выполнения задачи в browser-use.""" - - task_id: str - status: TaskStatus - success: bool = Field(..., description="Успешно ли выполнена задача") - execution_time: float = Field(..., description="Фактическое время выполнения в секундах") - result: str | None = Field(default=None, description="Итоговый текстовый результат") - error: str | None = Field(default=None, description="Текст ошибки, если выполнение не удалось") - raw_response: dict[str, Any] | None = Field(default=None, description="Сырой ответ от browser-use RPC") - - -class TaskHistoryEvent(BaseModel): - """Одно действие/шаг в истории выполнения browser-use агента.""" - - step: int = Field(..., description="Порядковый номер события в истории") - kind: str = Field(..., description="Тип события (thought/action/error/system)") - content: str | None = Field(default=None, description="Краткое текстовое описание события") - data: dict[str, Any] = Field(default_factory=dict, description="Дополнительные структурированные данные") - - -class BrowserTaskHistoryResponse(BaseModel): - """История действий агента для конкретной задачи.""" - - task_id: str - status: TaskStatus - history: list[TaskHistoryEvent] = Field(default_factory=list) - - -class RunCreateRequest(BaseModel): - """Запрос на создание фонового run.""" - - thread_id: str = Field(..., description="Идентификатор треда/контекста") - input: str = Field(..., description="Пользовательский prompt для browser-use") - timeout: int = Field(300, description="Максимальное время выполнения run в секундах") - metadata: dict[str, Any] | None = Field(default=None, description="Дополнительные метаданные") - - -class RunSummaryResponse(BaseModel): - """Краткая информация о run.""" - - run_id: str - thread_id: str - status: TaskStatus - created_at: float - started_at: float | None = None - finished_at: float | None = None - error: str | None = None - - -class 
RunResponse(RunSummaryResponse): - """Полная информация о run.""" - - input: str - metadata: dict[str, Any] | None = None - output: str | None = None - raw_response: dict[str, Any] | None = None - history: list[TaskHistoryEvent] = Field(default_factory=list) - - -class RunListResponse(BaseModel): - """Список run-ов для треда.""" - - thread_id: str - runs: list[RunSummaryResponse] = Field(default_factory=list) - - -class RunWaitResponse(BaseModel): - """Ответ ожидания завершения run.""" - - run: RunResponse - - -class RunStreamEvent(BaseModel): - """Событие потока выполнения run.""" - - run_id: str - event: str - ts: float - data: dict[str, Any] = Field(default_factory=dict) - - diff --git a/api/core/__init__.py b/api/core/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/core/settings.py b/api/core/settings.py deleted file mode 100644 index c0839f7c..00000000 --- a/api/core/settings.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -from dataclasses import dataclass - - -@dataclass(frozen=True) -class Settings: - app_host: str = os.getenv("BROWSER_API_HOST", "0.0.0.0") - app_port: int = int(os.getenv("BROWSER_API_PORT", "8080")) - - browser_rpc_url: str = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run") - browser_rpc_timeout: float = float(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900")) - - max_concurrency: int = int(os.getenv("BROWSER_API_MAX_CONCURRENCY", "2")) - - -settings = Settings() diff --git a/api/domain/__init__.py b/api/domain/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/domain/task_status.py b/api/domain/task_status.py deleted file mode 100644 index e24b1887..00000000 --- a/api/domain/task_status.py +++ /dev/null @@ -1,10 +0,0 @@ -from enum import Enum - - -class TaskStatus(str, Enum): - """Состояние задачи браузерного агента.""" - queued = "queued" - running = "running" - succeeded = "succeeded" - failed = "failed" - cancelled = "cancelled" diff --git a/api/main.py b/api/main.py deleted 
file mode 100644 index b45c1b88..00000000 --- a/api/main.py +++ /dev/null @@ -1,48 +0,0 @@ -from contextlib import asynccontextmanager - -import aiohttp -from fastapi import FastAPI - -from api.clients.browser_rpc_client import BrowserRpcClient -from api.core.settings import settings -from api.repositories.task_store import TaskStore -from api.routes.runs import router as runs_router -from api.routes.tasks import router as tasks_router -from api.services.task_service import TaskService - - -@asynccontextmanager -async def lifespan(app: FastAPI): - session = aiohttp.ClientSession() - task_service = TaskService( - store=TaskStore(), - rpc_client=BrowserRpcClient(settings.browser_rpc_url, session=session), - max_concurrency=settings.max_concurrency, - rpc_timeout_cap=settings.browser_rpc_timeout, - ) - app.state.task_service = task_service - try: - yield - finally: - await task_service.close() - await session.close() - - -def create_app() -> FastAPI: - app = FastAPI( - title="Browser API", - version="1.0.0", - description="REST API for submitting tasks to browser-use and retrieving their status/results.", - lifespan=lifespan, - ) - app.include_router(tasks_router) - app.include_router(runs_router) - - @app.get("/health") - async def health() -> dict: - return {"ok": True} - - return app - - -app = create_app() diff --git a/api/mappers/__init__.py b/api/mappers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/mappers/task_record_mapper.py b/api/mappers/task_record_mapper.py deleted file mode 100644 index 3ce2792e..00000000 --- a/api/mappers/task_record_mapper.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import annotations - -from typing import Any - -from api.contracts.task_schemas import ( - BrowserTaskAcceptedResponse, - BrowserTaskHistoryResponse, - BrowserTaskResultResponse, - BrowserTaskStatusResponse, - RunListResponse, - RunResponse, - RunSummaryResponse, - RunWaitResponse, - TaskHistoryEvent, -) -from api.domain.task_status 
import TaskStatus -from api.repositories.task_store import TaskRecord - - -class TaskRecordMapper: - ACTIVE_STATUSES = (TaskStatus.queued, TaskStatus.running) - - @classmethod - def is_active_status(cls, status: TaskStatus) -> bool: - return status in cls.ACTIVE_STATUSES - - @staticmethod - def to_task_accepted(rec: TaskRecord) -> BrowserTaskAcceptedResponse: - return BrowserTaskAcceptedResponse(task_id=rec.task_id, status=rec.status) - - @staticmethod - def to_task_status(rec: TaskRecord) -> BrowserTaskStatusResponse: - return BrowserTaskStatusResponse( - task_id=rec.task_id, - status=rec.status, - create_at=rec.create_at, - started_at=rec.started_at, - finished_at=rec.finished_at, - error=rec.error, - ) - - @staticmethod - def to_task_result(rec: TaskRecord) -> BrowserTaskResultResponse: - return BrowserTaskResultResponse( - task_id=rec.task_id, - status=rec.status, - success=(rec.status == TaskStatus.succeeded), - execution_time=rec.execution_time, - result=rec.result, - error=rec.error, - raw_response=rec.raw_response, - ) - - @staticmethod - def to_pending_task_result(rec: TaskRecord) -> BrowserTaskResultResponse: - return BrowserTaskResultResponse( - task_id=rec.task_id, - status=rec.status, - success=False, - execution_time=rec.execution_time, - result=None, - error=None, - raw_response=None, - ) - - @staticmethod - def to_history_events(raw_history: list[dict[str, Any]]) -> list[TaskHistoryEvent]: - events: list[TaskHistoryEvent] = [] - for index, item in enumerate(raw_history, start=1): - raw_step = item.get("step") - step = raw_step if isinstance(raw_step, int) else index - kind = str(item.get("kind") or item.get("type") or "system") - content = item.get("content") - if content is not None: - content = str(content) - data = item.get("data") if isinstance(item.get("data"), dict) else {} - events.append(TaskHistoryEvent(step=step, kind=kind, content=content, data=data)) - return events - - @classmethod - def to_task_history(cls, rec: TaskRecord) -> 
BrowserTaskHistoryResponse: - return BrowserTaskHistoryResponse(task_id=rec.task_id, status=rec.status, - history=cls.to_history_events(rec.history)) - - @classmethod - def to_pending_task_history(cls, rec: TaskRecord) -> BrowserTaskHistoryResponse: - return BrowserTaskHistoryResponse(task_id=rec.task_id, status=rec.status, - history=cls.to_history_events(rec.history)) - - @staticmethod - def to_run_summary(rec: TaskRecord) -> RunSummaryResponse: - return RunSummaryResponse( - run_id=rec.task_id, - thread_id=rec.thread_id, - status=rec.status, - created_at=rec.create_at, - started_at=rec.started_at, - finished_at=rec.finished_at, - error=rec.error, - ) - - @classmethod - def to_run_response(cls, rec: TaskRecord) -> RunResponse: - return RunResponse.model_validate( - { - "run_id": rec.task_id, - "thread_id": rec.thread_id, - "status": rec.status, - "created_at": rec.create_at, - "started_at": rec.started_at, - "finished_at": rec.finished_at, - "error": rec.error, - "input": rec.task, - "metadata": rec.metadata, - "output": rec.result, - "raw_response": rec.raw_response, - "history": cls.to_history_events(rec.history), - } - ) - - @classmethod - def to_run_wait(cls, rec: TaskRecord) -> RunWaitResponse: - return RunWaitResponse(run=cls.to_run_response(rec)) - - @classmethod - def to_thread_run_list(cls, thread_id: str, runs: list[TaskRecord]) -> RunListResponse: - return RunListResponse(thread_id=thread_id, runs=[cls.to_run_summary(item) for item in runs]) diff --git a/api/repositories/__init__.py b/api/repositories/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/repositories/task_store.py b/api/repositories/task_store.py deleted file mode 100644 index b13ee7b3..00000000 --- a/api/repositories/task_store.py +++ /dev/null @@ -1,164 +0,0 @@ -import time -import uuid -from asyncio import Event, Lock, Queue -from dataclasses import dataclass, field -from typing import Any - -from api.domain.task_status import TaskStatus - - -@dataclass 
-class TaskRecord: - task_id: str - thread_id: str - task: str - timeout: int - metadata: dict[str, Any] | None - status: TaskStatus = TaskStatus.queued - create_at: float = field(default_factory=time.time) - started_at: float | None = None - finished_at: float | None = None - result: str | None = None - error: str | None = None - raw_response: dict[str, Any] | None = None - history: list[dict[str, Any]] = field(default_factory=list) - cancel_requested: bool = False - done_event: Event = field(default_factory=Event) - - @property - def execution_time(self) -> float: - if self.started_at is None: - return 0 - end = self.finished_at if self.finished_at is not None else time.time() - return max(0, end - self.started_at) - - -class TaskStore: - def __init__(self) -> None: - self._lock = Lock() - self._tasks: dict[str, TaskRecord] = {} - self._thread_index: dict[str, list[str]] = {} - self._subscribers: dict[str, set[Queue[dict[str, Any]]]] = {} - - async def create( - self, - task: str, - timeout: int, - metadata: dict[str, Any] | None, - thread_id: str = "default", - ) -> TaskRecord: - task_id = uuid.uuid4().hex - rec = TaskRecord(task_id=task_id, thread_id=thread_id, task=task, timeout=timeout, metadata=metadata) - async with self._lock: - self._tasks[task_id] = rec - self._thread_index.setdefault(thread_id, []).append(task_id) - self._subscribers.setdefault(task_id, set()) - return rec - - async def list_by_thread(self, thread_id: str) -> list[TaskRecord]: - async with self._lock: - ids = list(self._thread_index.get(thread_id, [])) - return [self._tasks[item] for item in ids if item in self._tasks] - - async def get(self, task_id: str) -> TaskRecord | None: - async with self._lock: - return self._tasks.get(task_id) - - async def set_running(self, task_id: str) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - if rec.status == TaskStatus.cancelled: - return rec - rec.status = TaskStatus.running - 
rec.started_at = time.time() - return rec - - async def set_done( - self, - task_id: str, - success: bool, - raw_response: dict[str, Any] | None, - error: str | None, - result: str | None = None, - history: list[dict[str, Any]] | None = None, - ) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - rec.finished_at = time.time() - rec.raw_response = raw_response - rec.error = error if error is not None else ( - raw_response.get("error") if isinstance(raw_response, dict) else None) - rec.result = result if result is not None else ( - raw_response.get("result") if isinstance(raw_response, dict) else None) - rec.history = list(history or []) - rec.status = TaskStatus.succeeded if success else TaskStatus.failed - rec.done_event.set() - return rec - - async def set_cancel_requested(self, task_id: str) -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - rec.cancel_requested = True - if rec.status == TaskStatus.queued: - rec.status = TaskStatus.cancelled - rec.finished_at = time.time() - rec.error = "Cancelled by user" - rec.done_event.set() - return rec - - async def set_cancelled(self, task_id: str, error: str = "Cancelled by user") -> TaskRecord | None: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return None - if rec.status in (TaskStatus.succeeded, TaskStatus.failed, TaskStatus.cancelled): - return rec - rec.status = TaskStatus.cancelled - rec.finished_at = time.time() - rec.error = error - rec.done_event.set() - return rec - - async def delete_if_finished(self, task_id: str) -> tuple[bool, bool]: - async with self._lock: - rec = self._tasks.get(task_id) - if rec is None: - return False, False - if rec.status in (TaskStatus.queued, TaskStatus.running): - return True, False - - del self._tasks[task_id] - thread_list = self._thread_index.get(rec.thread_id, []) - if task_id in thread_list: - 
thread_list.remove(task_id) - self._subscribers.pop(task_id, None) - return True, True - - async def subscribe(self, task_id: str) -> Queue[dict[str, Any]] | None: - queue: Queue[dict[str, Any]] = Queue() - async with self._lock: - if task_id not in self._tasks: - return None - self._subscribers.setdefault(task_id, set()).add(queue) - return queue - - async def unsubscribe(self, task_id: str, queue: Queue[dict[str, Any]]) -> None: - async with self._lock: - subscribers = self._subscribers.get(task_id) - if subscribers is not None: - subscribers.discard(queue) - - async def publish(self, task_id: str, event: dict[str, Any]) -> None: - async with self._lock: - subscribers = list(self._subscribers.get(task_id, set())) - for queue in subscribers: - try: - queue.put_nowait(event) - except Exception: - continue diff --git a/api/requirements.txt b/api/requirements.txt deleted file mode 100644 index 67867496..00000000 --- a/api/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -fastapi==0.136.1 -uvicorn[standard]==0.46.0 -aiohttp==3.13.5 -pydantic==2.13.3 diff --git a/api/routes/__init__.py b/api/routes/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/routes/dependencies.py b/api/routes/dependencies.py deleted file mode 100644 index abd9c3b6..00000000 --- a/api/routes/dependencies.py +++ /dev/null @@ -1,8 +0,0 @@ -from fastapi import Request - -from api.services.protocols import TaskServiceProtocol - - -def get_task_service(request: Request) -> TaskServiceProtocol: - return request.app.state.task_service - diff --git a/api/routes/runs.py b/api/routes/runs.py deleted file mode 100644 index 1b051766..00000000 --- a/api/routes/runs.py +++ /dev/null @@ -1,130 +0,0 @@ -import asyncio -import json -from typing import AsyncIterator - -from fastapi import APIRouter, Depends, HTTPException, Query, Response -from fastapi.responses import JSONResponse, StreamingResponse - -from api.contracts.task_schemas import ( - RunCreateRequest, - RunListResponse, - 
RunResponse, - RunStreamEvent, - RunSummaryResponse, - RunWaitResponse, -) -from api.mappers.task_record_mapper import TaskRecordMapper -from api.routes.dependencies import get_task_service -from api.services.protocols import TaskServiceProtocol - -router = APIRouter(tags=["runs"]) - - -@router.get("/threads/{thread_id}/runs", response_model=RunListResponse) -async def list_thread_runs( - thread_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> RunListResponse: - runs = await service.list_thread_runs(thread_id) - return TaskRecordMapper.to_thread_run_list(thread_id, runs) - - -@router.post("/runs", response_model=RunSummaryResponse, status_code=202) -async def create_run( - payload: RunCreateRequest, - service: TaskServiceProtocol = Depends(get_task_service), -) -> RunSummaryResponse: - rec = await service.create_run( - thread_id=payload.thread_id.strip(), - user_input=payload.input.strip(), - timeout=payload.timeout, - metadata=payload.metadata, - ) - return TaskRecordMapper.to_run_summary(rec) - - -@router.get("/runs/{run_id}", response_model=RunResponse) -async def get_run( - run_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> RunResponse: - rec = await service.get_run(run_id) - if rec is None: - raise HTTPException(status_code=404, detail="Run not found") - return TaskRecordMapper.to_run_response(rec) - - -@router.post("/runs/{run_id}/cancel", response_model=RunSummaryResponse) -async def cancel_run( - run_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> RunSummaryResponse: - rec = await service.cancel_run(run_id) - if rec is None: - raise HTTPException(status_code=404, detail="Run not found") - return TaskRecordMapper.to_run_summary(rec) - - -@router.delete("/runs/{run_id}", status_code=204) -async def delete_run( - run_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> Response: - exists, deleted = await service.delete_run(run_id) - if not exists: - raise 
HTTPException(status_code=404, detail="Run not found") - if not deleted: - raise HTTPException(status_code=409, detail="Run is still active. Cancel it first.") - return Response(status_code=204) - - -@router.get("/runs/{run_id}/wait", response_model=RunWaitResponse) -async def wait_run( - run_id: str, - timeout: float | None = Query(default=None, ge=0), - service: TaskServiceProtocol = Depends(get_task_service), -) -> JSONResponse | RunWaitResponse: - rec = await service.wait_run(run_id, timeout=timeout) - if rec is None: - raise HTTPException(status_code=404, detail="Run not found") - - if TaskRecordMapper.is_active_status(rec.status): - pending = TaskRecordMapper.to_run_wait(rec) - return JSONResponse(status_code=202, content=pending.model_dump(mode="json")) - - return TaskRecordMapper.to_run_wait(rec) - - -@router.get("/runs/{run_id}/stream") -async def stream_run( - run_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> StreamingResponse: - queue = await service.subscribe_run_stream(run_id) - if queue is None: - raise HTTPException(status_code=404, detail="Run not found") - stream_queue = queue - - async def event_stream() -> AsyncIterator[str]: - try: - while True: - try: - item = await asyncio.wait_for(stream_queue.get(), timeout=15) - except asyncio.TimeoutError: - rec = await service.get_run(run_id) - if rec is None: - break - if not TaskRecordMapper.is_active_status(rec.status): - break - yield ": keep-alive\n\n" - continue - - payload = RunStreamEvent.model_validate(item).model_dump(mode="json") - yield f"data: {json.dumps(payload, ensure_ascii=False)}\\n\\n" - - if payload["event"] in ("completed", "failed", "cancelled"): - break - finally: - await service.unsubscribe_run_stream(run_id, stream_queue) - - return StreamingResponse(event_stream(), media_type="text/event-stream") diff --git a/api/routes/tasks.py b/api/routes/tasks.py deleted file mode 100644 index 4cd45966..00000000 --- a/api/routes/tasks.py +++ /dev/null @@ -1,65 
+0,0 @@ -from fastapi import APIRouter, Depends, HTTPException -from fastapi.responses import JSONResponse - -from api.contracts.task_schemas import ( - BrowserTaskAcceptedResponse, - BrowserTaskHistoryResponse, - BrowserTaskRequest, - BrowserTaskResultResponse, - BrowserTaskStatusResponse, -) -from api.mappers.task_record_mapper import TaskRecordMapper -from api.routes.dependencies import get_task_service -from api.services.protocols import TaskServiceProtocol - -router = APIRouter(prefix="/api/browser", tags=["browser-tasks"]) - - -@router.post("/tasks", response_model=BrowserTaskAcceptedResponse, status_code=202) -async def create_task( - payload: BrowserTaskRequest, - service: TaskServiceProtocol = Depends(get_task_service), -) -> BrowserTaskAcceptedResponse: - rec = await service.submit_task(task=payload.task.strip(), timeout=payload.timeout, metadata=payload.metadata) - return TaskRecordMapper.to_task_accepted(rec) - - -@router.get("/tasks/{task_id}", response_model=BrowserTaskStatusResponse) -async def get_task_status(task_id: str, service: TaskServiceProtocol = Depends(get_task_service)) -> BrowserTaskStatusResponse: - rec = await service.get_task(task_id) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - return TaskRecordMapper.to_task_status(rec) - - -@router.get("/tasks/{task_id}/result", response_model=BrowserTaskResultResponse) -async def get_task_result( - task_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> JSONResponse | BrowserTaskResultResponse: - rec = await service.get_task(task_id) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - - if TaskRecordMapper.is_active_status(rec.status): - pending = TaskRecordMapper.to_pending_task_result(rec) - return JSONResponse(status_code=202, content=pending.model_dump(mode="json")) - - return TaskRecordMapper.to_task_result(rec) - - -@router.get("/tasks/{task_id}/history", response_model=BrowserTaskHistoryResponse) 
-async def get_task_history( - task_id: str, - service: TaskServiceProtocol = Depends(get_task_service), -) -> JSONResponse | BrowserTaskHistoryResponse: - rec = await service.get_task(task_id) - if rec is None: - raise HTTPException(status_code=404, detail="Task not found") - - if TaskRecordMapper.is_active_status(rec.status): - pending = TaskRecordMapper.to_pending_task_history(rec) - return JSONResponse(status_code=202, content=pending.model_dump(mode="json")) - - return TaskRecordMapper.to_task_history(rec) - diff --git a/api/services/__init__.py b/api/services/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/api/services/browser_runtime_manager.py b/api/services/browser_runtime_manager.py deleted file mode 100644 index 23e33a34..00000000 --- a/api/services/browser_runtime_manager.py +++ /dev/null @@ -1,464 +0,0 @@ -"""Provision isolated browser-use Docker runtimes for API runs.""" - -from __future__ import annotations - -import hashlib -import json -import logging -import os -import re -import subprocess -import tempfile -import threading -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Any -from urllib import request - -logger = logging.getLogger(__name__) - -_DEFAULT_SHARED_CDP_URL = "http://browser:9222" -_DEFAULT_SHARED_RPC_URL = "http://browser:8787/run" -_DEFAULT_RUNTIME_IMAGE = "browser-use-browser-runtime:latest" -_DEFAULT_RUNTIME_NETWORK = "browser-net" -_DEFAULT_TTL_SECONDS = 900 -_DEFAULT_START_TIMEOUT = 45 -_DEFAULT_ENABLE_UI = True -_REGISTRY_LOCK = threading.Lock() -_VIEW_URL_CACHE_LOCK = threading.Lock() -_VIEW_URL_CACHE: dict[str, Any] = {"value": "", "expires_at": 0.0} - - -@dataclass(frozen=True) -class BrowserRuntimeConfig: - mode: str - runtime_image: str - runtime_network: str - runtime_ttl_seconds: int - runtime_start_timeout: int - shared_cdp_url: str - enable_ui: bool - - -def _state_dir() -> Path: - return Path(os.getenv("BROWSER_RUNTIME_STATE_DIR", 
"/tmp/browser-use-api")) - - -def _registry_path() -> Path: - return _state_dir() / "docker_runtimes.json" - - -def _as_int(value: Any, default: int) -> int: - try: - return max(1, int(value)) - except (TypeError, ValueError): - return default - - -def _as_bool(value: Any, default: bool) -> bool: - if value is None: - return default - if isinstance(value, bool): - return value - return str(value).strip().lower() in {"1", "true", "yes", "on"} - - -def get_browser_runtime_config() -> BrowserRuntimeConfig: - mode = str(os.getenv("BROWSER_USE_ISOLATION_MODE", "shared")).strip().lower() - if mode not in {"shared", "docker-per-principal", "docker-per-task"}: - logger.warning("Unknown browser-use isolation mode %r; falling back to shared", mode) - mode = "shared" - - return BrowserRuntimeConfig( - mode=mode, - runtime_image=os.getenv("BROWSER_RUNTIME_IMAGE", _DEFAULT_RUNTIME_IMAGE).strip() - or _DEFAULT_RUNTIME_IMAGE, - runtime_network=os.getenv("BROWSER_RUNTIME_NETWORK", _DEFAULT_RUNTIME_NETWORK).strip() - or _DEFAULT_RUNTIME_NETWORK, - runtime_ttl_seconds=_as_int( - os.getenv("BROWSER_RUNTIME_TTL_SECONDS"), - _DEFAULT_TTL_SECONDS, - ), - runtime_start_timeout=_as_int( - os.getenv("BROWSER_RUNTIME_START_TIMEOUT"), - _DEFAULT_START_TIMEOUT, - ), - shared_cdp_url=os.getenv("BROWSER_URL", _DEFAULT_SHARED_CDP_URL).strip() - or _DEFAULT_SHARED_CDP_URL, - enable_ui=_as_bool( - os.getenv("BROWSER_RUNTIME_ENABLE_UI"), - _DEFAULT_ENABLE_UI, - ), - ) - - -def resolve_isolation_owner( - mode: str, - task_id: str | None, - metadata: dict[str, Any] | None = None, - thread_id: str | None = None, -) -> str: - if mode == "docker-per-task": - return (task_id or "default").strip() or "default" - - metadata = metadata or {} - for key in ("user_id", "session_id"): - value = metadata.get(key) - if value not in (None, ""): - return str(value).strip() or "default" - - return (thread_id or task_id or "default").strip() or "default" - - -def hash_runtime_owner(owner: str) -> str: - return 
hashlib.sha256(owner.encode("utf-8")).hexdigest()[:16] - - -def _normalize_browser_view_base_url(raw_url: str) -> str: - url = (raw_url or "").strip() - if not url: - return "" - for marker in ("/vnc.html", "/index.html"): - idx = url.find(marker) - if idx != -1: - url = url[:idx] - break - return url.rstrip("/") - - -def _discover_browser_view_base_url_from_tunnel() -> str: - now = time.time() - with _VIEW_URL_CACHE_LOCK: - cached_value = str(_VIEW_URL_CACHE.get("value", "") or "") - expires_at = float(_VIEW_URL_CACHE.get("expires_at", 0.0) or 0.0) - if cached_value and now < expires_at: - return cached_value - - try: - result = _run_docker(["logs", "--tail", "200", "browser-use-tunnel"], check=False) - combined = "\n".join(part for part in [result.stdout or "", result.stderr or ""] if part) - matches = re.findall(r"https://[^\s\"'<>]+", combined) - base_url = _normalize_browser_view_base_url(matches[-1]) if matches else "" - except Exception as exc: - logger.debug("Failed to discover browser view URL from tunnel logs: %s", exc) - base_url = "" - - with _VIEW_URL_CACHE_LOCK: - _VIEW_URL_CACHE["value"] = base_url - _VIEW_URL_CACHE["expires_at"] = now + (60 if base_url else 10) - - return base_url - - -def get_browser_view_url( - task_id: str | None = None, - metadata: dict[str, Any] | None = None, - thread_id: str | None = None, -) -> str: - base_url = _normalize_browser_view_base_url( - os.getenv("BROWSER_VIEW_BASE_URL", "") or os.getenv("BROWSER_VIEW_URL", "") - ) - if not base_url: - base_url = _discover_browser_view_base_url_from_tunnel() - if not base_url: - return "" - - config = get_browser_runtime_config() - if config.mode == "shared": - return f"{base_url}/vnc.html?path=websockify" - - owner = resolve_isolation_owner(config.mode, task_id, metadata, thread_id) - owner_hash = hash_runtime_owner(owner) - return f"{base_url}/view/{owner_hash}/vnc.html?path=view/{owner_hash}/websockify" - - -def _shared_rpc_url() -> str: - return 
os.getenv("BROWSER_USE_RPC_URL", _DEFAULT_SHARED_RPC_URL).strip() or _DEFAULT_SHARED_RPC_URL - - -def _runtime_rpc_url(container_name: str) -> str: - return f"http://{container_name}:8787/run" - - -def _container_name(owner_hash: str) -> str: - return f"browser-use-browser-{owner_hash}" - - -def _volume_name(owner_hash: str) -> str: - return f"browser-use-profile-{owner_hash}" - - -def _load_registry() -> dict[str, Any]: - path = _registry_path() - if not path.exists(): - return {"runtimes": {}} - try: - with open(path, "r", encoding="utf-8") as fh: - data = json.load(fh) or {} - if isinstance(data, dict) and isinstance(data.get("runtimes"), dict): - return data - except Exception as exc: - logger.warning("Failed to read browser-use runtime registry %s: %s", path, exc) - return {"runtimes": {}} - - -def _save_registry(payload: dict[str, Any]) -> None: - path = _registry_path() - path.parent.mkdir(parents=True, exist_ok=True) - fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), prefix=".browser_use_", suffix=".tmp") - try: - with os.fdopen(fd, "w", encoding="utf-8") as fh: - json.dump(payload, fh, indent=2, sort_keys=True) - fh.flush() - os.fsync(fh.fileno()) - os.replace(tmp_path, path) - except Exception: - try: - os.unlink(tmp_path) - except OSError: - pass - raise - - -def _run_docker(args: list[str], check: bool = True) -> subprocess.CompletedProcess[str]: - cmd = ["docker", *args] - logger.debug("browser-use docker cmd: %s", " ".join(cmd)) - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=120, - ) - if check and result.returncode != 0: - stderr = (result.stderr or result.stdout or "").strip() - raise RuntimeError(f"Docker command failed ({' '.join(cmd)}): {stderr}") - return result - - -def _ensure_docker_access() -> None: - _run_docker(["version"], check=True) - - -def _container_exists(container_name: str) -> bool: - result = _run_docker(["inspect", container_name], check=False) - return result.returncode == 0 - - -def 
_container_running(container_name: str) -> bool: - result = _run_docker(["inspect", "-f", "{{.State.Running}}", container_name], check=False) - return result.returncode == 0 and result.stdout.strip().lower() == "true" - - -def _remove_container(container_name: str) -> None: - if container_name: - _run_docker(["rm", "-f", container_name], check=False) - - -def _volume_exists(volume_name: str) -> bool: - result = _run_docker(["volume", "inspect", volume_name], check=False) - return result.returncode == 0 - - -def _ensure_volume(volume_name: str, owner_hash: str) -> None: - if _volume_exists(volume_name): - return - _run_docker( - [ - "volume", - "create", - "--label", - "browser_use.runtime=true", - "--label", - f"browser_use.owner_hash={owner_hash}", - volume_name, - ], - check=True, - ) - - -def _remove_volume(volume_name: str) -> None: - if volume_name: - _run_docker(["volume", "rm", "-f", volume_name], check=False) - - -def _runtime_env_args(browser_view_url: str, config: BrowserRuntimeConfig) -> list[str]: - env: dict[str, str] = { - "BROWSER_ENABLE_UI": "true" if config.enable_ui else "false", - "BROWSER_DATA_DIR": "/data", - "BROWSER_USE_RPC_HOST": "0.0.0.0", - "BROWSER_USE_RPC_PORT": "8787", - } - - if browser_view_url: - env["BROWSER_VIEW_URL"] = browser_view_url - - for key in ("MODEL_DEFAULT", "OPENAI_API_KEY", "OPENAI_BASE_URL"): - value = os.getenv(key) - if value is not None: - env[key] = value - - args: list[str] = [] - for key, value in env.items(): - args.extend(["-e", f"{key}={value}"]) - return args - - -def _start_runtime_container( - container_name: str, - volume_name: str, - owner_hash: str, - browser_view_url: str, - config: BrowserRuntimeConfig, -) -> None: - _ensure_volume(volume_name, owner_hash) - run_args = [ - "run", - "-d", - "--name", - container_name, - "--network", - config.runtime_network, - "--shm-size", - "2g", - "--label", - "browser_use.runtime=true", - "--label", - f"browser_use.owner_hash={owner_hash}", - "--label", - 
"browser_use.managed_by=browser_runtime_manager", - *_runtime_env_args(browser_view_url, config), - "-v", - f"{volume_name}:/data", - config.runtime_image, - ] - _run_docker(run_args, check=True) - - -def _wait_for_runtime(container_name: str, timeout_seconds: int) -> None: - deadline = time.time() + timeout_seconds - health_url = f"http://{container_name}:8787/health" - last_error = "" - while time.time() < deadline: - try: - with request.urlopen(health_url, timeout=2) as response: - if 200 <= response.status < 300: - return - last_error = f"HTTP {response.status}" - except Exception as exc: - last_error = str(exc) - time.sleep(1) - raise RuntimeError(f"Browser runtime {container_name} did not become ready: {last_error}") - - -def _cleanup_expired_runtimes_locked(registry: dict[str, Any], config: BrowserRuntimeConfig) -> None: - now = time.time() - runtimes = registry.setdefault("runtimes", {}) - expired_keys: list[str] = [] - for runtime_key, entry in list(runtimes.items()): - last_used = float(entry.get("last_used", 0) or 0) - if not last_used or now - last_used < config.runtime_ttl_seconds: - continue - - container_name = str(entry.get("container_name", "") or "") - volume_name = str(entry.get("volume_name", "") or "") - mode = str(entry.get("mode", "") or "") - logger.info("Cleaning expired browser-use runtime %s (%s)", runtime_key, container_name) - _remove_container(container_name) - if mode == "docker-per-task": - _remove_volume(volume_name) - expired_keys.append(runtime_key) - - for runtime_key in expired_keys: - runtimes.pop(runtime_key, None) - - -def ensure_browser_runtime( - task_id: str | None = None, - metadata: dict[str, Any] | None = None, - thread_id: str | None = None, -) -> dict[str, str]: - config = get_browser_runtime_config() - if config.mode == "shared": - return { - "cdp_url": config.shared_cdp_url, - "rpc_url": _shared_rpc_url(), - "browser_view": get_browser_view_url(task_id=task_id, metadata=metadata, thread_id=thread_id), - 
"isolation_mode": "shared", - "owner_hash": "", - } - - _ensure_docker_access() - owner = resolve_isolation_owner(config.mode, task_id, metadata, thread_id) - owner_hash = hash_runtime_owner(owner) - runtime_key = f"{config.mode}:{owner_hash}" - container_name = _container_name(owner_hash) - volume_name = _volume_name(owner_hash) - browser_view_url = get_browser_view_url(task_id=task_id, metadata=metadata, thread_id=thread_id) - - with _REGISTRY_LOCK: - registry = _load_registry() - _cleanup_expired_runtimes_locked(registry, config) - - if _container_running(container_name): - registry.setdefault("runtimes", {})[runtime_key] = { - "container_name": container_name, - "volume_name": volume_name, - "last_used": time.time(), - "mode": config.mode, - "owner_hash": owner_hash, - } - _save_registry(registry) - return { - "cdp_url": f"http://{container_name}:9222", - "rpc_url": _runtime_rpc_url(container_name), - "browser_view": browser_view_url, - "isolation_mode": config.mode, - "owner_hash": owner_hash, - } - - if _container_exists(container_name): - _remove_container(container_name) - - _start_runtime_container(container_name, volume_name, owner_hash, browser_view_url, config) - _wait_for_runtime(container_name, config.runtime_start_timeout) - - registry.setdefault("runtimes", {})[runtime_key] = { - "container_name": container_name, - "volume_name": volume_name, - "last_used": time.time(), - "mode": config.mode, - "owner_hash": owner_hash, - } - _save_registry(registry) - - return { - "cdp_url": f"http://{container_name}:9222", - "rpc_url": _runtime_rpc_url(container_name), - "browser_view": browser_view_url, - "isolation_mode": config.mode, - "owner_hash": owner_hash, - } - - -def cleanup_browser_runtime( - task_id: str | None = None, - metadata: dict[str, Any] | None = None, - thread_id: str | None = None, -) -> None: - config = get_browser_runtime_config() - if config.mode != "docker-per-task": - return - - owner = resolve_isolation_owner(config.mode, task_id, 
metadata, thread_id) - owner_hash = hash_runtime_owner(owner) - runtime_key = f"{config.mode}:{owner_hash}" - container_name = _container_name(owner_hash) - volume_name = _volume_name(owner_hash) - - with _REGISTRY_LOCK: - registry = _load_registry() - _remove_container(container_name) - _remove_volume(volume_name) - registry.setdefault("runtimes", {}).pop(runtime_key, None) - _save_registry(registry) diff --git a/api/services/protocols.py b/api/services/protocols.py deleted file mode 100644 index 2dd785b1..00000000 --- a/api/services/protocols.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import annotations - -from asyncio import Queue -from typing import Any, Protocol - -from api.repositories.task_store import TaskRecord - - -class TaskServiceProtocol(Protocol): - async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord: ... - - async def get_task(self, task_id: str) -> TaskRecord | None: ... - - async def create_run(self, thread_id: str, user_input: str, timeout: int, metadata: dict | None) -> TaskRecord: ... - - async def get_run(self, run_id: str) -> TaskRecord | None: ... - - async def list_thread_runs(self, thread_id: str) -> list[TaskRecord]: ... - - async def cancel_run(self, run_id: str) -> TaskRecord | None: ... - - async def delete_run(self, run_id: str) -> tuple[bool, bool]: ... - - async def wait_run(self, run_id: str, timeout: float | None = None) -> TaskRecord | None: ... - - async def subscribe_run_stream(self, run_id: str) -> Queue[dict[str, Any]] | None: ... - - async def unsubscribe_run_stream(self, run_id: str, queue: Queue[dict[str, Any]]) -> None: ... 
diff --git a/api/services/task_service.py b/api/services/task_service.py deleted file mode 100644 index afa5968c..00000000 --- a/api/services/task_service.py +++ /dev/null @@ -1,260 +0,0 @@ -import asyncio -import time -from typing import Any - -from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner -from api.domain.task_status import TaskStatus -from api.repositories.task_store import TaskRecord, TaskStore -from api.services.browser_runtime_manager import cleanup_browser_runtime, ensure_browser_runtime - - -class TaskService: - def __init__( - self, - store: TaskStore, - rpc_client: BrowserRpcRunner, - max_concurrency: int, - rpc_timeout_cap: float | None = None, - ) -> None: - self._store = store - self._rpc_client = rpc_client - self._semaphore = asyncio.Semaphore(max_concurrency) - self._rpc_timeout_cap = rpc_timeout_cap - self._background_tasks: set[asyncio.Task[None]] = set() - self._task_by_run_id: dict[str, asyncio.Task[None]] = {} - - async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord: - record = await self.create_run(thread_id="default", user_input=task, timeout=timeout, metadata=metadata) - return record - - async def create_run(self, thread_id: str, user_input: str, timeout: int, metadata: dict | None) -> TaskRecord: - record = await self._store.create(task=user_input, timeout=timeout, metadata=metadata, thread_id=thread_id) - background_task = asyncio.create_task(self._worker(record.task_id)) - self._background_tasks.add(background_task) - background_task.add_done_callback(self._background_tasks.discard) - self._task_by_run_id[record.task_id] = background_task - - def _cleanup(_: asyncio.Task[None]) -> None: - self._task_by_run_id.pop(record.task_id, None) - - background_task.add_done_callback(_cleanup) - return record - - async def get_task(self, task_id: str) -> TaskRecord | None: - return await self._store.get(task_id) - - async def get_run(self, run_id: str) -> TaskRecord | None: - 
return await self.get_task(run_id) - - async def list_thread_runs(self, thread_id: str) -> list[TaskRecord]: - return await self._store.list_by_thread(thread_id) - - async def cancel_run(self, run_id: str) -> TaskRecord | None: - rec = await self._store.set_cancel_requested(run_id) - if rec is None: - return None - - if rec.status == TaskStatus.cancelled: - await self._store.publish(run_id, self._event(run_id, "cancelled", {"status": rec.status.value})) - return rec - - task = self._task_by_run_id.get(run_id) - if task is not None and not task.done(): - task.cancel() - return rec - - async def delete_run(self, run_id: str) -> tuple[bool, bool]: - return await self._store.delete_if_finished(run_id) - - async def wait_run(self, run_id: str, timeout: float | None = None) -> TaskRecord | None: - rec = await self._store.get(run_id) - if rec is None: - return None - - if rec.status not in (TaskStatus.queued, TaskStatus.running): - return rec - - try: - if timeout is None: - await rec.done_event.wait() - else: - await asyncio.wait_for(rec.done_event.wait(), timeout=timeout) - except asyncio.TimeoutError: - return await self._store.get(run_id) - return await self._store.get(run_id) - - async def subscribe_run_stream(self, run_id: str): - return await self._store.subscribe(run_id) - - async def unsubscribe_run_stream(self, run_id: str, queue) -> None: - await self._store.unsubscribe(run_id, queue) - - async def close(self) -> None: - if not self._background_tasks: - return - - for task in list(self._background_tasks): - task.cancel() - await asyncio.gather(*self._background_tasks, return_exceptions=True) - self._background_tasks.clear() - self._task_by_run_id.clear() - - async def _worker(self, task_id: str) -> None: - rec = await self._store.set_running(task_id) - if rec is None: - return - if rec.status == TaskStatus.cancelled: - return - - await self._store.publish(task_id, self._event(task_id, "started", {"status": TaskStatus.running.value})) - - async with 
self._semaphore: - runtime: dict[str, str] | None = None - try: - if rec.cancel_requested: - await self._store.set_cancelled(task_id) - await self._store.publish(task_id, self._event(task_id, "cancelled", {"status": TaskStatus.cancelled.value})) - return - - runtime = await asyncio.to_thread( - ensure_browser_runtime, - task_id=task_id, - metadata=rec.metadata, - thread_id=rec.thread_id, - ) - rpc_timeout = float(rec.timeout) - if self._rpc_timeout_cap is not None: - rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap) - - raw = await asyncio.wait_for( - self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout, rpc_url=runtime.get("rpc_url")), - timeout=float(rec.timeout) + 5, - ) - raw = self._with_runtime_metadata(raw, runtime) - success = bool(raw.get("success")) - await self._store.set_done( - task_id=task_id, - success=success, - raw_response=raw, - error=None, - result=raw.get("result") if isinstance(raw, dict) else None, - history=self._extract_history(raw), - ) - done = await self._store.get(task_id) - if done is not None: - await self._publish_history_events(done) - await self._store.publish( - task_id, - self._event(task_id, "completed" if success else "failed", { - "status": done.status.value, - "output": done.result, - "error": done.error, - }), - ) - except asyncio.CancelledError: - await self._store.set_cancelled(task_id) - await self._store.publish(task_id, self._event(task_id, "cancelled", {"status": TaskStatus.cancelled.value})) - raise - except asyncio.TimeoutError: - await self._store.set_done( - task_id=task_id, - success=False, - raw_response=None, - error="Timeout exceeded", - history=None, - ) - failed = await self._store.get(task_id) - if failed is not None: - await self._store.publish(task_id, self._event(task_id, "failed", { - "status": failed.status.value, - "error": failed.error, - })) - except BrowserRpcError as exc: - await self._store.set_done( - task_id=task_id, - success=False, - raw_response=None, - error=str(exc), - 
history=None, - ) - failed = await self._store.get(task_id) - if failed is not None: - await self._store.publish(task_id, self._event(task_id, "failed", { - "status": failed.status.value, - "error": failed.error, - })) - except Exception as exc: - await self._store.set_done( - task_id=task_id, - success=False, - raw_response=None, - error=f"Internal error: {exc}", - history=None, - ) - failed = await self._store.get(task_id) - if failed is not None: - await self._store.publish(task_id, self._event(task_id, "failed", { - "status": failed.status.value, - "error": failed.error, - })) - finally: - try: - await asyncio.to_thread( - cleanup_browser_runtime, - task_id=task_id, - metadata=rec.metadata, - thread_id=rec.thread_id, - ) - except Exception: - pass - - async def _publish_history_events(self, rec: TaskRecord) -> None: - for index, item in enumerate(rec.history, start=1): - await self._store.publish( - rec.task_id, - self._event(rec.task_id, "output", { - "step": item.get("step", index), - "kind": item.get("kind") or item.get("type") or "system", - "content": item.get("content"), - "data": item.get("data") if isinstance(item.get("data"), dict) else {}, - }), - ) - - @staticmethod - def _event(run_id: str, event: str, data: dict[str, Any]) -> dict[str, Any]: - return { - "run_id": run_id, - "event": event, - "ts": time.time(), - "data": data, - } - - @staticmethod - def _extract_history(raw: dict | None) -> list[dict]: - if not isinstance(raw, dict): - return [] - - events = raw.get("history") - if not isinstance(events, list): - return [] - - normalized: list[dict] = [] - for event in events: - if isinstance(event, dict): - normalized.append(event) - return normalized - - @staticmethod - def _with_runtime_metadata(raw: dict[str, Any], runtime: dict[str, str] | None) -> dict[str, Any]: - if not isinstance(raw, dict) or not runtime: - return raw - - enriched = dict(raw) - browser_view = runtime.get("browser_view") - if browser_view and not 
enriched.get("browser_view"): - enriched["browser_view"] = browser_view - enriched["isolation_mode"] = runtime.get("isolation_mode", "shared") - owner_hash = runtime.get("owner_hash") - if owner_hash: - enriched["owner_hash"] = owner_hash - return enriched diff --git a/api/test-api.py b/api/test-api.py deleted file mode 100644 index 1eef5b68..00000000 --- a/api/test-api.py +++ /dev/null @@ -1,197 +0,0 @@ -import requests -import time -import json -from datasets import load_dataset -from datetime import datetime - -# Конфигурация API -API_URL = "http://localhost:8088/api/browser/tasks" -HEADERS = {"Content-Type": "application/json"} - -# Загружаем датасет -dataset = load_dataset("iMeanAI/Mind2Web-Live", split="train") - -# Для теста берем первые N задач (замените на полный датасет при необходимости) -TEST_SIZE = 10 # или len(dataset) для полного бенчмарка -dataset = dataset.select(range(TEST_SIZE)) - -print(f"Загружено задач: {len(dataset)}") -print(f"Поля: {dataset[0].keys()}\n") -cnt = 3 -results = [] - -for idx, item in enumerate(dataset): - if cnt > 0: - cnt -=1 - continue - # Поля из датасета - task_desc = item['task'] # Описание задачи - ref_length = item['reference_task_length'] # Эталонная длина в шагах - evaluation = item['evaluation'] # Критерии оценки - - # ID задачи (используем index + timestamp для уникальности) - task_id_orig = f"mind2web_{idx}_{int(time.time())}" - - print(f"\n[{idx + 1}/{len(dataset)}] Task: {task_desc[:70]}...") - print(f" Эталонная длина: {ref_length} шагов") - - start_time = time.time() - - # 1. 
Создаем задачу через API - try: - resp = requests.post( - API_URL, - json={ - "task": task_desc, - "timeout": 300, # Увеличим таймаут для сложных задач - "metadata": { - "source": "mind2web", - "reference_length": ref_length - } - }, - headers=HEADERS, - timeout=10 - ) - - if resp.status_code != 202: - print(f" ❌ Ошибка создания задачи: {resp.status_code}") - print(f" Ответ: {resp.text}") - continue - - api_task_id = resp.json()["task_id"] - created_at = time.time() - queue_time = created_at - start_time - - print(f" 📝 Task ID: {api_task_id} | Очередь: {queue_time:.2f}с") - - # 2. Ожидание завершения с прогрессом - status = "queued" - poll_count = 0 - while status in ["queued", "running"]: - time.sleep(2) # Интервал опроса - poll_count += 1 - - try: - status_resp = requests.get(f"{API_URL}/{api_task_id}", timeout=5) - if status_resp.status_code == 200: - status_data = status_resp.json() - status = status_data.get("status", "unknown") - - # Показываем прогресс каждые 5 опросов - if poll_count % 5 == 0: - elapsed = time.time() - start_time - print(f" ⏳ Статус: {status} | Прошло: {elapsed:.1f}с") - except Exception as e: - print(f" ⚠️ Ошибка опроса: {e}") - pass - - end_time = time.time() - execution_time = end_time - start_time - - # 3. Получение результата - result_resp = requests.get(f"{API_URL}/{api_task_id}/result", timeout=10) - - result_data = None - if result_resp.status_code == 200: - try: - result_data = result_resp.json() - except: - result_data = result_resp.text - - # 4. 
Запись метрик - result = { - "index": idx, - "original_task_id": task_id_orig, - "api_task_id": api_task_id, - "task_description": task_desc, - "reference_length": ref_length, - "status": status, - "queue_time_sec": round(queue_time, 2), - "execution_time_sec": round(execution_time, 2), - "total_time_sec": round(end_time - start_time, 2), - "result": result_data, - "timestamp": datetime.now().isoformat() - } - results.append(result) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"mind2web_benchmark.json" - with open(filename, "w", encoding="utf-8") as f: - json.dump(results, f, indent=2, ensure_ascii=False) - # Эмодзи статуса - status_emoji = "✅" if status == "succeeded" else "❌" - print(f" {status_emoji} Статус: {status} | Время: {execution_time:.1f}с") - - except requests.exceptions.Timeout: - print(f" ❌ Таймаут при создании задачи") - except Exception as e: - print(f" ❌ Ошибка: {type(e).__name__}: {e}") - continue - -# Сохранение детальных результатов -timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") -filename = f"mind2web_benchmark_{timestamp}.json" - -with open(filename, "w", encoding="utf-8") as f: - json.dump(results, f, indent=2, ensure_ascii=False) - -print("\n" + "=" * 60) -print("📊 ИТОГОВЫЕ МЕТРИКИ СКОРОСТИ") -print("=" * 60) - -# Статистика по статусам -completed = [r for r in results if r["status"] == "completed"] -failed = [r for r in results if r["status"] == "failed"] -unknown = [r for r in results if r["status"] not in ["completed", "failed"]] - -print(f"\n📈 СТАТУСЫ:") -print(f" Всего задач: {len(results)}") -print(f" ✅ Успешно: {len(completed)} ({len(completed) / max(len(results), 1) * 100:.1f}%)") -print(f" ❌ Провалено: {len(failed)} ({len(failed) / max(len(results), 1) * 100:.1f}%)") -if unknown: - print(f" ❓ Неизвестный статус: {len(unknown)}") - -if completed: - total_times = [r["total_time_sec"] for r in completed] - queue_times = [r["queue_time_sec"] for r in completed] - exec_times = [r["execution_time_sec"] for r 
in completed] - - print(f"\n⏱️ ВРЕМЯ ВЫПОЛНЕНИЯ:") - print(f" Среднее: {sum(total_times) / len(total_times):.2f} сек") - print(f" Медиана (p50): {sorted(total_times)[len(total_times) // 2]:.2f} сек") - if len(total_times) >= 20: - print(f" p95: {sorted(total_times)[int(len(total_times) * 0.95)]:.2f} сек") - print(f" Мин: {min(total_times):.2f} сек") - print(f" Макс: {max(total_times):.2f} сек") - - print(f"\n📊 ПРОИЗВОДИТЕЛЬНОСТЬ:") - print(f" Среднее время в очереди: {sum(queue_times) / len(queue_times):.2f} сек") - tasks_per_hour = 3600 / (sum(total_times) / len(total_times)) - print(f" Скорость выполнения: {tasks_per_hour:.1f} задач/час") - - # Эффективность относительно эталонной длины - if all("reference_length" in r for r in completed): - avg_ref_length = sum(r["reference_length"] for r in completed) / len(completed) - time_per_step = (sum(total_times) / len(total_times)) / avg_ref_length - print(f" Среднее время на шаг: {time_per_step:.2f} сек") - -print(f"\n💾 Результаты сохранены в: {filename}") - -# Создание краткого отчета для сравнения -summary = { - "benchmark": "Online-Mind2Web", - "timestamp": timestamp, - "api_endpoint": API_URL, - "total_tasks": len(results), - "completed": len(completed), - "failed": len(failed), - "success_rate": len(completed) / max(len(results), 1) * 100, - "avg_time_sec": sum(total_times) / len(total_times) if completed else None, - "median_time_sec": sorted(total_times)[len(total_times) // 2] if completed else None, - "tasks_per_hour": 3600 / (sum(total_times) / len(total_times)) if completed else None -} - -summary_file = f"mind2web_summary_{timestamp}.json" -with open(summary_file, "w", encoding="utf-8") as f: - json.dump(summary, f, indent=2, ensure_ascii=False) - -print(f"📋 Краткий отчет сохранен в: {summary_file}") \ No newline at end of file diff --git a/api/tests/test_browser_runtime_manager.py b/api/tests/test_browser_runtime_manager.py deleted file mode 100644 index b31f4577..00000000 --- 
a/api/tests/test_browser_runtime_manager.py +++ /dev/null @@ -1,97 +0,0 @@ -from unittest.mock import MagicMock, patch - - -def test_resolve_isolation_owner_prefers_user_id(): - from api.services.browser_runtime_manager import resolve_isolation_owner - - owner = resolve_isolation_owner( - "docker-per-principal", - task_id="task-1", - metadata={"user_id": "user-7", "session_id": "session-9"}, - thread_id="thread-1", - ) - - assert owner == "user-7" - - -def test_resolve_isolation_owner_uses_task_for_per_task_mode(): - from api.services.browser_runtime_manager import resolve_isolation_owner - - owner = resolve_isolation_owner( - "docker-per-task", - task_id="task-42", - metadata={"user_id": "user-7"}, - thread_id="thread-1", - ) - - assert owner == "task-42" - - -def test_hash_runtime_owner_is_stable(): - from api.services.browser_runtime_manager import hash_runtime_owner - - assert hash_runtime_owner("owner-1") == hash_runtime_owner("owner-1") - assert hash_runtime_owner("owner-1") != hash_runtime_owner("owner-2") - - -def test_shared_mode_returns_shared_runtime(monkeypatch): - from api.services import browser_runtime_manager - - monkeypatch.setenv("BROWSER_USE_ISOLATION_MODE", "shared") - monkeypatch.setenv("BROWSER_URL", "http://shared-browser:9333") - monkeypatch.setenv("BROWSER_USE_RPC_URL", "http://shared-browser:8787/run") - monkeypatch.setenv("BROWSER_VIEW_BASE_URL", "https://viewer.example.com") - - runtime = browser_runtime_manager.ensure_browser_runtime( - task_id="task-1", - metadata={"user_id": "user-7"}, - thread_id="thread-1", - ) - - assert runtime["cdp_url"] == "http://shared-browser:9333" - assert runtime["rpc_url"] == "http://shared-browser:8787/run" - assert runtime["browser_view"] == "https://viewer.example.com/vnc.html?path=websockify" - assert runtime["isolation_mode"] == "shared" - - -def test_isolated_mode_starts_container(monkeypatch): - from api.services import browser_runtime_manager - - monkeypatch.setenv("BROWSER_USE_ISOLATION_MODE", 
"docker-per-principal") - monkeypatch.setenv("BROWSER_RUNTIME_IMAGE", "browser-use-browser-runtime:test") - monkeypatch.setenv("BROWSER_RUNTIME_NETWORK", "browser-net") - monkeypatch.setenv("BROWSER_VIEW_BASE_URL", "https://viewer.example.com") - - saved_registry = {} - docker_calls = [] - - def fake_run_docker(args, check=True): - docker_calls.append(args) - if args[:2] == ["inspect", "-f"]: - return MagicMock(returncode=1, stdout="", stderr="") - if args[:1] == ["inspect"]: - return MagicMock(returncode=1, stdout="", stderr="") - return MagicMock(returncode=0, stdout="ok", stderr="") - - with ( - patch.object(browser_runtime_manager, "_load_registry", return_value={"runtimes": {}}), - patch.object(browser_runtime_manager, "_save_registry", side_effect=lambda payload: saved_registry.update(payload)), - patch.object(browser_runtime_manager, "_run_docker", side_effect=fake_run_docker), - patch.object(browser_runtime_manager, "_wait_for_runtime") as mock_wait, - ): - runtime = browser_runtime_manager.ensure_browser_runtime( - task_id="task-1", - metadata={"user_id": "user-7"}, - thread_id="thread-1", - ) - - assert runtime["isolation_mode"] == "docker-per-principal" - assert runtime["cdp_url"].startswith("http://browser-use-browser-") - assert runtime["rpc_url"].startswith("http://browser-use-browser-") - assert runtime["rpc_url"].endswith(":8787/run") - assert "/view/" in runtime["browser_view"] - assert saved_registry["runtimes"] - run_commands = [call for call in docker_calls if call[:2] == ["run", "-d"]] - assert run_commands - assert "browser-use-browser-runtime:test" in run_commands[0] - mock_wait.assert_called_once() diff --git a/api/tests/test_task_service_browser_runtime.py b/api/tests/test_task_service_browser_runtime.py deleted file mode 100644 index d1dd29d2..00000000 --- a/api/tests/test_task_service_browser_runtime.py +++ /dev/null @@ -1,62 +0,0 @@ -import asyncio -from typing import Any - - -class FakeRpcClient: - def __init__(self) -> None: - 
self.calls: list[dict[str, Any]] = [] - - async def run(self, task: str, timeout_sec: float, rpc_url: str | None = None) -> dict[str, Any]: - self.calls.append({"task": task, "timeout_sec": timeout_sec, "rpc_url": rpc_url}) - return {"success": True, "result": "done"} - - -def test_task_service_routes_run_to_browser_runtime(monkeypatch): - from api.repositories.task_store import TaskStore - from api.services import task_service as task_service_module - from api.services.task_service import TaskService - - runtime = { - "rpc_url": "http://browser-use-browser-abc:8787/run", - "browser_view": "https://viewer.example.com/view/abc/vnc.html?path=view/abc/websockify", - "isolation_mode": "docker-per-principal", - "owner_hash": "abc", - } - cleanup_calls = [] - - monkeypatch.setattr(task_service_module, "ensure_browser_runtime", lambda **_: runtime) - monkeypatch.setattr(task_service_module, "cleanup_browser_runtime", lambda **kwargs: cleanup_calls.append(kwargs)) - - async def scenario(): - rpc_client = FakeRpcClient() - service = TaskService( - store=TaskStore(), - rpc_client=rpc_client, - max_concurrency=1, - rpc_timeout_cap=30, - ) - rec = await service.create_run( - thread_id="thread-1", - user_input="open example.com", - timeout=60, - metadata={"user_id": "user-7"}, - ) - done = await service.wait_run(rec.task_id, timeout=2) - await service.close() - return rpc_client, done - - rpc_client, done = asyncio.run(scenario()) - - assert rpc_client.calls == [ - { - "task": "open example.com", - "timeout_sec": 30, - "rpc_url": "http://browser-use-browser-abc:8787/run", - } - ] - assert done is not None - assert done.raw_response is not None - assert done.raw_response["browser_view"] == runtime["browser_view"] - assert done.raw_response["isolation_mode"] == "docker-per-principal" - assert done.raw_response["owner_hash"] == "abc" - assert cleanup_calls diff --git a/assets/config.example.json b/assets/config.example.json new file mode 100644 index 00000000..ae331184 --- 
/dev/null +++ b/assets/config.example.json @@ -0,0 +1,30 @@ + +--- + +## ⚙️ Файл: assets/config.example.json + +```json +{ + "browser": { + "headless": true, + "timeout": 30000, + "viewport": { + "width": 1280, + "height": 720 + }, + "user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + }, + "screenshots": { + "path": "/tmp/browser-use-screenshots", + "format": "png", + "full_page": true + }, + "retry": { + "max_attempts": 3, + "delay_seconds": 2 + }, + "logging": { + "level": "info", + "save_screenshots_on_error": true + } +} \ No newline at end of file diff --git a/browser-use/SKILL.md b/browser-use/SKILL.md new file mode 100644 index 00000000..b38b71bc --- /dev/null +++ b/browser-use/SKILL.md @@ -0,0 +1,191 @@ +--- +name: browser-use +version: "1.1.0" +description: Run web automation tasks through browser-use and Chromium CDP (headless or GUI). +triggers: + - "browser-use" + - "open website and extract" + - "automate browser task" + - "run browser task" + - "открой сайт" + - "заполни форму" + - "найди на странице" + - "сделай в браузере" +allowed-tools: + - terminal + - file + - memory +--- + +# Browser Use (Chromium/CDP) + +Use this skill when a task requires real browser actions: open pages, click, type, submit forms, extract text/data, verify visible results. + +## Decision: when to use this skill + +Use `browser-use` if user asks to: +- navigate websites step-by-step; +- interact with UI elements (buttons, inputs, dropdowns); +- extract structured content from rendered pages; +- complete multi-step flows (login/search/filter/checkout draft). + +Do **not** use `browser-use` if task is: +- pure static fetch/API call (use lighter tools); +- local file manipulation only; +- impossible due to CAPTCHA/2FA/region lock without user intervention. + +## What the agent can and cannot see + +Short answer to common question: **the agent sees the rendered page state, not all JavaScript source by default**. 
+ +The agent typically sees/uses: +- rendered DOM and interactive elements; +- visible text/content after JS execution; +- current URL, titles, form states; +- action results/errors returned by browser-use. + +The agent does **not automatically** get: +- full source code of all loaded JS bundles; +- complete DevTools Network timeline; +- hidden backend logic not exposed in page content. + +If user asks about JS specifically, do explicit steps: +1. locate script URLs from page source/DOM; +2. open script URL(s) directly; +3. extract needed fragments (function names, endpoints, constants). + +## Runtime modes (CDP endpoints) + +This project supports two modes. + +1) Headless browserless Chromium: +- CDP: `ws://chromium:3000/chromium?token=hermes-local` + +2) GUI Chromium (visible in noVNC): +- CDP: `http://172.25.0.3:9223` +- Visual stream: `http://localhost:6080/vnc.html` + +Notes: +- `run_browser_use.py` accepts both `ws://` and `http://` CDP URLs. +- For `http://`, script resolves `/json/version` and converts to websocket URL automatically. + +## Required environment + +Minimum required env vars: +- `OPENAI_API_KEY` +- optional: `OPENAI_BASE_URL` +- optional: `OPENAI_MODEL` or `BROWSER_USE_MODEL` +- optional override: `BROWSER_USE_CDP_URL` + +Defaults in this repo: +- `BROWSER_USE_PYTHON=/opt/browser-use-venv/bin/python` +- `BROWSER_USE_CDP_URL=http://172.25.0.3:9223` (from `docker-compose.yml`) + +## Quick runbook (inside Docker) + +1. Ensure services are up: + +```bash +docker compose --profile gui up -d +docker compose ps +``` + +2. Check env in `hermes-agent`: + +```bash +docker compose exec -T hermes-agent python - <<'PY' +import os +print('OPENAI_API_KEY', '<set>' if os.getenv('OPENAI_API_KEY') else '<missing>') +print('BROWSER_USE_CDP_URL', os.getenv('BROWSER_USE_CDP_URL', '<unset>')) +print('OPENAI_MODEL', os.getenv('OPENAI_MODEL', '<unset>')) +PY +``` + +3. 
Run a task: + +```bash +python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ + --task "Open example.com and return page title" \ + --max-steps 8 +``` + +4. For GUI visibility, open stream: + +```bash +open "http://localhost:6080/vnc.html" +``` + +## Runbook (outside Docker) + +Use one combined command so env vars are available in the same process: + +```bash +export OPENAI_API_KEY="$OPENAI_API_KEY" && \ +export BROWSER_USE_CDP_URL="$BROWSER_USE_CDP_URL" && \ +/opt/browser-use-venv/bin/python /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ + --task "<task description>" \ + --max-steps 20 +``` + +## How Hermes should call this skill + +Standard pattern: + +```bash +python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ + --task "<task description>" \ + --max-steps 20 +``` + +If user gave a starting page, add `--start-url`. + +```bash +python-browser-use /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ + --task "Find contact email" \ + --start-url "https://example.com" \ + --max-steps 20 +``` + +## Troubleshooting (symptom -> action) + +`{"success": false, "error": "OPENAI_API_KEY is not set"}` +- check `workspace/.env` and `hermes_data/.env`; +- recreate container: + +```bash +docker compose up -d --force-recreate hermes-agent +``` + +`401 key_model_access_denied` +- model is not allowed for API key; +- set `BROWSER_USE_MODEL` or `OPENAI_MODEL` to an allowed model. + +`Connection refused` or CDP errors +- verify browser container is running: + +```bash +docker compose ps +docker compose exec -T hermes-agent bash -lc 'curl -s http://172.25.0.3:9223/json/version | head' +``` + +Timeout / exit code `124` +- not necessarily script failure; +- increase `--max-steps` and/or task timeout envelope. + +## Site-specific limitations + +- Yandex Music: may be blocked by region. +- Wildberries: anti-bot/CAPTCHA may block automation. 
+ +When blocked by anti-bot/2FA/CAPTCHA: +- ask user for manual intervention; +- continue automation after challenge is passed; +- or switch to non-browser strategy if acceptable. + +## Operational notes + +- Script file: `/root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py` +- Script output: JSON (`success`, `cdp_url`, `result.final_result`, `result.errors`) +- In current implementation `use_vision=False`, so decisions are based on browser-use structured state rather than visual screenshot reasoning. + + diff --git a/browser-use/assets/config.example.json b/browser-use/assets/config.example.json new file mode 100644 index 00000000..e8e3e146 --- /dev/null +++ b/browser-use/assets/config.example.json @@ -0,0 +1,16 @@ +{ + "browser": { + "cdp_url": "ws://chromium:3000/chromium?token=hermes-local", + "headless": true, + "timeout": 120000 + }, + "agent": { + "model_env": "BROWSER_USE_MODEL", + "max_steps": 20, + "use_vision": false + }, + "logging": { + "level": "info" + } +} + diff --git a/browser-use/assets/config.json b/browser-use/assets/config.json new file mode 100644 index 00000000..8f355553 --- /dev/null +++ b/browser-use/assets/config.json @@ -0,0 +1,16 @@ +{ + "browser": { + "cdp_url": "ws://chromium:3000/playwright?token=hermes-local", + "headless": true, + "timeout": 120000 + }, + "agent": { + "model_env": "BROWSER_USE_MODEL", + "max_steps": 20, + "use_vision": false + }, + "logging": { + "level": "info" + } +} + diff --git a/browser-use/scripts/requirements.txt b/browser-use/scripts/requirements.txt new file mode 100644 index 00000000..33650044 --- /dev/null +++ b/browser-use/scripts/requirements.txt @@ -0,0 +1,2 @@ +browser-use==0.12.5 + diff --git a/browser-use/scripts/run_browser_use.py b/browser-use/scripts/run_browser_use.py new file mode 100644 index 00000000..401e52fc --- /dev/null +++ b/browser-use/scripts/run_browser_use.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +"""Run browser-use task against a Chromium CDP 
endpoint.""" + +import argparse +import asyncio +import json +import os +import socket +from pathlib import Path +from typing import Any +from urllib.parse import urlparse, urlunparse +from urllib.request import urlopen + +from browser_use import Agent, BrowserSession +from browser_use.llm import ChatOpenAI + + +ENV_FALLBACK_PATHS = ( + Path("/workspace/.env"), + Path("/workspace/workspace/.env"), + Path("/root/.hermes/.env"), +) + + +def _read_env_from_files(name: str) -> str | None: + for env_path in ENV_FALLBACK_PATHS: + if not env_path.exists(): + continue + try: + for raw_line in env_path.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + if key.strip() == name: + cleaned = value.strip().strip('"').strip("'") + return cleaned or None + except OSError: + continue + return None + + +def _get_env(name: str, default: str | None = None) -> str | None: + value = os.getenv(name) + if value: + return value + from_file = _read_env_from_files(name) + if from_file: + return from_file + return default if default else None + + +def _build_task(task: str, start_url: str | None) -> str: + if not start_url: + return task + return f"Start from {start_url}. 
Task: {task}" + + +def _serialize_history(history: Any) -> dict[str, Any]: + result = "" + errors: list[str] = [] + if hasattr(history, "final_result"): + try: + result = history.final_result() or "" + except Exception: + result = "" + if hasattr(history, "errors"): + try: + raw_errors = list(history.errors()) + errors = [str(e) for e in raw_errors if e] + except Exception: + errors = [] + return { + "final_result": result, + "errors": errors, + "has_errors": bool(errors), + } + + +def _resolve_cdp_url(cdp_url: str) -> str: + if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"): + return cdp_url + if cdp_url.startswith("http://") or cdp_url.startswith("https://"): + parsed = urlparse(cdp_url) + host = parsed.hostname or "" + port = parsed.port + + # Chrome DevTools rejects non-IP/non-localhost Host headers in some setups. + # For docker service names, resolve to IP and query via numeric host. + if host not in {"localhost", "127.0.0.1", "0.0.0.0"}: + try: + resolved_host = socket.gethostbyname(host) + netloc = resolved_host if not port else f"{resolved_host}:{port}" + parsed = parsed._replace(netloc=netloc) + except OSError: + pass + + version_url = urlunparse(parsed).rstrip("/") + if not version_url.endswith("/json/version"): + version_url = f"{version_url}/json/version" + with urlopen(version_url, timeout=10) as response: # nosec B310 + payload = json.loads(response.read().decode("utf-8")) + ws_url = payload.get("webSocketDebuggerUrl") + if not ws_url: + raise RuntimeError(f"CDP endpoint did not return webSocketDebuggerUrl: {version_url}") + + # Keep a reachable host for ws:// URL when input used docker DNS alias. 
+ if host and host not in {"localhost", "127.0.0.1", "0.0.0.0"}: + ws_parsed = urlparse(str(ws_url)) + ws_netloc = ws_parsed.netloc + ws_port = ws_parsed.port + if ws_port is None: + ws_port = 443 if ws_parsed.scheme == "wss" else 80 + try: + resolved_host = socket.gethostbyname(host) + ws_netloc = f"{resolved_host}:{ws_port}" + ws_url = urlunparse(ws_parsed._replace(netloc=ws_netloc)) + except OSError: + pass + + return str(ws_url) + raise RuntimeError(f"Unsupported CDP URL scheme: {cdp_url}") + + +async def _run(args: argparse.Namespace) -> int: + api_key = _get_env("OPENAI_API_KEY") + if not api_key: + print(json.dumps({"success": False, "error": "OPENAI_API_KEY is not set"})) + return 2 + + model = _get_env("BROWSER_USE_MODEL", _get_env("OPENAI_MODEL", "gpt-4o-mini")) + base_url = _get_env("OPENAI_BASE_URL") + raw_cdp_url = args.cdp_url or _get_env("BROWSER_USE_CDP_URL", "ws://chromium:3000/chromium?token=hermes-local") + cdp_url = _resolve_cdp_url(raw_cdp_url) + + llm = ChatOpenAI( + model=model, + api_key=api_key, + base_url=base_url, + temperature=0.0, + ) + + browser_session = BrowserSession(cdp_url=cdp_url) + agent = Agent( + task=_build_task(args.task, args.start_url), + llm=llm, + browser_session=browser_session, + use_vision=False, + ) + + history = await agent.run(max_steps=args.max_steps) + payload = _serialize_history(history) + + print( + json.dumps( + { + "success": not payload["has_errors"], + "model": model, + "cdp_url": cdp_url, + "task": args.task, + "result": payload, + }, + ensure_ascii=True, + ) + ) + return 0 if not payload["has_errors"] else 1 + + +def main() -> int: + parser = argparse.ArgumentParser(description="Run browser-use task") + parser.add_argument("--task", required=True, help="Natural language task for browser-use") + parser.add_argument("--start-url", default=None, help="Optional URL to open first") + parser.add_argument("--max-steps", type=int, default=20, help="Max agent steps") + parser.add_argument("--cdp-url", 
default=None, help="CDP URL (ws://... or http://.../json/version host)") + args = parser.parse_args() + return asyncio.run(_run(args)) + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/browser-use/scripts/setup.sh b/browser-use/scripts/setup.sh new file mode 100644 index 00000000..6be17146 --- /dev/null +++ b/browser-use/scripts/setup.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="${SCRIPT_DIR}/.venv" + +python3 -m venv "${VENV_DIR}" +"${VENV_DIR}/bin/pip" install --upgrade pip +"${VENV_DIR}/bin/pip" install -r "${SCRIPT_DIR}/requirements.txt" + +echo "browser-use skill environment is ready: ${VENV_DIR}" + diff --git a/browser_env/Dockerfile.browser b/browser_env/Dockerfile.browser deleted file mode 100644 index ff00455e..00000000 --- a/browser_env/Dockerfile.browser +++ /dev/null @@ -1,36 +0,0 @@ -FROM debian:bookworm-slim - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update && apt-get install -y --no-install-recommends \ - chromium \ - python3 \ - python3-pip \ - xvfb \ - fluxbox \ - x11vnc \ - novnc \ - websockify \ - dbus-x11 \ - socat \ - procps \ - curl \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /src -RUN mkdir -p /src/browser_data - -RUN rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED \ - && python3 -m pip install --no-cache-dir --break-system-packages uv \ - && uv pip install --system --no-cache-dir \ - "browser-use>=0.12.5" \ - "langchain-openai>=0.3.0" - -COPY entrypoint.sh /entrypoint.sh -COPY browser_use_runner.py /src/browser_use_runner.py -RUN chmod +x /entrypoint.sh - -EXPOSE 6080 9222 8787 - -ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file diff --git a/browser_env/browser_use_runner.py b/browser_env/browser_use_runner.py deleted file mode 100644 index 89ff39b3..00000000 --- a/browser_env/browser_use_runner.py +++ /dev/null @@ -1,247 +0,0 @@ -import asyncio -import inspect -import json -import os -from 
http.server import BaseHTTPRequestHandler, ThreadingHTTPServer -from typing import Any, Literal -from urllib import error, request - -from browser_use import Agent, Browser, ChatOpenAI -from pydantic import BaseModel, Field, ValidationError, field_validator - -SPEED_OPTIMIZATION_PROMPT = """ -Speed optimization instructions: -- Be extremely concise and direct in your responses -- Get to the goal as quickly as possible -- Use multi-action sequences whenever possible to reduce steps -""" - - -class RunTaskRequest(BaseModel): - """RPC payload для запуска browser-use задачи.""" - - task: str = Field(..., min_length=1) - - @field_validator("task") - @classmethod - def validate_task(cls, value: str) -> str: - normalized = value.strip() - if not normalized: - raise ValueError("Field 'task' is required") - return normalized - - -class HistoryEvent(BaseModel): - """Нормализованное событие из history агента.""" - - step: int - kind: str - content: str | None = None - data: dict[str, Any] = Field(default_factory=dict) - - -class RunTaskSuccessResponse(BaseModel): - """Успешный ответ RPC раннера.""" - - success: Literal[True] = True - result: str | None = None - history: list[HistoryEvent] = Field(default_factory=list) - browser_view: str = "" - - -class RunTaskErrorResponse(BaseModel): - """Ошибка выполнения задачи в RPC раннере.""" - - success: Literal[False] = False - error: str - - -def _json_response(handler, status_code: int, payload: dict[str, Any] | BaseModel) -> None: - if isinstance(payload, BaseModel): - body = payload.model_dump(mode="json") - else: - body = payload - data = json.dumps(body, ensure_ascii=False).encode("utf-8") - handler.send_response(status_code) - handler.send_header("Content-Type", "application/json; charset=utf-8") - handler.send_header("Content-Length", str(len(data))) - handler.end_headers() - handler.wfile.write(data) - - -async def run_browser_task(task: str) -> RunTaskSuccessResponse | RunTaskErrorResponse: - cdp_url = 
os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") - browser_view_url = os.getenv("BROWSER_VIEW_URL", "") - - browser = Browser(cdp_url=cdp_url) - - llm = ChatOpenAI( - model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"), - api_key=os.getenv("OPENAI_API_KEY"), - base_url=os.getenv("OPENAI_BASE_URL"), - temperature=0.0, - ) - - agent = Agent(task=task, - llm=llm, - browser=browser, - flash_mode=True, - use_vision=False, - extend_system_message=SPEED_OPTIMIZATION_PROMPT, - ) - - try: - history = await agent.run() - return RunTaskSuccessResponse( - result=history.final_result(), - history=[HistoryEvent.model_validate(item) for item in _extract_history_events(history)], - browser_view=browser_view_url, - ) - except Exception as err: - return RunTaskErrorResponse(error=f"Browser automation failed: {err}") - finally: - try: - close_method = getattr(browser, "close", None) - if callable(close_method): - close_result = close_method() - if inspect.isawaitable(close_result): - await close_result - except Exception: - pass - - -def _to_jsonable(value: Any) -> Any: - if value is None or isinstance(value, (str, int, float, bool)): - return value - if isinstance(value, dict): - return {str(key): _to_jsonable(val) for key, val in value.items()} - if isinstance(value, (list, tuple, set)): - return [_to_jsonable(item) for item in value] - - for method_name in ("model_dump", "dict", "to_dict"): - method = getattr(value, method_name, None) - if callable(method): - try: - dumped = method() - return _to_jsonable(dumped) - except Exception: - pass - - return str(value) - - -def _call_history_items(history: Any, attr_name: str) -> list[Any]: - method = getattr(history, attr_name, None) - if not callable(method): - return [] - - try: - raw: Any = method() - except Exception: - return [] - - if raw is None: - return [] - if isinstance(raw, list): - return raw - if isinstance(raw, (str, bytes, dict)): - return [raw] - - try: - return list(raw) - except TypeError: - return [raw] - except 
Exception: - return [raw] - - - - -def _extract_history_events(history: Any) -> list[dict[str, Any]]: - events: list[dict[str, Any]] = [] - - def append_many(kind: str, items: list[Any]) -> None: - if not items: - return - for item in items: - normalized = _to_jsonable(item) - payload = normalized if isinstance(normalized, dict) else {"value": normalized} - content = normalized if isinstance(normalized, str) else json.dumps(normalized, ensure_ascii=False) - events.append( - { - "step": len(events) + 1, - "kind": kind, - "content": content, - "data": payload, - } - ) - - append_many("thought", _call_history_items(history, "model_thoughts")) - append_many("action", _call_history_items(history, "model_actions")) - append_many("error", _call_history_items(history, "errors")) - - if events: - return events - - fallback = _to_jsonable(history) - return [ - { - "step": 1, - "kind": "system", - "content": fallback if isinstance(fallback, str) else json.dumps(fallback, ensure_ascii=False), - "data": fallback if isinstance(fallback, dict) else {"value": fallback}, - } - ] - - -class BrowserUseRPCHandler(BaseHTTPRequestHandler): - def do_GET(self): - if self.path != "/health": - _json_response(self, 404, {"success": False, "error": "Not found"}) - return - - try: - debug_url = os.getenv("BROWSER_HEALTH_URL", "http://127.0.0.1:9222/json/version") - with request.urlopen(debug_url, timeout=2): - pass - _json_response(self, 200, {"success": True}) - except Exception as err: - _json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"}) - - def do_POST(self): - if self.path != "/run": - _json_response(self, 404, {"success": False, "error": "Not found"}) - return - - try: - content_length = int(self.headers.get("Content-Length", "0")) - raw = self.rfile.read(content_length) - payload = json.loads(raw.decode("utf-8") if raw else "{}") - request_model = RunTaskRequest.model_validate(payload) - - result_model = 
asyncio.run(run_browser_task(request_model.task)) - code = 200 if result_model.success else 500 - _json_response(self, code, result_model) - except ValidationError as err: - _json_response(self, 400, RunTaskErrorResponse(error=f"Invalid request payload: {err.errors()}")) - except json.JSONDecodeError: - _json_response(self, 400, RunTaskErrorResponse(error="Invalid JSON payload")) - except error.URLError as err: - _json_response(self, 503, RunTaskErrorResponse(error=f"Transport error: {err}")) - except Exception as err: - _json_response(self, 500, RunTaskErrorResponse(error=f"Internal error: {err}")) - - def log_message(self, format_str, *args): - return - - -def main(): - host = os.getenv("BROWSER_USE_RPC_HOST", "0.0.0.0") - port = int(os.getenv("BROWSER_USE_RPC_PORT", "8787")) - server = ThreadingHTTPServer((host, port), BrowserUseRPCHandler) # type: ignore[arg-type] - print(f"browser-use RPC listening on {host}:{port}") - server.serve_forever() - - -if __name__ == "__main__": - main() diff --git a/browser_env/entrypoint.sh b/browser_env/entrypoint.sh deleted file mode 100644 index 34fbabea..00000000 --- a/browser_env/entrypoint.sh +++ /dev/null @@ -1,201 +0,0 @@ -#!/usr/bin/env bash -set -Eeuo pipefail - -export DISPLAY="${DISPLAY:-:99}" -DISPLAY_NUM="${DISPLAY#:}" -XVFB_LOG="/tmp/xvfb.log" - -VNC_PORT="${VNC_PORT:-5900}" -NOVNC_PORT="${NOVNC_PORT:-6080}" -CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-${BROWSER_CHROME_DEBUG_PORT:-9223}}" -CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-${BROWSER_CDP_PROXY_PORT:-9222}}" -BROWSER_USE_RPC_PORT="${BROWSER_USE_RPC_PORT:-8787}" -CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-${BROWSER_DATA_DIR:-/src/browser_data}}" -BROWSER_ENABLE_UI="${BROWSER_ENABLE_UI:-true}" - -MAX_RESTARTS="${MAX_RESTARTS:-10}" -RESTART_WINDOW_SEC="${RESTART_WINDOW_SEC:-60}" -RESTART_BACKOFF_SEC="${RESTART_BACKOFF_SEC:-2}" - -PIDS=() -STOPPING=0 -WINDOW_START="$(date +%s)" -RESTART_COUNT=0 - -log() { - printf '[%s] %s\n' "$(date '+%Y-%m-%d 
%H:%M:%S')" "$*" -} - -start_bg() { - "$@" & - local pid=$! - PIDS+=("$pid") - log "started: $* (pid=$pid)" -} - -wait_for_port() { - local host=$1 - local port=$2 - local timeout_sec=$3 - local end_ts=$(( $(date +%s) + timeout_sec )) - - while [ "$(date +%s)" -lt "$end_ts" ]; do - if bash -c "/dev/null 2>&1; then - return 0 - fi - sleep 0.2 - done - return 1 -} - -wait_for_x_display() { - local timeout_sec=$1 - local end_ts=$(( $(date +%s) + timeout_sec )) - - while [ "$(date +%s)" -lt "$end_ts" ]; do - if [ -S "/tmp/.X11-unix/X${DISPLAY_NUM}" ] && DISPLAY="$DISPLAY" bash -c 'echo >/dev/null' >/dev/null 2>&1; then - return 0 - fi - sleep 0.2 - done - return 1 -} - -cleanup() { - if [ "$STOPPING" -eq 1 ]; then - return - fi - STOPPING=1 - - log "shutdown signal received, stopping processes..." - - if [ -n "${CHROME_PID:-}" ] && kill -0 "$CHROME_PID" >/dev/null 2>&1; then - kill "$CHROME_PID" >/dev/null 2>&1 || true - fi - - for pid in "${PIDS[@]:-}"; do - kill "$pid" >/dev/null 2>&1 || true - done - - sleep 1 - - if [ -n "${CHROME_PID:-}" ] && kill -0 "$CHROME_PID" >/dev/null 2>&1; then - kill -9 "$CHROME_PID" >/dev/null 2>&1 || true - fi - - for pid in "${PIDS[@]:-}"; do - if kill -0 "$pid" >/dev/null 2>&1; then - kill -9 "$pid" >/dev/null 2>&1 || true - fi - done - - log "shutdown complete" -} - -trap cleanup SIGTERM SIGINT EXIT - -mkdir -p /var/run/dbus /var/lib/dbus "$CHROME_PROFILE_DIR" -if [ ! -f /var/lib/dbus/machine-id ]; then - dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true -fi - -# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY. -rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true - -log "starting X stack on DISPLAY=${DISPLAY}" -Xvfb "$DISPLAY" -screen 0 1280x720x24 -ac +extension GLX +render -noreset >"$XVFB_LOG" 2>&1 & -XVFB_PID=$! -PIDS+=("$XVFB_PID") -log "started: Xvfb $DISPLAY (pid=$XVFB_PID)" - -if ! 
wait_for_x_display 15; then - log "fatal: Xvfb did not initialize DISPLAY=${DISPLAY}" - if [ -f "$XVFB_LOG" ]; then - log "xvfb log tail:" - tail -n 40 "$XVFB_LOG" || true - fi - exit 1 -fi - -if [ "$BROWSER_ENABLE_UI" != "false" ]; then - start_bg fluxbox - start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared - start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}" -fi -start_bg socat "TCP-LISTEN:${CHROME_PUBLIC_DEBUG_PORT},fork,reuseaddr" "TCP:127.0.0.1:${CHROME_LOCAL_DEBUG_PORT}" -start_bg python3 -u /src/browser_use_runner.py - -if [ "$BROWSER_ENABLE_UI" != "false" ]; then - if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then - log "fatal: x11vnc did not open port ${VNC_PORT}" - exit 1 - fi - if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then - log "fatal: websockify did not open port ${NOVNC_PORT}" - exit 1 - fi -fi -if ! wait_for_port 127.0.0.1 "$BROWSER_USE_RPC_PORT" 20; then - log "fatal: browser-use RPC did not open port ${BROWSER_USE_RPC_PORT}" - exit 1 -fi - -log "browser infrastructure is ready (noVNC:${NOVNC_PORT}, DevTools proxy:${CHROME_PUBLIC_DEBUG_PORT}, browser-use RPC:${BROWSER_USE_RPC_PORT})" - -while true; do - for pid in "${PIDS[@]}"; do - if ! 
kill -0 "$pid" >/dev/null 2>&1; then - log "fatal: required background process died (pid=${pid})" - exit 1 - fi - done - - rm -f "${CHROME_PROFILE_DIR}/SingletonLock" "${CHROME_PROFILE_DIR}/SingletonCookie" "${CHROME_PROFILE_DIR}/SingletonSocket" 2>/dev/null || true - - log "starting Chromium (local DevTools:${CHROME_LOCAL_DEBUG_PORT})" - chromium \ - --no-sandbox \ - --disable-dev-shm-usage \ - --ozone-platform=x11 \ - --remote-debugging-port="${CHROME_LOCAL_DEBUG_PORT}" \ - --remote-debugging-address=127.0.0.1 \ - --remote-allow-origins='*' \ - --window-size=1280,720 \ - --user-data-dir="${CHROME_PROFILE_DIR}" \ - --disable-blink-features=AutomationControlled \ - --no-first-run \ - --disable-gpu \ - --mute-audio \ - --no-default-browser-check \ - --disable-software-rasterizer \ - --disable-features=site-per-process \ - --disable-crash-reporter \ - --disable-extensions \ - --disable-sync & - - CHROME_PID=$! - wait "$CHROME_PID" || CHROME_EXIT=$? - CHROME_EXIT=${CHROME_EXIT:-0} - - if [ "$STOPPING" -eq 1 ]; then - break - fi - - now="$(date +%s)" - if [ $(( now - WINDOW_START )) -gt "$RESTART_WINDOW_SEC" ]; then - WINDOW_START="$now" - RESTART_COUNT=0 - fi - - RESTART_COUNT=$((RESTART_COUNT + 1)) - log "Chromium exited with code=${CHROME_EXIT}; restart ${RESTART_COUNT}/${MAX_RESTARTS} in current window" - - if [ "$RESTART_COUNT" -ge "$MAX_RESTARTS" ]; then - log "fatal: too many Chromium restarts in ${RESTART_WINDOW_SEC}s" - exit 1 - fi - - sleep "$RESTART_BACKOFF_SEC" - unset CHROME_EXIT - unset CHROME_PID -done diff --git a/browser_env/nginx.browser-view.conf b/browser_env/nginx.browser-view.conf deleted file mode 100644 index 6d2ff069..00000000 --- a/browser_env/nginx.browser-view.conf +++ /dev/null @@ -1,46 +0,0 @@ -events {} - -http { - resolver 127.0.0.11 ipv6=off; - - map $http_upgrade $connection_upgrade { - default upgrade; - '' close; - } - - server { - listen 8080; - server_name _; - - location = / { - add_header Content-Type text/plain; - return 200 
"Browser view proxy is running.\n"; - } - - location / { - proxy_http_version 1.1; - proxy_set_header Host $host; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection $connection_upgrade; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_buffering off; - proxy_pass http://browser:6080; - } - - location ~ "^/view/(?[a-f0-9]{16})$" { - return 302 /view/$owner/vnc.html?path=view/$owner/websockify; - } - - location ~ "^/view/(?[a-f0-9]{16})/(?.*)$" { - proxy_http_version 1.1; - proxy_set_header Host $host; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection $connection_upgrade; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_buffering off; - proxy_pass http://browser-use-browser-$owner:6080/$rest$is_args$args; - } - } -} diff --git a/docker-compose.vps.yml b/docker-compose.vps.yml deleted file mode 100644 index 7b396679..00000000 --- a/docker-compose.vps.yml +++ /dev/null @@ -1,14 +0,0 @@ -services: - browser-api: - networks: - - browser-net - - lambdalab_frontend - - browser-view-proxy: - networks: - - browser-net - - lambdalab_frontend - -networks: - lambdalab_frontend: - external: true diff --git a/docker-compose.yml b/docker-compose.yml index 20517ddf..8bd5f39b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,104 +1,49 @@ services: - browser: - build: - context: ./browser_env - dockerfile: Dockerfile.browser - image: browser-use-browser-runtime:latest - container_name: browser-use-browser + chromium: + image: ghcr.io/browserless/chromium:latest + container_name: hermes-chromium + restart: unless-stopped environment: - - MODEL_DEFAULT=${MODEL_DEFAULT:-qwen3.5-122b} - - OPENAI_API_KEY=${OPENAI_API_KEY} - - OPENAI_BASE_URL=${OPENAI_BASE_URL} - - BROWSER_USE_RPC_HOST=0.0.0.0 - - BROWSER_USE_RPC_PORT=8787 + - TOKEN=hermes-local + - TIMEOUT=120000 + - CONCURRENT=5 + shm_size: 1gb 
ports: - - "${BROWSER_NOVNC_PUBLISH:-6080:6080}" - - "${BROWSER_CDP_PUBLISH:-9222:9222}" - networks: - browser-net: - aliases: - - browser - shm_size: '2gb' - volumes: - - browser_profiles:/src/browser_data - - ./workspace:/app/workspace:rw - restart: always + - "3000:3000" + + chromium-gui: + build: + context: ./docker/chromium-gui + container_name: hermes-chromium-gui + restart: unless-stopped + shm_size: 1gb + ports: + - "127.0.0.1:6080:6080" + - "127.0.0.1:5900:5900" + - "127.0.0.1:9223:9223" healthcheck: - test: [ "CMD-SHELL", "curl -fsS http://127.0.0.1:9222/json/version >/dev/null && curl -fsS http://127.0.0.1:8787/health >/dev/null || exit 1" ] + test: [ "CMD", "curl", "-fsS", "http://localhost:9223/json/version" ] interval: 10s - timeout: 3s - retries: 12 - start_period: 20s + timeout: 5s + retries: 6 - browser-api: - build: - context: ./api - dockerfile: Dockerfile - container_name: browser-use-api + hermes-agent: + build: . + container_name: hermes-agent + restart: unless-stopped + stdin_open: true + tty: true + depends_on: + - chromium + - chromium-gui + env_file: + - ./workspace/.env + - ./hermes_data/.env environment: - - BROWSER_USE_RPC_URL=http://browser:8787/run - - BROWSER_API_HOST=0.0.0.0 - - BROWSER_API_PORT=8088 - - BROWSER_API_MAX_CONCURRENCY=2 - - BROWSER_VIEW_BASE_URL=${BROWSER_VIEW_BASE_URL:-http://localhost:6081} - - BROWSER_USE_ISOLATION_MODE=${BROWSER_USE_ISOLATION_MODE:-docker-per-principal} - - BROWSER_RUNTIME_IMAGE=${BROWSER_RUNTIME_IMAGE:-browser-use-browser-runtime:latest} - - BROWSER_RUNTIME_NETWORK=${BROWSER_RUNTIME_NETWORK:-browser-net} - - BROWSER_RUNTIME_TTL_SECONDS=${BROWSER_RUNTIME_TTL_SECONDS:-900} - - BROWSER_RUNTIME_START_TIMEOUT=${BROWSER_RUNTIME_START_TIMEOUT:-45} - - BROWSER_RUNTIME_ENABLE_UI=${BROWSER_RUNTIME_ENABLE_UI:-true} - - MODEL_DEFAULT=${MODEL_DEFAULT:-qwen3.5-122b} - - OPENAI_API_KEY=${OPENAI_API_KEY} - - OPENAI_BASE_URL=${OPENAI_BASE_URL} - depends_on: - browser: - condition: service_healthy - ports: - - 
"${BROWSER_API_PUBLISH:-8088:8088}" + - BROWSER_USE_CDP_URL=${BROWSER_USE_CDP_URL:-http://172.25.0.3:9223} + - BROWSER_USE_PYTHON=/opt/browser-use-venv/bin/python volumes: - - /var/run/docker.sock:/var/run/docker.sock - healthcheck: - test: - [ - "CMD-SHELL", - "python -c \"import urllib.request; urllib.request.urlopen('http://127.0.0.1:8088/health', timeout=2).read()\" >/dev/null 2>&1 || exit 1", - ] - interval: 120s - timeout: 3s - retries: 12 - start_period: 10s - restart: always - networks: - - browser-net - - browser-view-proxy: - image: nginx:alpine - container_name: browser-use-view-proxy - volumes: - - ./browser_env/nginx.browser-view.conf:/etc/nginx/nginx.conf:ro - depends_on: - browser: - condition: service_healthy - ports: - - "${BROWSER_VIEW_PROXY_PUBLISH:-6081:8080}" - restart: always - networks: - - browser-net - - tunnel: - image: cloudflare/cloudflared:latest - profiles: - - remote - container_name: browser-use-tunnel - restart: always - command: tunnel --protocol http2 --url http://browser-view-proxy:8080 --no-tls-verify - networks: - - browser-net - -volumes: - browser_profiles: - -networks: - browser-net: - name: browser-net - driver: bridge + - ./workspace:/workspace + - ./hermes_data:/root/.hermes + working_dir: /workspace + command: [ "hermes", "gateway" ] \ No newline at end of file diff --git a/docker/chromium-gui/Dockerfile b/docker/chromium-gui/Dockerfile new file mode 100644 index 00000000..9a282a60 --- /dev/null +++ b/docker/chromium-gui/Dockerfile @@ -0,0 +1,21 @@ +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + chromium \ + xvfb \ + x11vnc \ + fluxbox \ + novnc \ + websockify \ + socat \ + ca-certificates \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY start.sh /usr/local/bin/start-gui-chromium.sh +RUN chmod +x /usr/local/bin/start-gui-chromium.sh + +EXPOSE 6080 5900 9222 + +CMD ["/usr/local/bin/start-gui-chromium.sh"] + diff --git a/docker/chromium-gui/README.md 
b/docker/chromium-gui/README.md new file mode 100644 index 00000000..6b9fac2c --- /dev/null +++ b/docker/chromium-gui/README.md @@ -0,0 +1,26 @@ +# Chromium GUI Service + +This container provides a full Chromium GUI with three interfaces: + +- noVNC web UI: `http://localhost:6080/vnc.html` +- VNC: `localhost:5900` +- CDP endpoint: `http://localhost:9223/json/version` + +## Run + +```bash +docker compose --profile gui up -d chromium-gui +``` + +## Use with browser-use + +Pass the GUI CDP endpoint as HTTP URL (the runner resolves it to a websocket automatically): + +```bash +docker compose exec -T hermes-agent python-browser-use \ + /root/.hermes/skills/autonomous-ai-agents/browser-use/scripts/run_browser_use.py \ + --cdp-url http://chromium-gui:9223 \ + --task "Open example.com and return page title" \ + --max-steps 5 +``` + diff --git a/docker/chromium-gui/start.sh b/docker/chromium-gui/start.sh new file mode 100644 index 00000000..6cdc373d --- /dev/null +++ b/docker/chromium-gui/start.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -euo pipefail + +export DISPLAY=:99 +SCREEN_RESOLUTION="${SCREEN_RESOLUTION:-1920x1080x24}" + +# Clean stale X lock/socket from previous crashed runs in the same container. 
+rm -f /tmp/.X99-lock +rm -f /tmp/.X11-unix/X99 + +Xvfb :99 -screen 0 "$SCREEN_RESOLUTION" -ac +extension RANDR & +fluxbox >/tmp/fluxbox.log 2>&1 & + +x11vnc -display :99 -forever -shared -rfbport 5900 -nopw >/tmp/x11vnc.log 2>&1 & +websockify --web=/usr/share/novnc/ 6080 localhost:5900 >/tmp/novnc.log 2>&1 & + +# Проксирование CDP на все адреса используя socat +# Chromium слушает на ::1:9223 (IPv6 localhost) +socat TCP-LISTEN:9223,reuseaddr,fork TCP6:[::1]:9223 >/tmp/socat.log 2>&1 & + +exec chromium \ + --no-sandbox \ + --disable-dev-shm-usage \ + --disable-gpu \ + --disable-setuid-sandbox \ + --remote-debugging-address=127.0.0.1 \ + --remote-debugging-port=9223 \ + --user-data-dir=/tmp/chromium-profile \ + --window-size=1920,1080 \ + --no-first-run \ + --no-default-browser-check \ + about:blank >/tmp/chromium.log 2>&1 + diff --git a/docs/vps-deploy.md b/docs/vps-deploy.md deleted file mode 100644 index e329a49a..00000000 --- a/docs/vps-deploy.md +++ /dev/null @@ -1,133 +0,0 @@ -# BrowserUse VPS Deployment - -This project deploys to `BrowserUse-vps@lambda.coredump.ru` with a Gitea/Forgejo Actions runner installed on the VPS. - -The server already has a root-owned `/opt/lambdalab` stack with Caddy on ports `80/443`. Keep this browser service as a separate app under the deploy user home directory, then attach the public-facing containers to the existing `lambdalab_frontend` Docker network through `docker-compose.vps.yml`. - -## SSH Access - -Add the public SSH key to the VPS user: - -```sh -ssh BrowserUse-vps@lambda.coredump.ru -mkdir -p ~/.ssh -chmod 700 ~/.ssh -printf '%s\n' '' >> ~/.ssh/authorized_keys -chmod 600 ~/.ssh/authorized_keys -``` - -The fingerprint `SHA256:/XC5ifPX8j+uRyp0Yw2zAl5nteWc3YcHeVHfCG+rhP4` is not enough by itself. `authorized_keys` needs the full public key line that starts with `ssh-ed25519`. 
- -## Initial Server Checkout - -Run once on the VPS: - -```sh -mkdir -p ~/apps -cd ~/apps -git clone -b feature/api-for-subagent https://git.lambda.coredump.ru/APEX/BrowserUse_and_ComputerUse_skills.git -cd BrowserUse_and_ComputerUse_skills -``` - -Create a server-local `.env` file in the checkout. It is intentionally not committed: - -```sh -OPENAI_API_KEY=... -OPENAI_BASE_URL=... -MODEL_DEFAULT=qwen3.5-122b -BROWSER_VIEW_BASE_URL=https://browser-view.lambda.coredump.ru -BROWSER_API_PUBLISH=127.0.0.1:8088:8088 -BROWSER_VIEW_PROXY_PUBLISH=127.0.0.1:6081:8080 -BROWSER_NOVNC_PUBLISH=127.0.0.1:6080:6080 -BROWSER_CDP_PUBLISH=127.0.0.1:9222:9222 -``` - -Then run the first deploy manually: - -```sh -bash scripts/deploy_vps.sh -curl -fsS http://127.0.0.1:8088/health -``` - -The deploy script uses both Compose files by default: - -```sh -docker-compose.yml:docker-compose.vps.yml -``` - -`docker-compose.vps.yml` connects `browser-api` and `browser-view-proxy` to the existing external `lambdalab_frontend` network so Caddy can reach them by Docker DNS. - -## Domain Binding - -The active Caddy config is root-owned at: - -```sh -/opt/lambdalab/caddy/Caddyfile -``` - -Add these vhosts to that file from an admin/root account: - -```caddyfile -browser-api.lambda.coredump.ru { - reverse_proxy browser-use-api:8088 -} - -browser-view.lambda.coredump.ru { - reverse_proxy browser-use-view-proxy:8080 -} -``` - -Then reload the existing Caddy container from `/opt/lambdalab`: - -```sh -cd /opt/lambdalab -docker compose exec caddy caddy reload --config /etc/caddy/Caddyfile -``` - -DNS must point both subdomains to the VPS public IP `155.212.185.120`. At inspection time, `lambda.coredump.ru` resolved to that IP, while `browser-api.lambda.coredump.ru` and `browser-view.lambda.coredump.ru` did not resolve yet. 
- -## Gitea/Forgejo Runner - -Install `act_runner` as the `BrowserUse-vps` user and register it with the repository, organization, or instance runner token: - -```sh -mkdir -p ~/act_runner -cd ~/act_runner -./act_runner generate-config > config.yaml -./act_runner --config config.yaml register \ - --no-interactive \ - --instance https://git.lambda.coredump.ru \ - --token '' \ - --name BrowserUse-vps \ - --labels deploy-vps:host -``` - -Start it under the same user: - -```sh -cd ~/act_runner -nohup ./act_runner daemon --config config.yaml > act_runner.log 2>&1 & -``` - -Because this account has `sudo: no`, a system-wide service cannot be installed from this user. If an admin enables a user-level systemd service for this account, run the same daemon command from that service instead of `nohup`. - -## CI/CD Behavior - -The workflow lives at `.gitea/workflows/deploy.yml`. - -It runs on: - -- push to `feature/api-for-subagent` -- manual `workflow_dispatch` - -The job expects a runner label named `deploy-vps`, registered as `deploy-vps:host`. It enters: - -```sh -/home/BrowserUse-vps/apps/BrowserUse_and_ComputerUse_skills -``` - -Then it fetches `origin/feature/api-for-subagent`, resets the tracked checkout to that commit, runs Docker Compose, and verifies: - -```sh -curl -fsS http://127.0.0.1:8088/health -``` diff --git a/references/common_patterns.md b/references/common_patterns.md new file mode 100644 index 00000000..7965e315 --- /dev/null +++ b/references/common_patterns.md @@ -0,0 +1,27 @@ + +--- + +## 📚 Файл: references/common_patterns.md + +```markdown +# Common Browser Automation Patterns + +## Паттерн 1: Авторизация + +### Сценарий +Пользователь хочет автоматизировать вход в систему. 
+ +### Реализация +```python +{ + "action": "sequence", + "steps": [ + {"action": "goto", "url": "https://example.com/login"}, + {"action": "wait", "selector": "form", "timeout": 5000}, + {"action": "fill", "selector": "input[name='email']", "value": "user@example.com"}, + {"action": "fill", "selector": "input[name='password']", "value": "password123"}, + {"action": "click", "selector": "button[type='submit']"}, + {"action": "wait", "selector": ".dashboard", "timeout": 10000}, + {"action": "screenshot", "path": "/tmp/after_login.png"} + ] +} \ No newline at end of file diff --git a/references/selectors.md b/references/selectors.md new file mode 100644 index 00000000..3a5b889e --- /dev/null +++ b/references/selectors.md @@ -0,0 +1,52 @@ +# CSS Селекторы — Полная шпаргалка + +## Быстрый справочник + +### Базовые селекторы + +| Селектор | Пример | Описание | +|----------|--------|----------| +| `*` | `*` | Все элементы | +| `element` | `div` | Элемент по тегу | +| `#id` | `#main` | Элемент по ID | +| `.class` | `.button` | Элемент по классу | +| `[attr]` | `[disabled]` | Элемент с атрибутом | +| `[attr=value]` | `[type="submit"]` | Точное совпадение атрибута | +| `[attr^=value]` | `[href^="https"]` | Атрибут начинается с | +| `[attr$=value]` | `[href$=".pdf"]` | Атрибут заканчивается на | +| `[attr*=value]` | `[name*="user"]` | Атрибут содержит | + +### Комбинаторы + +| Селектор | Пример | Описание | +|----------|--------|----------| +| `A B` | `div p` | Потомок (любой уровень) | +| `A > B` | `div > p` | Прямой потомок | +| `A + B` | `h1 + p` | Соседний элемент | +| `A ~ B` | `h1 ~ p` | Все следующие соседние | + +### Псевдоклассы + +| Псевдокласс | Пример | Описание | +|-------------|--------|----------| +| `:first-child` | `li:first-child` | Первый дочерний | +| `:last-child` | `li:last-child` | Последний дочерний | +| `:nth-child(n)` | `tr:nth-child(2)` | n-й дочерний | +| `:nth-of-type(n)` | `p:nth-of-type(2)` | n-й элемент типа | +| `:not(selector)` | 
`div:not(.hidden)` | Исключение | +| `:has(selector)` | `div:has(p)` | Содержит дочерний элемент | +| `:contains(text)` | `a:contains("Click")` | Содержит текст | + +## XPath — Альтернатива + +### Базовые XPath + +```xpath +//element # Все элементы +//div[@id='main'] # По атрибуту +//div[contains(@class, 'btn')] # Частичное совпадение класса +//button[text()='Submit'] # По тексту +//a[contains(text(), 'Learn')] # Частичное совпадение текста +//div[@id='main']//p # Вложенность +//div[1] # Первый div +//div[last()] # Последний div \ No newline at end of file diff --git a/scripts/browser_automation.py b/scripts/browser_automation.py new file mode 100644 index 00000000..ae0eccf7 --- /dev/null +++ b/scripts/browser_automation.py @@ -0,0 +1,338 @@ + + +## 🐍 Файл: scripts/browser_automation.py + + +# !/usr/bin/env python3 +""" +Browser automation core module for Hermes Agent Skill +Автоматизация браузера с использованием Playwright +""" + +import asyncio +import json +import sys +import os +from typing import Dict, Any, Optional, List +from playwright.async_api import async_playwright, Page, Browser, Playwright + + +class BrowserAutomation: + """Основной класс для автоматизации браузера""" + + def __init__(self, headless: bool = True, timeout: int = 30000): + self.headless = headless + self.timeout = timeout + self.playwright: Optional[Playwright] = None + self.browser: Optional[Browser] = None + self.page: Optional[Page] = None + + async def __aenter__(self): + await self.start() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + + async def start(self): + """Запуск браузера""" + self.playwright = await async_playwright().start() + self.browser = await self.playwright.chromium.launch( + headless=self.headless, + args=[ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--disable-gpu' + ] + ) + self.page = await self.browser.new_page() + 
self.page.set_default_timeout(self.timeout) + + async def close(self): + """Закрытие браузера""" + if self.browser: + await self.browser.close() + if self.playwright: + await self.playwright.stop() + + async def goto(self, url: str) -> Dict[str, Any]: + """Переход по URL""" + try: + response = await self.page.goto(url, wait_until='networkidle') + status = response.status if response else None + + return { + "success": True, + "url": self.page.url, + "status": status + } + except Exception as e: + return { + "success": False, + "error": f"Failed to navigate to {url}: {str(e)}" + } + + async def click(self, selector: str) -> Dict[str, Any]: + """Клик по элементу""" + try: + await self.page.wait_for_selector(selector, timeout=self.timeout) + await self.page.click(selector) + return { + "success": True, + "selector": selector, + "message": f"Clicked on {selector}" + } + except Exception as e: + return { + "success": False, + "error": f"Failed to click on {selector}: {str(e)}" + } + + async def fill(self, selector: str, value: str) -> Dict[str, Any]: + """Заполнение поля""" + try: + await self.page.wait_for_selector(selector, timeout=self.timeout) + await self.page.fill(selector, value) + return { + "success": True, + "selector": selector, + "value": value, + "message": f"Filled {selector} with '{value}'" + } + except Exception as e: + return { + "success": False, + "error": f"Failed to fill {selector}: {str(e)}" + } + + async def screenshot(self, path: str = "/tmp/screenshot.png") -> Dict[str, Any]: + """Скриншот страницы""" + try: + # Убедимся, что директория существует + os.makedirs(os.path.dirname(path), exist_ok=True) + + await self.page.screenshot(path=path, full_page=True) + return { + "success": True, + "path": path, + "message": f"Screenshot saved to {path}" + } + except Exception as e: + return { + "success": False, + "error": f"Failed to take screenshot: {str(e)}" + } + + async def get_text(self, selector: str) -> Dict[str, Any]: + """Получение текста 
элемента""" + try: + await self.page.wait_for_selector(selector, timeout=self.timeout) + text = await self.page.text_content(selector) + return { + "success": True, + "text": text.strip() if text else "", + "selector": selector + } + except Exception as e: + return { + "success": False, + "error": f"Failed to get text from {selector}: {str(e)}" + } + + async def get_text_all(self, selector: str) -> Dict[str, Any]: + """Получение текста всех элементов""" + try: + await self.page.wait_for_selector(selector, timeout=self.timeout) + elements = await self.page.query_selector_all(selector) + texts = [] + for el in elements: + text = await el.text_content() + if text: + texts.append(text.strip()) + + return { + "success": True, + "texts": texts, + "count": len(texts), + "selector": selector + } + except Exception as e: + return { + "success": False, + "error": f"Failed to get texts from {selector}: {str(e)}" + } + + async def evaluate(self, js_code: str) -> Dict[str, Any]: + """Выполнение JavaScript""" + try: + result = await self.page.evaluate(js_code) + return { + "success": True, + "result": result, + "code": js_code[:100] # Обрезаем для вывода + } + except Exception as e: + return { + "success": False, + "error": f"Failed to evaluate JavaScript: {str(e)}" + } + + async def select(self, selector: str, value: str) -> Dict[str, Any]: + """Выбор из выпадающего списка""" + try: + await self.page.wait_for_selector(selector, timeout=self.timeout) + await self.page.select_option(selector, value) + return { + "success": True, + "selector": selector, + "value": value, + "message": f"Selected '{value}' from {selector}" + } + except Exception as e: + return { + "success": False, + "error": f"Failed to select from {selector}: {str(e)}" + } + + async def wait_for_selector(self, selector: str, timeout: int = None) -> Dict[str, Any]: + """Ожидание появления элемента""" + timeout_ms = timeout or self.timeout + try: + await self.page.wait_for_selector(selector, timeout=timeout_ms) + 
return { + "success": True, + "selector": selector, + "timeout": timeout_ms, + "message": f"Element {selector} appeared" + } + except Exception as e: + return { + "success": False, + "error": f"Timeout waiting for {selector}: {str(e)}" + } + + async def get_html(self) -> Dict[str, Any]: + """Получение HTML страницы""" + try: + html = await self.page.content() + return { + "success": True, + "html": html, + "size": len(html) + } + except Exception as e: + return { + "success": False, + "error": f"Failed to get HTML: {str(e)}" + } + + async def get_title(self) -> Dict[str, Any]: + """Получение заголовка страницы""" + try: + title = await self.page.title() + return { + "success": True, + "title": title + } + except Exception as e: + return { + "success": False, + "error": f"Failed to get title: {str(e)}" + } + + async def get_url(self) -> Dict[str, Any]: + """Получение текущего URL""" + try: + url = self.page.url + return { + "success": True, + "url": url + } + except Exception as e: + return { + "success": False, + "error": f"Failed to get URL: {str(e)}" + } + + async def execute_sequence(self, steps: List[Dict[str, Any]]) -> Dict[str, Any]: + """Выполнение последовательности действий""" + results = [] + + for i, step in enumerate(steps): + result = await self.execute_task(step) + results.append({ + "step": i + 1, + "action": step.get("action"), + "result": result + }) + + # Если шаг не удался, прекращаем выполнение + if not result.get("success"): + return { + "success": False, + "error": f"Sequence failed at step {i + 1}", + "results": results + } + + return { + "success": True, + "results": results, + "total_steps": len(steps) + } + + async def execute_task(self, task: Dict[str, Any]) -> Dict[str, Any]: + """Выполнение задачи по описанию""" + action = task.get("action") + + actions_map = { + "goto": lambda: self.goto(task.get("url")), + "click": lambda: self.click(task.get("selector")), + "fill": lambda: self.fill(task.get("selector"), task.get("value")), + 
"screenshot": lambda: self.screenshot(task.get("path", "/tmp/screenshot.png")), + "get_text": lambda: self.get_text(task.get("selector")), + "get_text_all": lambda: self.get_text_all(task.get("selector")), + "evaluate": lambda: self.evaluate(task.get("code")), + "select": lambda: self.select(task.get("selector"), task.get("value")), + "wait": lambda: self.wait_for_selector(task.get("selector"), task.get("timeout")), + "get_html": lambda: self.get_html(), + "get_title": lambda: self.get_title(), + "get_url": lambda: self.get_url(), + "sequence": lambda: self.execute_sequence(task.get("steps", [])) + } + + if action not in actions_map: + return { + "success": False, + "error": f"Unknown action: {action}. Available: {', '.join(actions_map.keys())}" + } + + return await actions_map[action]() + + +async def run_from_args(): + """Запуск из аргументов командной строки""" + if len(sys.argv) < 2: + print(json.dumps({ + "success": False, + "error": "No task provided. Usage: python3 browser_automation.py ''" + })) + return + + try: + task = json.loads(sys.argv[1]) + except json.JSONDecodeError: + # Если не JSON, пробуем как goto команду + task = {"action": "goto", "url": sys.argv[1]} + + # Определяем режим headless (можно переопределить через переменную окружения) + headless = os.environ.get("BROWSER_HEADLESS", "true").lower() == "true" + + async with BrowserAutomation(headless=headless) as browser: + result = await browser.execute_task(task) + print(json.dumps(result, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + asyncio.run(run_from_args()) \ No newline at end of file diff --git a/scripts/deploy_vps.sh b/scripts/deploy_vps.sh deleted file mode 100755 index 37b7dd22..00000000 --- a/scripts/deploy_vps.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -set -Eeuo pipefail - -DEPLOY_BRANCH="${DEPLOY_BRANCH:-feature/api-for-subagent}" -HEALTH_URL="${HEALTH_URL:-http://127.0.0.1:8088/health}" 
-COMPOSE_FILES="${COMPOSE_FILES:-docker-compose.yml:docker-compose.vps.yml}" - -log() { - printf '[deploy] %s\n' "$*" -} - -fail() { - printf '[deploy] fatal: %s\n' "$*" >&2 - exit 1 -} - -command -v git >/dev/null 2>&1 || fail "git is not installed" -command -v docker >/dev/null 2>&1 || fail "docker is not installed" -command -v curl >/dev/null 2>&1 || fail "curl is not installed" -docker compose version >/dev/null 2>&1 || fail "docker compose plugin is not available" - -[ -d .git ] || fail "current directory is not a git checkout" -[ -f docker-compose.yml ] || fail "docker-compose.yml not found in current directory" -[ -f .env ] || fail ".env is missing; create it on the VPS with OPENAI_API_KEY and related runtime settings" - -compose_args=() -IFS=':' read -r -a compose_files <<< "$COMPOSE_FILES" -for compose_file in "${compose_files[@]}"; do - if [ -f "$compose_file" ]; then - compose_args+=("-f" "$compose_file") - else - fail "compose file not found: ${compose_file}" - fi -done - -log "fetching origin/${DEPLOY_BRANCH}" -git fetch --prune origin "+refs/heads/${DEPLOY_BRANCH}:refs/remotes/origin/${DEPLOY_BRANCH}" - -log "checking out ${DEPLOY_BRANCH}" -git checkout -B "$DEPLOY_BRANCH" "origin/$DEPLOY_BRANCH" -git reset --hard "origin/$DEPLOY_BRANCH" - -log "building Docker Compose services" -docker compose "${compose_args[@]}" build - -log "starting Docker Compose stack" -docker compose "${compose_args[@]}" up -d --remove-orphans - -log "current service state" -docker compose "${compose_args[@]}" ps - -log "waiting for API health at ${HEALTH_URL}" -for attempt in {1..30}; do - if curl -fsS "$HEALTH_URL" >/dev/null; then - log "API is healthy" - exit 0 - fi - - log "health check failed, retry ${attempt}/30" - sleep 2 -done - -log "API did not become healthy; browser-api logs follow" -docker compose "${compose_args[@]}" logs --tail=120 browser-api || true -fail "health check failed: ${HEALTH_URL}" diff --git a/scripts/requirements.txt b/scripts/requirements.txt new 
file mode 100644 index 00000000..dbb82102 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,2 @@ +playwright>=1.40.0,<2.0.0 +browser-use>=0.1.0,<1.0.0 \ No newline at end of file diff --git a/scripts/setup.sh b/scripts/setup.sh new file mode 100644 index 00000000..bc0d24ec --- /dev/null +++ b/scripts/setup.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Setup script for BrowserUse skill +# Installs the Python packages from requirements.txt and the Playwright Chromium browser + +set -e + +echo "🔧 Installing BrowserUse skill dependencies..." +echo "================================================" + +# ANSI color escape codes for status output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Check that python3 is on PATH; abort otherwise +echo -n "Checking Python... " +if command -v python3 &> /dev/null; then + PYTHON_VERSION=$(python3 --version) + echo -e "${GREEN}OK${NC} ($PYTHON_VERSION)" +else + echo -e "${RED}FAILED${NC}" + echo "Python 3 is required but not installed." + exit 1 +fi + +# Check that pip3 is on PATH; abort otherwise +echo -n "Checking pip... " +if command -v pip3 &> /dev/null; then + echo -e "${GREEN}OK${NC}" +else + echo -e "${RED}FAILED${NC}" + echo "pip3 is required but not installed." + exit 1 +fi + +# Install Python packages (requirements.txt next to this script) +echo "" +echo "📦 Installing Python packages..." +pip3 install --upgrade pip +pip3 install -r "$(dirname "$0")/requirements.txt" + +# Install the Playwright Chromium browser +echo "" +echo "🌐 Installing Playwright browsers..." +python3 -m playwright install chromium +python3 -m playwright install-deps # System dependencies for Linux. NOTE(review): no browser argument is given, so this is presumably not limited to chromium - confirm + +# Verify that the playwright module can be imported +echo "" +echo -n "✅ Verifying installation... " +if python3 -c "import playwright" 2>/dev/null; then + echo -e "${GREEN}OK${NC}" +else + echo -e "${RED}FAILED${NC}" + echo "Playwright installation verification failed."
+ exit 1 +fi + +# Create a temporary directory for screenshots +mkdir -p /tmp/browser-use-screenshots +echo "📁 Created screenshot directory: /tmp/browser-use-screenshots" + +echo "" +echo "================================================" +echo -e "${GREEN}✅ BrowserUse skill successfully installed!${NC}" +echo "" +echo "📖 Quick test:" +echo " python3 $(dirname "$0")/browser_automation.py '{\"action\":\"goto\",\"url\":\"https://example.com\"}'" +echo "" +echo "📚 For more examples, see SKILL.md" +echo "================================================" \ No newline at end of file