Merge branch 'feature/browser-use-api' into develop

# Conflicts:
#	.env.example
#	browser_env/entrypoint.sh
#	docker-compose.yml
#	hermes_code/tools/browser_use_tool.py
This commit is contained in:
Максим Туревич 2026-04-13 21:58:55 +03:00
commit 50589232d6
31 changed files with 895 additions and 6902 deletions

View file

@ -13,4 +13,10 @@ TELEGRAM_ALLOWED_USERS=
TELEGRAM_HOME_CHANNEL= TELEGRAM_HOME_CHANNEL=
BROWSER_URL=http://browser:9222 BROWSER_URL=http://browser:9222
BROWSER_VIEW_URL=http://localhost:6080 BROWSER_VIEW_URL=
BROWSER_API_HOST=0.0.0.0
BROWSER_API_PORT=8088
BROWSER_USE_RPC_URL=http://browser:8787/run
BROWSER_USE_RPC_TIMEOUT=900
BROWSER_API_MAX_CONCURRENCY=2

13
api/Dockerfile Normal file
View file

@ -0,0 +1,13 @@
# Lightweight Python base image for the browser REST API service.
FROM python:3.12-slim
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install dependencies first so the layer is cached across code-only changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
# Build context is ./api, so sources land at /app/api and import as "api.*".
COPY . /app/api
# NOTE(review): port 8088 is hard-coded here; the BROWSER_API_PORT env var is
# not consulted by this CMD — confirm that is intentional.
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8088"]

120
api/README.md Normal file
View file

@ -0,0 +1,120 @@
# Browser REST API
REST API-обертка над `browser-use` RPC (`POST /run` в контейнере браузера).
Сервис принимает задачу, ставит ее в in-memory очередь, выполняет через `browser-use` и отдает статус/результат по `task_id`.
## Актуальный статус
Проверено smoke-тестом:
- `GET /health` отвечает `200` с `{"ok": true}`
- `POST /api/browser/tasks` возвращает `202` и `task_id`
- `GET /api/browser/tasks/{task_id}` возвращает `queued/running/...`
- `GET /api/browser/tasks/{task_id}/result` возвращает `202`, пока задача не завершена
## Архитектура
Слои сейчас разделены и выглядят нормально для MVP:
- `api/main.py` — точка входа ASGI (`uvicorn api.main:app`), сборка `FastAPI` и lifespan
- `api/routes/tasks.py` — HTTP-слой (валидация входа/выхода, status codes)
- `api/services/task_service.py` — orchestration (фоновые задачи, timeout, обработка ошибок)
- `api/repositories/task_store.py` — in-memory хранилище задач
- `api/clients/browser_rpc_client.py` — aiohttp-клиент к browser RPC
- `api/clients/browser_rpc_contracts.py` — protocol + исключения RPC-слоя
- `api/contracts/task_schemas.py` — Pydantic request/response DTO
- `api/domain/task_status.py` — доменный enum статусов
- `api/core/settings.py` — конфигурация из env
## Ограничения текущей реализации
- хранилище in-memory: после рестарта контейнера задачи теряются
- нет ретраев RPC при транспортных ошибках
- нет отмены задач через API
- один инстанс процесса хранит задачи только локально (без shared state)
## Переменные окружения
- `BROWSER_API_HOST` (default: `0.0.0.0`)
- `BROWSER_API_PORT` (default: `8080`)
- `BROWSER_USE_RPC_URL` (default: `http://browser:8787/run`)
- `BROWSER_USE_RPC_TIMEOUT` (default: `900`)
- `BROWSER_API_MAX_CONCURRENCY` (default: `2`)
## Локальный запуск
```zsh
cd "<путь к корню репозитория>"
source .venv/bin/activate
uvicorn api.main:app --host 0.0.0.0 --port 8088
```
## Запуск через Docker Compose
```zsh
cd "<путь к корню репозитория>"
docker compose build browser-api
docker compose up -d browser browser-api
docker compose logs -f browser-api
```
## REST API
### `GET /health`
Проверка доступности API.
Пример ответа:
```json
{"ok": true}
```
### `POST /api/browser/tasks`
Создать задачу.
Request:
```json
{
"task": "Открой example.com и верни title",
"timeout": 300,
"metadata": {"source": "manual"}
}
```
Response `202`:
```json
{
"task_id": "53f54fa4c1f24219b3949d56b0457875",
"status": "queued"
}
```
### `GET /api/browser/tasks/{task_id}`
Текущий статус и таймстемпы.
### `GET /api/browser/tasks/{task_id}/result`
- `202` если задача еще `queued/running`
- `200` с финальным payload после завершения
## Быстрый end-to-end пример
```zsh
curl -sS http://localhost:8088/health
RESP=$(curl -sS -X POST http://localhost:8088/api/browser/tasks \
-H "Content-Type: application/json" \
-d '{"task":"Открой example.com и верни title","timeout":30}')
echo "$RESP"
TASK_ID=$(python -c "import json,sys;print(json.loads(sys.argv[1])['task_id'])" "$RESP")
curl -sS "http://localhost:8088/api/browser/tasks/$TASK_ID"
curl -sS "http://localhost:8088/api/browser/tasks/$TASK_ID/result"
```

0
api/clients/__init__.py Normal file
View file

View file

@ -0,0 +1,38 @@
from typing import Any
import aiohttp
from api.clients.browser_rpc_contracts import BrowserRpcError
class BrowserRpcClient:
    """Thin async client for the browser-use RPC endpoint (POST /run)."""

    def __init__(self, rpc_url: str, session: aiohttp.ClientSession) -> None:
        # The session is owned by the caller; this client never closes it.
        self._rpc_url = rpc_url
        self._session = session

    async def run(self, task: str, timeout_sec: float) -> dict[str, Any]:
        """Execute one browser task over RPC and return the decoded JSON body.

        Raises:
            BrowserRpcError: on HTTP >= 400, transport failures, a non-JSON
                response body, or a payload that is not a JSON object.
        """
        request_body = {"task": task}
        request_timeout = aiohttp.ClientTimeout(total=timeout_sec)
        try:
            async with self._session.post(
                self._rpc_url, json=request_body, timeout=request_timeout
            ) as response:
                if response.status >= 400:
                    body = await response.text()
                    raise BrowserRpcError(f"RPC HTTP: {response.status}: {body}")
                try:
                    # content_type=None: accept JSON even with a wrong Content-Type.
                    data = await response.json(content_type=None)
                except aiohttp.ContentTypeError as exc:
                    raise BrowserRpcError("RPC returned non-JSON response") from exc
        except aiohttp.ClientError as exc:
            raise BrowserRpcError(f"Transport error: {exc}") from exc
        if not isinstance(data, dict):
            raise BrowserRpcError("RPC returned invalid payload type")
        return data
async def run_browser_task(rpc_url: str, task: str, timeout_sec: float) -> dict[str, Any]:
    """One-shot convenience wrapper: open a session, run one task, return the result."""
    async with aiohttp.ClientSession() as session:
        client = BrowserRpcClient(rpc_url, session=session)
        return await client.run(task=task, timeout_sec=timeout_sec)

View file

@ -0,0 +1,8 @@
from typing import Any, Protocol
class BrowserRpcError(RuntimeError):
    """Raised for any failure while talking to the browser-use RPC endpoint."""


class BrowserRpcRunner(Protocol):
    """Structural interface for anything able to execute a browser task."""

    async def run(self, task: str, timeout_sec: float) -> dict[str, Any]: ...

View file

View file

@ -0,0 +1,43 @@
from typing import Any
from pydantic import BaseModel, Field
from api.domain.task_status import TaskStatus
class BrowserTaskRequest(BaseModel):
    """Request to start a task in the browser-use agent."""

    task: str = Field(..., description="Текстовая задача для browser-use агента")
    timeout: int = Field(300, description="Максимальное время выполнения задачи в секундах")
    metadata: dict[str, Any] | None = Field(default=None, description="Дополнительные метаданные клиента")
class BrowserTaskAcceptedResponse(BaseModel):
    """Response confirming that the task was accepted for processing."""

    task_id: str
    status: TaskStatus
class BrowserTaskStatusResponse(BaseModel):
    """Current task status and its execution timestamps."""

    task_id: str
    status: TaskStatus
    # NOTE(review): "create_at" looks like a typo for "created_at", but it is
    # part of the public API payload — renaming it would break clients.
    create_at: float = Field(..., description="Время создания задачи в Unix timestamp")
    started_at: float | None = Field(default=None, description="Время начала выполнения в Unix timestamp")
    finished_at: float | None = Field(default=None, description="Время завершения выполнения в Unix timestamp")
    error: str | None = Field(default=None, description="Текст ошибки, если задача завершилась с ошибкой")
class BrowserTaskResultResponse(BaseModel):
    """Final result of a browser-use task run."""

    task_id: str
    status: TaskStatus
    success: bool = Field(..., description="Успешно ли выполнена задача")
    execution_time: float = Field(..., description="Фактическое время выполнения в секундах")
    result: str | None = Field(default=None, description="Итоговый текстовый результат")
    error: str | None = Field(default=None, description="Текст ошибки, если выполнение не удалось")
    raw_response: dict[str, Any] | None = Field(default=None, description="Сырой ответ от browser-use RPC")

0
api/core/__init__.py Normal file
View file

16
api/core/settings.py Normal file
View file

@ -0,0 +1,16 @@
import os
from dataclasses import dataclass
@dataclass(frozen=True)
class Settings:
    """Application configuration, resolved from environment variables once at import time."""

    # Bind address/port for the REST API process.
    # NOTE(review): the default port 8080 here differs from the 8088 used by
    # .env.example / Dockerfile / compose (which always set BROWSER_API_PORT
    # explicitly) — confirm the divergence is intentional.
    app_host: str = os.getenv("BROWSER_API_HOST", "0.0.0.0")
    app_port: int = int(os.getenv("BROWSER_API_PORT", "8080"))
    # Downstream browser-use RPC endpoint and its total timeout cap (seconds).
    browser_rpc_url: str = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
    browser_rpc_timeout: float = float(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
    # Maximum number of browser tasks executed concurrently.
    max_concurrency: int = int(os.getenv("BROWSER_API_MAX_CONCURRENCY", "2"))


# Module-level singleton imported by the rest of the app.
settings = Settings()

0
api/domain/__init__.py Normal file
View file

View file

@ -0,0 +1,9 @@
from enum import Enum
class TaskStatus(str, Enum):
    """Lifecycle state of a browser-agent task."""

    queued = "queued"        # accepted, waiting for a worker
    running = "running"      # currently executing
    succeeded = "succeeded"  # finished; RPC reported success
    failed = "failed"        # finished with an error or timeout

46
api/main.py Normal file
View file

@ -0,0 +1,46 @@
from contextlib import asynccontextmanager
import aiohttp
from fastapi import FastAPI
from api.clients.browser_rpc_client import BrowserRpcClient
from api.core.settings import settings
from api.repositories.task_store import TaskStore
from api.routes.tasks import router as tasks_router
from api.services.task_service import TaskService
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create shared resources on startup and tear them down on shutdown."""
    http_session = aiohttp.ClientSession()
    rpc_client = BrowserRpcClient(settings.browser_rpc_url, session=http_session)
    service = TaskService(
        store=TaskStore(),
        rpc_client=rpc_client,
        max_concurrency=settings.max_concurrency,
        rpc_timeout_cap=settings.browser_rpc_timeout,
    )
    app.state.task_service = service
    try:
        yield
    finally:
        # Cancel background workers before closing the HTTP session they use.
        await service.close()
        await http_session.close()
def create_app() -> FastAPI:
    """Build the FastAPI application with routes and lifecycle wiring."""
    application = FastAPI(
        title="Browser API",
        version="1.0.0",
        description="REST API for submitting tasks to browser-use and retrieving their status/results.",
        lifespan=lifespan,
    )
    application.include_router(tasks_router)

    @application.get("/health")
    async def health() -> dict:
        # Liveness probe only; does not check the downstream browser RPC.
        return {"ok": True}

    return application


# ASGI entry point for `uvicorn api.main:app`.
app = create_app()

View file

View file

@ -0,0 +1,76 @@
import time
import uuid
from asyncio import Lock
from dataclasses import dataclass, field
from typing import Any
from api.domain.task_status import TaskStatus
@dataclass
class TaskRecord:
    """Mutable in-memory state of a single browser task."""

    task_id: str
    task: str
    timeout: int
    metadata: dict[str, Any] | None
    status: TaskStatus = TaskStatus.queued
    # NOTE(review): "create_at" looks like a typo for "created_at", but the
    # name is exposed through the status API, so it is kept as-is.
    create_at: float = field(default_factory=time.time)
    started_at: float | None = None
    finished_at: float | None = None
    result: str | None = None
    error: str | None = None
    raw_response: dict[str, Any] | None = None

    @property
    def execution_time(self) -> float:
        """Seconds spent running so far; 0 if the task never started."""
        if self.started_at is None:
            return 0
        # While still running, measure against "now".
        end = self.finished_at if self.finished_at is not None else time.time()
        return max(0, end - self.started_at)
class TaskStore:
    """Async-safe in-memory repository of TaskRecord objects."""

    def __init__(self) -> None:
        self._lock = Lock()
        self._tasks: dict[str, TaskRecord] = {}

    async def create(self, task: str, timeout: int, metadata: dict[str, Any] | None) -> TaskRecord:
        """Register a new queued task and return its record."""
        record = TaskRecord(task_id=uuid.uuid4().hex, task=task, timeout=timeout, metadata=metadata)
        async with self._lock:
            self._tasks[record.task_id] = record
        return record

    async def get(self, task_id: str) -> TaskRecord | None:
        """Return the record for task_id, or None if unknown."""
        async with self._lock:
            return self._tasks.get(task_id)

    async def set_running(self, task_id: str) -> TaskRecord | None:
        """Mark the task as running and stamp its start time."""
        async with self._lock:
            record = self._tasks.get(task_id)
            if record is None:
                return None
            record.status = TaskStatus.running
            record.started_at = time.time()
            return record

    async def set_done(
        self,
        task_id: str,
        success: bool,
        raw_response: dict[str, Any] | None,
        error: str | None,
        result: str | None = None,
    ) -> TaskRecord | None:
        """Finalize the task; error/result fall back to the raw RPC payload
        when they were not passed explicitly."""
        async with self._lock:
            record = self._tasks.get(task_id)
            if record is None:
                return None
            record.finished_at = time.time()
            record.raw_response = raw_response
            if error is None and isinstance(raw_response, dict):
                record.error = raw_response.get("error")
            else:
                record.error = error
            if result is None and isinstance(raw_response, dict):
                record.result = raw_response.get("result")
            else:
                record.result = result
            record.status = TaskStatus.succeeded if success else TaskStatus.failed
            return record

4
api/requirements.txt Normal file
View file

@ -0,0 +1,4 @@
fastapi==0.135.3
uvicorn[standard]==0.44.0
aiohttp==3.13.5
pydantic==2.12.5

0
api/routes/__init__.py Normal file
View file

80
api/routes/tasks.py Normal file
View file

@ -0,0 +1,80 @@
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import JSONResponse
from api.contracts.task_schemas import (
BrowserTaskAcceptedResponse,
BrowserTaskRequest,
BrowserTaskResultResponse,
BrowserTaskStatusResponse,
)
from api.domain.task_status import TaskStatus
from api.repositories.task_store import TaskRecord
from api.services.task_service import TaskService
router = APIRouter(prefix="/api/browser", tags=["browser-tasks"])
def get_task_service(request: Request) -> TaskService:
    """FastAPI dependency: fetch the TaskService created in the app lifespan."""
    return request.app.state.task_service
@router.post("/tasks", response_model=BrowserTaskAcceptedResponse, status_code=202)
async def create_task(
    payload: BrowserTaskRequest,
    service: TaskService = Depends(get_task_service),
) -> BrowserTaskAcceptedResponse:
    """Accept a browser task for asynchronous execution (202 + task_id)."""
    record = await service.submit_task(
        task=payload.task.strip(),
        timeout=payload.timeout,
        metadata=payload.metadata,
    )
    return BrowserTaskAcceptedResponse(task_id=record.task_id, status=record.status)
@router.get("/tasks/{task_id}", response_model=BrowserTaskStatusResponse)
async def get_task_status(task_id: str, service: TaskService = Depends(get_task_service)) -> BrowserTaskStatusResponse:
    """Return the current status and timestamps of a task (404 if unknown)."""
    record = await service.get_task(task_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Task not found")
    return _to_status_response(record)
@router.get("/tasks/{task_id}/result", response_model=BrowserTaskResultResponse)
async def get_task_result(
    task_id: str,
    service: TaskService = Depends(get_task_service),
) -> JSONResponse | BrowserTaskResultResponse:
    """Return the final result, or a 202 stub payload while the task is in flight."""
    record = await service.get_task(task_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Task not found")
    if record.status in (TaskStatus.queued, TaskStatus.running):
        # Not finished yet: reply 202 with the same shape as the final DTO.
        pending_payload = {
            "task_id": record.task_id,
            "status": record.status.value,
            "success": False,
            "execution_time": record.execution_time,
            "result": None,
            "error": None,
            "raw_response": None,
        }
        return JSONResponse(status_code=202, content=pending_payload)
    return BrowserTaskResultResponse(
        task_id=record.task_id,
        status=record.status,
        success=record.status == TaskStatus.succeeded,
        execution_time=record.execution_time,
        result=record.result,
        error=record.error,
        raw_response=record.raw_response,
    )
def _to_status_response(rec: TaskRecord) -> BrowserTaskStatusResponse:
    """Map an internal TaskRecord onto the public status DTO."""
    return BrowserTaskStatusResponse(
        task_id=rec.task_id,
        status=rec.status,
        create_at=rec.create_at,
        started_at=rec.started_at,
        finished_at=rec.finished_at,
        error=rec.error,
    )

0
api/services/__init__.py Normal file
View file

View file

@ -0,0 +1,83 @@
import asyncio
from api.clients.browser_rpc_contracts import BrowserRpcError, BrowserRpcRunner
from api.repositories.task_store import TaskRecord, TaskStore
class TaskService:
    """Orchestrates background execution of browser tasks via the RPC client.

    Concurrency is bounded by a semaphore; results and errors are persisted
    through the TaskStore so HTTP handlers can poll them.
    """

    def __init__(
        self,
        store: TaskStore,
        rpc_client: BrowserRpcRunner,
        max_concurrency: int,
        rpc_timeout_cap: float | None = None,
    ) -> None:
        self._store = store
        self._rpc_client = rpc_client
        # Bounds how many RPC calls run at once; extra tasks wait for a slot.
        self._semaphore = asyncio.Semaphore(max_concurrency)
        self._rpc_timeout_cap = rpc_timeout_cap
        # Strong references so pending asyncio tasks are not garbage-collected.
        self._background_tasks: set[asyncio.Task[None]] = set()

    async def submit_task(self, task: str, timeout: int, metadata: dict | None) -> TaskRecord:
        """Persist a queued record and schedule its worker in the background."""
        record = await self._store.create(task=task, timeout=timeout, metadata=metadata)
        background_task = asyncio.create_task(self._worker(record.task_id))
        self._background_tasks.add(background_task)
        background_task.add_done_callback(self._background_tasks.discard)
        return record

    async def get_task(self, task_id: str) -> TaskRecord | None:
        """Look up a task record by id."""
        return await self._store.get(task_id)

    async def close(self) -> None:
        """Cancel all in-flight workers (used on application shutdown)."""
        if not self._background_tasks:
            return
        for task in list(self._background_tasks):
            task.cancel()
        await asyncio.gather(*self._background_tasks, return_exceptions=True)
        self._background_tasks.clear()

    async def _worker(self, task_id: str) -> None:
        """Run one task end to end, recording its terminal state.

        Fix vs previous revision: the task is marked "running" only AFTER a
        concurrency slot is acquired, so the reported status (and started_at /
        execution_time) reflect actual execution, not time queued behind the
        semaphore.
        """
        async with self._semaphore:
            rec = await self._store.set_running(task_id)
            if rec is None:
                return
            try:
                rpc_timeout = float(rec.timeout)
                if self._rpc_timeout_cap is not None:
                    rpc_timeout = min(rpc_timeout, self._rpc_timeout_cap)
                # Fix: the outer guard uses the EFFECTIVE (capped) timeout plus
                # a grace period, so the cap also bounds wait_for — previously
                # it used the raw requested timeout.
                raw = await asyncio.wait_for(
                    self._rpc_client.run(task=rec.task, timeout_sec=rpc_timeout),
                    timeout=rpc_timeout + 5,
                )
                success = bool(raw.get("success"))
                await self._store.set_done(
                    task_id=task_id,
                    success=success,
                    raw_response=raw,
                    error=None,
                    result=raw.get("result") if isinstance(raw, dict) else None,
                )
            except asyncio.TimeoutError:
                await self._store.set_done(
                    task_id=task_id,
                    success=False,
                    raw_response=None,
                    error="Timeout exceeded",
                )
            except BrowserRpcError as exc:
                await self._store.set_done(
                    task_id=task_id,
                    success=False,
                    raw_response=None,
                    error=str(exc),
                )
            except Exception as exc:  # last-resort guard: a worker must never die silently
                await self._store.set_done(
                    task_id=task_id,
                    success=False,
                    raw_response=None,
                    error=f"Internal error: {exc}",
                )

View file

@ -4,6 +4,8 @@ ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
chromium \ chromium \
python3 \
python3-pip \
xvfb \ xvfb \
fluxbox \ fluxbox \
x11vnc \ x11vnc \
@ -19,9 +21,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
WORKDIR /src WORKDIR /src
RUN mkdir -p /src/browser_data RUN mkdir -p /src/browser_data
RUN python3 -m pip install --no-cache-dir --break-system-packages \
"browser-use>=0.12.5" \
"langchain-openai>=0.3.0"
COPY entrypoint.sh /entrypoint.sh COPY entrypoint.sh /entrypoint.sh
COPY browser_use_runner.py /src/browser_use_runner.py
RUN chmod +x /entrypoint.sh RUN chmod +x /entrypoint.sh
EXPOSE 6080 9222 EXPOSE 6080 9222 8787
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]

View file

@ -0,0 +1,101 @@
import asyncio
import json
import os
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from urllib import error, request
from browser_use import Agent, Browser, ChatOpenAI
def _json_response(handler, status_code, payload):
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
handler.send_response(status_code)
handler.send_header("Content-Type", "application/json; charset=utf-8")
handler.send_header("Content-Length", str(len(data)))
handler.end_headers()
handler.wfile.write(data)
async def run_browser_task(task):
    """Drive one browser-use agent run against the local CDP endpoint.

    Returns a JSON-serializable dict: on success
    {"success": True, "result": ..., "browser_view": ...}, on failure
    {"success": False, "error": ...}.
    """
    cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
    browser_view_url = os.getenv("BROWSER_VIEW_URL", "")
    browser = Browser(cdp_url=cdp_url)
    llm = ChatOpenAI(
        model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
        temperature=0.0,
    )
    agent = Agent(task=task, llm=llm, browser=browser)
    try:
        history = await agent.run()
        return {
            "success": True,
            "result": history.final_result(),
            "browser_view": browser_view_url,
        }
    except Exception as err:
        return {"success": False, "error": f"Browser automation failed: {err}"}
    finally:
        # Best-effort cleanup; never mask the primary result or exception.
        try:
            await browser.close()
        except Exception:
            pass
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler exposing GET /health and POST /run for browser-use."""

    def do_GET(self):
        # Health check: probe the Chromium DevTools endpoint before reporting OK.
        if self.path != "/health":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return
        try:
            debug_url = os.getenv("BROWSER_HEALTH_URL", "http://127.0.0.1:9222/json/version")
            with request.urlopen(debug_url, timeout=2):
                pass
            _json_response(self, 200, {"success": True})
        except Exception as err:
            _json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})

    def do_POST(self):
        # Run a browser-use task synchronously; expected body: {"task": "..."}.
        if self.path != "/run":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return
        try:
            content_length = int(self.headers.get("Content-Length", "0"))
            raw = self.rfile.read(content_length)
            payload = json.loads(raw.decode("utf-8") if raw else "{}")
            task = payload.get("task", "")
            if not isinstance(task, str) or not task.strip():
                _json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
                return
            # Each request gets a fresh event loop (handler runs in its own thread
            # under ThreadingHTTPServer).
            result = asyncio.run(run_browser_task(task.strip()))
            code = 200 if result.get("success") else 500
            _json_response(self, code, result)
        except json.JSONDecodeError:
            _json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
        except error.URLError as err:
            _json_response(self, 503, {"success": False, "error": f"Transport error: {err}"})
        except Exception as err:
            _json_response(self, 500, {"success": False, "error": f"Internal error: {err}"})

    def log_message(self, format_str, *args):
        # Silence the default per-request stderr logging.
        return
def main():
    """Start the blocking browser-use RPC HTTP server."""
    bind_host = os.getenv("BROWSER_USE_RPC_HOST", "0.0.0.0")
    bind_port = int(os.getenv("BROWSER_USE_RPC_PORT", "8787"))
    server = ThreadingHTTPServer((bind_host, bind_port), BrowserUseRPCHandler)
    print(f"browser-use RPC listening on {bind_host}:{bind_port}")
    server.serve_forever()


if __name__ == "__main__":
    main()

View file

@ -1,41 +1,197 @@
#!/bin/bash #!/usr/bin/env bash
set -Eeuo pipefail
export DISPLAY=:99 export DISPLAY="${DISPLAY:-:99}"
DISPLAY_NUM="${DISPLAY#:}"
XVFB_LOG="/tmp/xvfb.log"
mkdir -p /var/run/dbus VNC_PORT="${VNC_PORT:-5900}"
dbus-uuidgen > /var/lib/dbus/machine-id NOVNC_PORT="${NOVNC_PORT:-6080}"
dbus-daemon --config-file=/usr/share/dbus-1/system.conf --print-address & CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-9223}"
CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-9222}"
BROWSER_USE_RPC_PORT="${BROWSER_USE_RPC_PORT:-8787}"
CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-/src/browser_data}"
Xvfb :99 -screen 0 1280x720x16 -ac +extension GLX +render -noreset & MAX_RESTARTS="${MAX_RESTARTS:-10}"
sleep 2 RESTART_WINDOW_SEC="${RESTART_WINDOW_SEC:-60}"
RESTART_BACKOFF_SEC="${RESTART_BACKOFF_SEC:-2}"
fluxbox & PIDS=()
x11vnc -display :99 -nopw -listen 0.0.0.0 -xkb -forever -shared & STOPPING=0
websockify --web=/usr/share/novnc/ 6080 localhost:5900 & WINDOW_START="$(date +%s)"
RESTART_COUNT=0
socat TCP-LISTEN:9222,fork,reuseaddr TCP:127.0.0.1:9223 & log() {
printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
echo "--- Запуск Chromium в режиме Local-Only (Port 9223) ---" start_bg() {
"$@" &
local pid=$!
PIDS+=("$pid")
log "started: $* (pid=$pid)"
}
wait_for_port() {
local host=$1
local port=$2
local timeout_sec=$3
local end_ts=$(( $(date +%s) + timeout_sec ))
while [ "$(date +%s)" -lt "$end_ts" ]; do
if bash -c "</dev/tcp/${host}/${port}" >/dev/null 2>&1; then
return 0
fi
sleep 0.2
done
return 1
}
wait_for_x_display() {
local timeout_sec=$1
local end_ts=$(( $(date +%s) + timeout_sec ))
while [ "$(date +%s)" -lt "$end_ts" ]; do
if [ -S "/tmp/.X11-unix/X${DISPLAY_NUM}" ] && DISPLAY="$DISPLAY" bash -c 'echo >/dev/null' >/dev/null 2>&1; then
return 0
fi
sleep 0.2
done
return 1
}
cleanup() {
if [ "$STOPPING" -eq 1 ]; then
return
fi
STOPPING=1
log "shutdown signal received, stopping processes..."
if [ -n "${CHROME_PID:-}" ] && kill -0 "$CHROME_PID" >/dev/null 2>&1; then
kill "$CHROME_PID" >/dev/null 2>&1 || true
fi
for pid in "${PIDS[@]:-}"; do
kill "$pid" >/dev/null 2>&1 || true
done
sleep 1
if [ -n "${CHROME_PID:-}" ] && kill -0 "$CHROME_PID" >/dev/null 2>&1; then
kill -9 "$CHROME_PID" >/dev/null 2>&1 || true
fi
for pid in "${PIDS[@]:-}"; do
if kill -0 "$pid" >/dev/null 2>&1; then
kill -9 "$pid" >/dev/null 2>&1 || true
fi
done
log "shutdown complete"
}
trap cleanup SIGTERM SIGINT EXIT
mkdir -p /var/run/dbus /var/lib/dbus "$CHROME_PROFILE_DIR"
if [ ! -f /var/lib/dbus/machine-id ]; then
dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true
fi
# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true
log "starting X stack on DISPLAY=${DISPLAY}"
Xvfb "$DISPLAY" -screen 0 1280x720x24 -ac +extension GLX +render -noreset >"$XVFB_LOG" 2>&1 &
XVFB_PID=$!
PIDS+=("$XVFB_PID")
log "started: Xvfb $DISPLAY (pid=$XVFB_PID)"
if ! wait_for_x_display 15; then
log "fatal: Xvfb did not initialize DISPLAY=${DISPLAY}"
if [ -f "$XVFB_LOG" ]; then
log "xvfb log tail:"
tail -n 40 "$XVFB_LOG" || true
fi
exit 1
fi
start_bg fluxbox
start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared
start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}"
start_bg socat "TCP-LISTEN:${CHROME_PUBLIC_DEBUG_PORT},fork,reuseaddr" "TCP:127.0.0.1:${CHROME_LOCAL_DEBUG_PORT}"
start_bg python3 -u /src/browser_use_runner.py
if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then
log "fatal: x11vnc did not open port ${VNC_PORT}"
exit 1
fi
if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then
log "fatal: websockify did not open port ${NOVNC_PORT}"
exit 1
fi
if ! wait_for_port 127.0.0.1 "$BROWSER_USE_RPC_PORT" 20; then
log "fatal: browser-use RPC did not open port ${BROWSER_USE_RPC_PORT}"
exit 1
fi
log "browser infrastructure is ready (noVNC:${NOVNC_PORT}, DevTools proxy:${CHROME_PUBLIC_DEBUG_PORT}, browser-use RPC:${BROWSER_USE_RPC_PORT})"
while true; do while true; do
rm -f /src/browser_data/SingletonLock for pid in "${PIDS[@]}"; do
if ! kill -0 "$pid" >/dev/null 2>&1; then
log "fatal: required background process died (pid=${pid})"
exit 1
fi
done
rm -f "${CHROME_PROFILE_DIR}/SingletonLock" "${CHROME_PROFILE_DIR}/SingletonCookie" "${CHROME_PROFILE_DIR}/SingletonSocket" 2>/dev/null || true
log "starting Chromium (local DevTools:${CHROME_LOCAL_DEBUG_PORT})"
chromium \ chromium \
--no-sandbox \ --no-sandbox \
--disable-dev-shm-usage \ --disable-dev-shm-usage \
--remote-debugging-port=9223 \ --ozone-platform=x11 \
--remote-debugging-port="${CHROME_LOCAL_DEBUG_PORT}" \
--remote-debugging-address=127.0.0.1 \ --remote-debugging-address=127.0.0.1 \
--remote-allow-origins=* \ --remote-allow-origins='*' \
--window-size=1280,720 \ --window-size=1280,720 \
--user-data-dir=/src/browser_data \ --user-data-dir="${CHROME_PROFILE_DIR}" \
--disable-blink-features=AutomationControlled \ --disable-blink-features=AutomationControlled \
--no-first-run \ --no-first-run \
--disable-gpu \ --disable-gpu \
--mute-audio \ --mute-audio \
--no-default-browser-check \ --no-default-browser-check \
--disable-software-rasterizer \ --disable-software-rasterizer \
--disable-features=site-per-process --disable-features=site-per-process \
--disable-crash-reporter \
--disable-extensions \
--disable-sync &
echo "Chromium упал или был закрыт агентом, рестарт через 2 секунды..." CHROME_PID=$!
sleep 2 wait "$CHROME_PID" || CHROME_EXIT=$?
CHROME_EXIT=${CHROME_EXIT:-0}
if [ "$STOPPING" -eq 1 ]; then
break
fi
now="$(date +%s)"
if [ $(( now - WINDOW_START )) -gt "$RESTART_WINDOW_SEC" ]; then
WINDOW_START="$now"
RESTART_COUNT=0
fi
RESTART_COUNT=$((RESTART_COUNT + 1))
log "Chromium exited with code=${CHROME_EXIT}; restart ${RESTART_COUNT}/${MAX_RESTARTS} in current window"
if [ "$RESTART_COUNT" -ge "$MAX_RESTARTS" ]; then
log "fatal: too many Chromium restarts in ${RESTART_WINDOW_SEC}s"
exit 1
fi
sleep "$RESTART_BACKOFF_SEC"
unset CHROME_EXIT
unset CHROME_PID
done done

View file

@ -4,14 +4,11 @@ services:
context: ./hermes_code context: ./hermes_code
dockerfile: Dockerfile dockerfile: Dockerfile
container_name: hermes-brain container_name: hermes-brain
sysctls:
- net.ipv4.tcp_keepalive_time=60
- net.ipv4.tcp_keepalive_intvl=10
- net.ipv4.tcp_keepalive_probes=3
env_file: env_file:
- .env - .env
environment: environment:
- BROWSER_URL=http://browser:9222 - BROWSER_URL=http://browser:9222
- BROWSER_USE_RPC_URL=http://browser:8787/run
- HERMES_HOME=/app/hermes_data - HERMES_HOME=/app/hermes_data
volumes: volumes:
- ./hermes_code:/app/hermes_code:ro - ./hermes_code:/app/hermes_code:ro
@ -24,12 +21,6 @@ services:
stdin_open: true stdin_open: true
tty: true tty: true
restart: always restart: always
healthcheck:
test: ["CMD-SHELL", "pgrep -f 'python -m gateway.run' || exit 1"]
interval: 2m
timeout: 10s
retries: 3
start_period: 1m
networks: networks:
- hermes-net - hermes-net
deploy: deploy:
@ -44,12 +35,13 @@ services:
fi; fi;
exec python -m gateway.run exec python -m gateway.run
" "
browser: browser:
build: build:
context: ./browser_env context: ./browser_env
dockerfile: Dockerfile.browser dockerfile: Dockerfile.browser
container_name: hermes-browser container_name: hermes-browser
env_file:
- .env
ports: ports:
- "6080:6080" - "6080:6080"
- "9222:9222" - "9222:9222"
@ -62,14 +54,35 @@ services:
- browser_profiles:/src/browser_data - browser_profiles:/src/browser_data
restart: always restart: always
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9222/json/version"] test: [ "CMD-SHELL", "curl -fsS http://127.0.0.1:9222/json/version >/dev/null && curl -fsS http://127.0.0.1:8787/health >/dev/null || exit 1" ]
interval: 10s interval: 10s
timeout: 5s timeout: 3s
retries: 5 retries: 12
start_period: 10s start_period: 20s
browser-api:
build:
context: ./api
dockerfile: Dockerfile
container_name: hermes-browser-api
environment:
- BROWSER_USE_RPC_URL=http://browser:8787/run
- BROWSER_API_HOST=0.0.0.0
- BROWSER_API_PORT=8088
- BROWSER_API_MAX_CONCURRENCY=2
depends_on:
browser:
condition: service_healthy
ports:
- "8088:8088"
restart: always
networks:
- hermes-net
tunnel: tunnel:
image: cloudflare/cloudflared:latest image: cloudflare/cloudflared:latest
profiles:
- remote
container_name: hermes-tunnel container_name: hermes-tunnel
restart: always restart: always
command: tunnel --protocol http2 --url http://browser:6080 --no-tls-verify command: tunnel --protocol http2 --url http://browser:6080 --no-tls-verify

File diff suppressed because it is too large Load diff

View file

@ -4,7 +4,7 @@
"description": "An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.", "description": "An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.",
"private": true, "private": true,
"scripts": { "scripts": {
"postinstall": "echo '✅ Browser tools ready. Run: python run_agent.py --help'" "postinstall": "echo '✅ Hermes Node hooks ready. Run: python run_agent.py --help'"
}, },
"repository": { "repository": {
"type": "git", "type": "git",
@ -15,9 +15,7 @@
"url": "https://github.com/NousResearch/Hermes-Agent/issues" "url": "https://github.com/NousResearch/Hermes-Agent/issues"
}, },
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme", "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
"dependencies": { "dependencies": {},
"agent-browser": "^0.13.0"
},
"engines": { "engines": {
"node": ">=18.0.0" "node": ">=18.0.0"
} }

View file

@ -34,10 +34,6 @@ dependencies = [
"faster-whisper>=1.0.0,<2", "faster-whisper>=1.0.0,<2",
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity) # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
"PyJWT[crypto]>=2.10.1,<3", "PyJWT[crypto]>=2.10.1,<3",
"browser-use>=0.12.5",
"playwright>=1.49.0",
"playwright-stealth>=1.0.6",
"langchain-openai>=1.1.12",
] ]
[project.optional-dependencies] [project.optional-dependencies]

View file

@ -19,9 +19,6 @@ PyJWT[crypto]
# Web tools # Web tools
firecrawl-py firecrawl-py
parallel-web>=0.4.2 parallel-web>=0.4.2
browser-use>=0.12.5
playwright
playwright-stealth
# Image generation # Image generation
fal-client fal-client
@ -36,5 +33,4 @@ croniter
python-telegram-bot>=20.0 python-telegram-bot>=20.0
discord.py>=2.0 discord.py>=2.0
aiohttp>=3.9.0 aiohttp>=3.9.0
langchain-openai>=1.1.12,

View file

@ -874,41 +874,14 @@ install_node_deps() {
fi fi
if [ -f "$INSTALL_DIR/package.json" ]; then if [ -f "$INSTALL_DIR/package.json" ]; then
log_info "Installing Node.js dependencies (browser tools)..." log_info "Installing Node.js dependencies..."
cd "$INSTALL_DIR" cd "$INSTALL_DIR"
npm install --silent 2>/dev/null || { npm install --silent 2>/dev/null || {
log_warn "npm install failed (browser tools may not work)" log_warn "npm install failed (some Node.js helpers may not work)"
} }
log_success "Node.js dependencies installed" log_success "Node.js dependencies installed"
# Install Playwright browser + system dependencies. log_info "Skipping local Playwright/Chromium bootstrap (browser automation runs in browser container)"
# Playwright's install-deps only supports apt/dnf/zypper natively.
# For Arch/Manjaro we install the system libs via pacman first.
log_info "Installing browser engine (Playwright Chromium)..."
case "$DISTRO" in
arch|manjaro)
if command -v pacman &> /dev/null; then
log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..."
if command -v sudo &> /dev/null && sudo -n true 2>/dev/null; then
sudo NEEDRESTART_MODE=a pacman -S --noconfirm --needed \
nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
elif [ "$(id -u)" -eq 0 ]; then
pacman -S --noconfirm --needed \
nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
else
log_warn "Cannot install browser deps without sudo. Run manually:"
log_warn " sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
fi
fi
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
;;
*)
log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
log_info "This is standard Playwright setup — Hermes itself does not require root access."
cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
;;
esac
log_success "Browser engine installed"
fi fi
# Install WhatsApp bridge dependencies # Install WhatsApp bridge dependencies

View file

@ -1,70 +1,40 @@
import json import json
import os import os
import asyncio from urllib import error, request
import socket
from browser_use import Agent, Browser, ChatOpenAI
from tools.registry import registry from tools.registry import registry
async def run_browser_task(task): def run_browser_task(task):
browser_host = "browser" if not task or not str(task).strip():
browser_port = 9222 return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
BROWSER_VIEW_URL = os.getenv("BROWSER_VIEW_URL", "")
rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
payload = json.dumps({"task": task}).encode("utf-8")
req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
try: try:
browser_ip = socket.gethostbyname(browser_host) with request.urlopen(req, timeout=timeout_sec) as resp:
cdp_url = f"http://{browser_ip}:{browser_port}" body = resp.read().decode("utf-8")
except Exception: return body
cdp_url = f"http://{browser_host}:{browser_port}" except error.HTTPError as http_err:
body = http_err.read().decode("utf-8", errors="replace")
browser = Browser(cdp_url=cdp_url) return json.dumps(
{
# Для подключения к Chrome на виртуальной машине раскомментируй "success": False,
# browser = Browser( "error": f"browser-use RPC returned HTTP {http_err.code}",
# executable_path="/usr/bin/google-chrome", # Linux "details": body,
# # Windows: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" },
# # macOS: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" ensure_ascii=False,
# ) )
# или except Exception as err:
# browser = Browser.from_system_chrome() для автоопределения return json.dumps(
{
llm = ChatOpenAI( "success": False,
model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"), "error": f"browser-use RPC request failed: {err}",
api_key=os.getenv("OPENAI_API_KEY"), },
base_url=os.getenv("OPENAI_BASE_URL"), ensure_ascii=False,
temperature=0.0, )
)
agent = Agent(
task=task,
llm=llm,
browser=browser,
use_vision=False
)
try:
history = await agent.run()
final_result = history.final_result()
response = {
"success": True,
"result": final_result,
"browser_view": BROWSER_VIEW_URL
}
return json.dumps(response, ensure_ascii=False)
except Exception as e:
return json.dumps({
"success": False,
"error": f"Browser automation failed: {str(e)}"
}, ensure_ascii=False)
finally:
if browser:
try:
await browser.close()
except Exception:
pass
registry.register( registry.register(
@ -90,6 +60,6 @@ registry.register(
} }
}, },
handler=lambda args, **kw: asyncio.run(run_browser_task(args.get("task"))), handler=lambda args, **kw: run_browser_task(args.get("task")),
emoji="🌐", emoji="🌐",
) )

3716
hermes_code/uv.lock generated

File diff suppressed because it is too large Load diff