add schemas and swap browser_use_runner in docker with browser and now agent use skill from the other container
This commit is contained in:
parent
480dfcd36e
commit
890d492de0
12 changed files with 173 additions and 6857 deletions
15
api/schemas.py
Normal file
15
api/schemas.py
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserTaskRequest(BaseModel):
    """Request body describing a single browser task.

    Validation rejects an empty ``task`` and a non-positive ``timeout`` so
    that obviously unusable requests fail at the API boundary instead of
    further downstream.
    """

    # Required free-form task text; must contain at least one character.
    task: str = Field(..., min_length=1, description="Задача для браузера")
    # Upper bound on execution time in seconds (per the field description);
    # defaults to 300 and must be strictly positive.
    timeout: int = Field(
        300,
        gt=0,
        description="Максимальное время выполнения задачи в секундах",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserTaskResponse(BaseModel):
    """Response body reporting the outcome of a browser task.

    ``success`` and ``execution_time`` are required; ``result`` and ``error``
    are optional strings that default to ``None``.
    """

    # Required outcome flag.
    success: bool
    # Optional result text; None when not provided.
    result: Optional[str] = Field(default=None)
    # Optional error text; None when not provided.
    error: Optional[str] = Field(default=None)
    # Required duration value; presumably seconds — TODO confirm with the producer.
    execution_time: float
|
||||||
|
|
@ -4,6 +4,8 @@ ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
chromium \
|
chromium \
|
||||||
|
python3 \
|
||||||
|
python3-pip \
|
||||||
xvfb \
|
xvfb \
|
||||||
fluxbox \
|
fluxbox \
|
||||||
x11vnc \
|
x11vnc \
|
||||||
|
|
@ -19,9 +21,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
WORKDIR /src
|
WORKDIR /src
|
||||||
RUN mkdir -p /src/browser_data
|
RUN mkdir -p /src/browser_data
|
||||||
|
|
||||||
|
RUN python3 -m pip install --no-cache-dir --break-system-packages \
|
||||||
|
"browser-use>=0.12.5" \
|
||||||
|
"langchain-openai>=0.3.0"
|
||||||
|
|
||||||
COPY entrypoint.sh /entrypoint.sh
|
COPY entrypoint.sh /entrypoint.sh
|
||||||
|
COPY browser_use_runner.py /src/browser_use_runner.py
|
||||||
RUN chmod +x /entrypoint.sh
|
RUN chmod +x /entrypoint.sh
|
||||||
|
|
||||||
EXPOSE 6080 9222
|
EXPOSE 6080 9222 8787
|
||||||
|
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
102
browser_env/browser_use_runner.py
Normal file
102
browser_env/browser_use_runner.py
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from urllib import error, request
|
||||||
|
|
||||||
|
from browser_use import Agent, Browser, ChatOpenAI
|
||||||
|
|
||||||
|
|
||||||
|
def _json_response(handler, status_code, payload):
    """Serialize *payload* as UTF-8 JSON and send it through *handler*.

    Args:
        handler: Request handler exposing ``send_response``, ``send_header``,
            ``end_headers`` and a writable ``wfile``.
        status_code: HTTP status code for the response line.
        payload: JSON-serializable object used as the response body.
    """
    # Serialize first so a non-serializable payload fails before anything
    # has been written to the client.
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    headers = (
        ("Content-Type", "application/json; charset=utf-8"),
        ("Content-Length", str(len(body))),
    )

    handler.send_response(status_code)
    for header_name, header_value in headers:
        handler.send_header(header_name, header_value)
    handler.end_headers()
    handler.wfile.write(body)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_browser_task(task):
    """Run one browser-use agent task against the configured CDP endpoint.

    Configuration is read from the environment: ``BROWSER_CDP_URL`` (default
    ``http://127.0.0.1:9222``), ``BROWSER_VIEW_URL``, ``MODEL_DEFAULT``,
    ``OPENAI_API_KEY`` and ``OPENAI_BASE_URL``.

    Args:
        task: Task text handed to the agent.

    Returns:
        dict: ``{"success": True, "result": ..., "browser_view": ...}`` on
        success, or ``{"success": False, "error": ...}`` when setup or the
        agent run raises.
    """
    cdp_url = os.getenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
    browser_view_url = os.getenv("BROWSER_VIEW_URL", "")

    browser = None
    try:
        # Construction happens inside the try so that a setup failure (for
        # example a rejected LLM configuration) is reported in the same
        # structured form as a run failure and still reaches the cleanup.
        browser = Browser(cdp_url=cdp_url)
        llm = ChatOpenAI(
            model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_BASE_URL"),
            temperature=0.0,
        )
        agent = Agent(task=task, llm=llm, browser=browser, use_vision=False)

        history = await agent.run()
        return {
            "success": True,
            "result": history.final_result(),
            "browser_view": browser_view_url,
        }
    except Exception as err:
        return {"success": False, "error": f"Browser automation failed: {err}"}
    finally:
        if browser is not None:
            try:
                await browser.close()
            except Exception:
                # Best-effort cleanup: a failure to close must not replace
                # the result or error that is already being returned.
                pass
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserUseRPCHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the browser-use runner.

    Routes:
        GET /health: probes ``BROWSER_HEALTH_URL`` (default
            ``http://127.0.0.1:9222/json/version``) and answers 200, or 503
            when the probe fails.
        POST /run: expects a JSON object with a non-empty string ``task``,
            runs it through ``run_browser_task`` and answers 200 on success
            or 500 on failure. Malformed requests get 400.

    Any other path gets 404. All responses are JSON.
    """

    def do_GET(self):
        """Answer the health probe; any other path is a 404."""
        if self.path != "/health":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return

        try:
            debug_url = os.getenv("BROWSER_HEALTH_URL", "http://127.0.0.1:9222/json/version")
            # Only reachability matters; the response body is not inspected.
            with request.urlopen(debug_url, timeout=2):
                pass
            _json_response(self, 200, {"success": True})
        except Exception as err:
            _json_response(self, 503, {"success": False, "error": f"Browser is not ready: {err}"})

    def do_POST(self):
        """Validate the request body and run the requested browser task."""
        if self.path != "/run":
            _json_response(self, 404, {"success": False, "error": "Not found"})
            return

        try:
            try:
                content_length = int(self.headers.get("Content-Length", "0"))
            except ValueError:
                content_length = -1
            if content_length < 0:
                # A negative length would make rfile.read() block until EOF.
                _json_response(self, 400, {"success": False, "error": "Invalid Content-Length header"})
                return

            raw = self.rfile.read(content_length)
            payload = json.loads(raw.decode("utf-8") if raw else "{}")
            if not isinstance(payload, dict):
                # Valid JSON that is not an object (list, string, number)
                # is a client error, not an internal one.
                _json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
                return

            task = payload.get("task", "")
            if not isinstance(task, str) or not task.strip():
                _json_response(self, 400, {"success": False, "error": "Field 'task' is required"})
                return

            # Each request runs the coroutine to completion on its own
            # event loop created by asyncio.run.
            result = asyncio.run(run_browser_task(task.strip()))
            code = 200 if result.get("success") else 500
            _json_response(self, code, result)
        except (json.JSONDecodeError, UnicodeDecodeError):
            _json_response(self, 400, {"success": False, "error": "Invalid JSON payload"})
        except error.URLError as err:
            _json_response(self, 503, {"success": False, "error": f"Transport error: {err}"})
        except Exception as err:
            _json_response(self, 500, {"success": False, "error": f"Internal error: {err}"})

    def log_message(self, format_str, *args):
        """Suppress the default per-request logging."""
        return
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Start the browser-use RPC server and serve requests until stopped.

    The bind address comes from ``BROWSER_USE_RPC_HOST`` (default
    ``0.0.0.0``) and ``BROWSER_USE_RPC_PORT`` (default ``8787``).
    """
    host = os.getenv("BROWSER_USE_RPC_HOST", "0.0.0.0")
    port = int(os.getenv("BROWSER_USE_RPC_PORT", "8787"))
    # The context manager closes the listening socket when serve_forever()
    # exits, including when it exits through an exception.
    with ThreadingHTTPServer((host, port), BrowserUseRPCHandler) as server:
        print(f"browser-use RPC listening on {host}:{port}")
        server.serve_forever()


if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
|
@ -9,6 +9,7 @@ VNC_PORT="${VNC_PORT:-5900}"
|
||||||
NOVNC_PORT="${NOVNC_PORT:-6080}"
|
NOVNC_PORT="${NOVNC_PORT:-6080}"
|
||||||
CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-9223}"
|
CHROME_LOCAL_DEBUG_PORT="${CHROME_LOCAL_DEBUG_PORT:-9223}"
|
||||||
CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-9222}"
|
CHROME_PUBLIC_DEBUG_PORT="${CHROME_PUBLIC_DEBUG_PORT:-9222}"
|
||||||
|
BROWSER_USE_RPC_PORT="${BROWSER_USE_RPC_PORT:-8787}"
|
||||||
CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-/src/browser_data}"
|
CHROME_PROFILE_DIR="${CHROME_PROFILE_DIR:-/src/browser_data}"
|
||||||
|
|
||||||
MAX_RESTARTS="${MAX_RESTARTS:-10}"
|
MAX_RESTARTS="${MAX_RESTARTS:-10}"
|
||||||
|
|
@ -119,6 +120,7 @@ start_bg fluxbox
|
||||||
start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared
|
start_bg x11vnc -display "$DISPLAY" -rfbport "$VNC_PORT" -nopw -listen 0.0.0.0 -xkb -forever -shared
|
||||||
start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}"
|
start_bg websockify --web=/usr/share/novnc/ "$NOVNC_PORT" "localhost:${VNC_PORT}"
|
||||||
start_bg socat "TCP-LISTEN:${CHROME_PUBLIC_DEBUG_PORT},fork,reuseaddr" "TCP:127.0.0.1:${CHROME_LOCAL_DEBUG_PORT}"
|
start_bg socat "TCP-LISTEN:${CHROME_PUBLIC_DEBUG_PORT},fork,reuseaddr" "TCP:127.0.0.1:${CHROME_LOCAL_DEBUG_PORT}"
|
||||||
|
start_bg python3 -u /src/browser_use_runner.py
|
||||||
|
|
||||||
if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then
|
if ! wait_for_port 127.0.0.1 "$VNC_PORT" 20; then
|
||||||
log "fatal: x11vnc did not open port ${VNC_PORT}"
|
log "fatal: x11vnc did not open port ${VNC_PORT}"
|
||||||
|
|
@ -128,8 +130,12 @@ if ! wait_for_port 127.0.0.1 "$NOVNC_PORT" 20; then
|
||||||
log "fatal: websockify did not open port ${NOVNC_PORT}"
|
log "fatal: websockify did not open port ${NOVNC_PORT}"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
if ! wait_for_port 127.0.0.1 "$BROWSER_USE_RPC_PORT" 20; then
|
||||||
|
log "fatal: browser-use RPC did not open port ${BROWSER_USE_RPC_PORT}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
log "browser infrastructure is ready (noVNC:${NOVNC_PORT}, DevTools proxy:${CHROME_PUBLIC_DEBUG_PORT})"
|
log "browser infrastructure is ready (noVNC:${NOVNC_PORT}, DevTools proxy:${CHROME_PUBLIC_DEBUG_PORT}, browser-use RPC:${BROWSER_USE_RPC_PORT})"
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
for pid in "${PIDS[@]}"; do
|
for pid in "${PIDS[@]}"; do
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ services:
|
||||||
- .env
|
- .env
|
||||||
environment:
|
environment:
|
||||||
- BROWSER_URL=http://browser:9222
|
- BROWSER_URL=http://browser:9222
|
||||||
|
- BROWSER_USE_RPC_URL=http://browser:8787/run
|
||||||
- HERMES_HOME=/app/hermes_data
|
- HERMES_HOME=/app/hermes_data
|
||||||
volumes:
|
volumes:
|
||||||
- ./hermes_code:/app/hermes_code:ro
|
- ./hermes_code:/app/hermes_code:ro
|
||||||
|
|
@ -40,6 +41,8 @@ services:
|
||||||
context: ./browser_env
|
context: ./browser_env
|
||||||
dockerfile: Dockerfile.browser
|
dockerfile: Dockerfile.browser
|
||||||
container_name: hermes-browser
|
container_name: hermes-browser
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
ports:
|
ports:
|
||||||
- "6080:6080"
|
- "6080:6080"
|
||||||
- "9222:9222"
|
- "9222:9222"
|
||||||
|
|
@ -52,7 +55,7 @@ services:
|
||||||
- browser_profiles:/src/browser_data
|
- browser_profiles:/src/browser_data
|
||||||
restart: always
|
restart: always
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:9222/json/version >/dev/null || exit 1"]
|
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:9222/json/version >/dev/null && curl -fsS http://127.0.0.1:8787/health >/dev/null || exit 1"]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 12
|
retries: 12
|
||||||
|
|
|
||||||
3047
hermes_code/package-lock.json
generated
3047
hermes_code/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -4,7 +4,7 @@
|
||||||
"description": "An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.",
|
"description": "An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"postinstall": "echo '✅ Browser tools ready. Run: python run_agent.py --help'"
|
"postinstall": "echo '✅ Hermes Node hooks ready. Run: python run_agent.py --help'"
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
|
|
@ -15,9 +15,7 @@
|
||||||
"url": "https://github.com/NousResearch/Hermes-Agent/issues"
|
"url": "https://github.com/NousResearch/Hermes-Agent/issues"
|
||||||
},
|
},
|
||||||
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
|
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
|
||||||
"dependencies": {
|
"dependencies": {},
|
||||||
"agent-browser": "^0.13.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=18.0.0"
|
"node": ">=18.0.0"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,10 +34,6 @@ dependencies = [
|
||||||
"faster-whisper>=1.0.0,<2",
|
"faster-whisper>=1.0.0,<2",
|
||||||
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
|
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
|
||||||
"PyJWT[crypto]>=2.10.1,<3",
|
"PyJWT[crypto]>=2.10.1,<3",
|
||||||
"browser-use>=0.12.5",
|
|
||||||
"playwright>=1.49.0",
|
|
||||||
"playwright-stealth>=1.0.6",
|
|
||||||
"langchain-openai>=1.1.12",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,6 @@ PyJWT[crypto]
|
||||||
# Web tools
|
# Web tools
|
||||||
firecrawl-py
|
firecrawl-py
|
||||||
parallel-web>=0.4.2
|
parallel-web>=0.4.2
|
||||||
browser-use>=0.12.5
|
|
||||||
playwright
|
|
||||||
playwright-stealth
|
|
||||||
|
|
||||||
# Image generation
|
# Image generation
|
||||||
fal-client
|
fal-client
|
||||||
|
|
@ -36,5 +33,4 @@ croniter
|
||||||
python-telegram-bot>=20.0
|
python-telegram-bot>=20.0
|
||||||
discord.py>=2.0
|
discord.py>=2.0
|
||||||
aiohttp>=3.9.0
|
aiohttp>=3.9.0
|
||||||
langchain-openai>=1.1.12,
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -874,41 +874,14 @@ install_node_deps() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -f "$INSTALL_DIR/package.json" ]; then
|
if [ -f "$INSTALL_DIR/package.json" ]; then
|
||||||
log_info "Installing Node.js dependencies (browser tools)..."
|
log_info "Installing Node.js dependencies..."
|
||||||
cd "$INSTALL_DIR"
|
cd "$INSTALL_DIR"
|
||||||
npm install --silent 2>/dev/null || {
|
npm install --silent 2>/dev/null || {
|
||||||
log_warn "npm install failed (browser tools may not work)"
|
log_warn "npm install failed (some Node.js helpers may not work)"
|
||||||
}
|
}
|
||||||
log_success "Node.js dependencies installed"
|
log_success "Node.js dependencies installed"
|
||||||
|
|
||||||
# Install Playwright browser + system dependencies.
|
log_info "Skipping local Playwright/Chromium bootstrap (browser automation runs in browser container)"
|
||||||
# Playwright's install-deps only supports apt/dnf/zypper natively.
|
|
||||||
# For Arch/Manjaro we install the system libs via pacman first.
|
|
||||||
log_info "Installing browser engine (Playwright Chromium)..."
|
|
||||||
case "$DISTRO" in
|
|
||||||
arch|manjaro)
|
|
||||||
if command -v pacman &> /dev/null; then
|
|
||||||
log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..."
|
|
||||||
if command -v sudo &> /dev/null && sudo -n true 2>/dev/null; then
|
|
||||||
sudo NEEDRESTART_MODE=a pacman -S --noconfirm --needed \
|
|
||||||
nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
|
|
||||||
elif [ "$(id -u)" -eq 0 ]; then
|
|
||||||
pacman -S --noconfirm --needed \
|
|
||||||
nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
|
|
||||||
else
|
|
||||||
log_warn "Cannot install browser deps without sudo. Run manually:"
|
|
||||||
log_warn " sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
|
|
||||||
log_info "This is standard Playwright setup — Hermes itself does not require root access."
|
|
||||||
cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
log_success "Browser engine installed"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install WhatsApp bridge dependencies
|
# Install WhatsApp bridge dependencies
|
||||||
|
|
|
||||||
|
|
@ -1,61 +1,40 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import asyncio
|
from urllib import error, request
|
||||||
import socket
|
|
||||||
from browser_use import Agent, Browser, ChatOpenAI
|
|
||||||
from tools.registry import registry
|
from tools.registry import registry
|
||||||
|
|
||||||
|
|
||||||
async def run_browser_task(task):
|
def run_browser_task(task):
|
||||||
browser_host = "browser"
|
if not task or not str(task).strip():
|
||||||
browser_port = 9222
|
return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
|
||||||
BROWSER_VIEW_URL = os.getenv("BROWSER_VIEW_URL", "")
|
|
||||||
|
|
||||||
try:
|
|
||||||
browser_ip = socket.gethostbyname(browser_host)
|
|
||||||
cdp_url = f"http://{browser_ip}:{browser_port}"
|
|
||||||
except Exception:
|
|
||||||
cdp_url = f"http://{browser_host}:{browser_port}"
|
|
||||||
|
|
||||||
browser = Browser(cdp_url=cdp_url)
|
rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
|
||||||
|
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
|
||||||
llm = ChatOpenAI(
|
payload = json.dumps({"task": task}).encode("utf-8")
|
||||||
model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
|
req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
|
||||||
api_key=os.getenv("OPENAI_API_KEY"),
|
|
||||||
base_url=os.getenv("OPENAI_BASE_URL"),
|
|
||||||
temperature=0.0,
|
|
||||||
)
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
task=task,
|
|
||||||
llm=llm,
|
|
||||||
browser=browser,
|
|
||||||
use_vision=False
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
history = await agent.run()
|
with request.urlopen(req, timeout=timeout_sec) as resp:
|
||||||
final_result = history.final_result()
|
body = resp.read().decode("utf-8")
|
||||||
|
return body
|
||||||
response = {
|
except error.HTTPError as http_err:
|
||||||
"success": True,
|
body = http_err.read().decode("utf-8", errors="replace")
|
||||||
"result": final_result,
|
return json.dumps(
|
||||||
"browser_view": BROWSER_VIEW_URL
|
{
|
||||||
}
|
"success": False,
|
||||||
return json.dumps(response, ensure_ascii=False)
|
"error": f"browser-use RPC returned HTTP {http_err.code}",
|
||||||
|
"details": body,
|
||||||
except Exception as e:
|
},
|
||||||
return json.dumps({
|
ensure_ascii=False,
|
||||||
"success": False,
|
)
|
||||||
"error": f"Browser automation failed: {str(e)}"
|
except Exception as err:
|
||||||
}, ensure_ascii=False)
|
return json.dumps(
|
||||||
|
{
|
||||||
finally:
|
"success": False,
|
||||||
if browser:
|
"error": f"browser-use RPC request failed: {err}",
|
||||||
try:
|
},
|
||||||
await browser.close()
|
ensure_ascii=False,
|
||||||
except Exception:
|
)
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|
@ -81,6 +60,6 @@ registry.register(
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
handler=lambda args, **kw: asyncio.run(run_browser_task(args.get("task"))),
|
handler=lambda args, **kw: run_browser_task(args.get("task")),
|
||||||
emoji="🌐",
|
emoji="🌐",
|
||||||
)
|
)
|
||||||
3716
hermes_code/uv.lock
generated
3716
hermes_code/uv.lock
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue