Add schemas and swap browser_use_runner in Docker with browser; the agent now uses the skill from the other container

This commit is contained in:
Кобылкевич Фёдор 2026-04-06 23:08:03 +03:00
parent 480dfcd36e
commit 890d492de0
12 changed files with 173 additions and 6857 deletions

File diff suppressed because it is too large Load diff

View file

@ -4,7 +4,7 @@
"description": "An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.",
"private": true,
"scripts": {
"postinstall": "echo '✅ Browser tools ready. Run: python run_agent.py --help'"
"postinstall": "echo '✅ Hermes Node hooks ready. Run: python run_agent.py --help'"
},
"repository": {
"type": "git",
@ -15,9 +15,7 @@
"url": "https://github.com/NousResearch/Hermes-Agent/issues"
},
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
"dependencies": {
"agent-browser": "^0.13.0"
},
"dependencies": {},
"engines": {
"node": ">=18.0.0"
}

View file

@ -34,10 +34,6 @@ dependencies = [
"faster-whisper>=1.0.0,<2",
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
"PyJWT[crypto]>=2.10.1,<3",
"browser-use>=0.12.5",
"playwright>=1.49.0",
"playwright-stealth>=1.0.6",
"langchain-openai>=1.1.12",
]
[project.optional-dependencies]

View file

@ -19,9 +19,6 @@ PyJWT[crypto]
# Web tools
firecrawl-py
parallel-web>=0.4.2
browser-use>=0.12.5
playwright
playwright-stealth
# Image generation
fal-client
@ -36,5 +33,4 @@ croniter
python-telegram-bot>=20.0
discord.py>=2.0
aiohttp>=3.9.0
langchain-openai>=1.1.12,

View file

@ -874,41 +874,14 @@ install_node_deps() {
fi
if [ -f "$INSTALL_DIR/package.json" ]; then
log_info "Installing Node.js dependencies (browser tools)..."
log_info "Installing Node.js dependencies..."
cd "$INSTALL_DIR"
npm install --silent 2>/dev/null || {
log_warn "npm install failed (browser tools may not work)"
log_warn "npm install failed (some Node.js helpers may not work)"
}
log_success "Node.js dependencies installed"
# Install Playwright browser + system dependencies.
# Playwright's install-deps only supports apt/dnf/zypper natively.
# For Arch/Manjaro we install the system libs via pacman first.
log_info "Installing browser engine (Playwright Chromium)..."
case "$DISTRO" in
arch|manjaro)
if command -v pacman &> /dev/null; then
log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..."
if command -v sudo &> /dev/null && sudo -n true 2>/dev/null; then
sudo NEEDRESTART_MODE=a pacman -S --noconfirm --needed \
nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
elif [ "$(id -u)" -eq 0 ]; then
pacman -S --noconfirm --needed \
nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
else
log_warn "Cannot install browser deps without sudo. Run manually:"
log_warn " sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
fi
fi
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
;;
*)
log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
log_info "This is standard Playwright setup — Hermes itself does not require root access."
cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
;;
esac
log_success "Browser engine installed"
log_info "Skipping local Playwright/Chromium bootstrap (browser automation runs in browser container)"
fi
# Install WhatsApp bridge dependencies

View file

@ -1,61 +1,40 @@
import json
import os
import asyncio
import socket
from browser_use import Agent, Browser, ChatOpenAI
from urllib import error, request
from tools.registry import registry
async def run_browser_task(task):
browser_host = "browser"
browser_port = 9222
BROWSER_VIEW_URL = os.getenv("BROWSER_VIEW_URL", "")
try:
browser_ip = socket.gethostbyname(browser_host)
cdp_url = f"http://{browser_ip}:{browser_port}"
except Exception:
cdp_url = f"http://{browser_host}:{browser_port}"
def run_browser_task(task):
if not task or not str(task).strip():
return json.dumps({"success": False, "error": "Task is required"}, ensure_ascii=False)
browser = Browser(cdp_url=cdp_url)
llm = ChatOpenAI(
model=os.getenv("MODEL_DEFAULT", "qwen3.5-122b"),
api_key=os.getenv("OPENAI_API_KEY"),
base_url=os.getenv("OPENAI_BASE_URL"),
temperature=0.0,
)
agent = Agent(
task=task,
llm=llm,
browser=browser,
use_vision=False
)
rpc_url = os.getenv("BROWSER_USE_RPC_URL", "http://browser:8787/run")
timeout_sec = int(os.getenv("BROWSER_USE_RPC_TIMEOUT", "900"))
payload = json.dumps({"task": task}).encode("utf-8")
req = request.Request(rpc_url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
try:
history = await agent.run()
final_result = history.final_result()
response = {
"success": True,
"result": final_result,
"browser_view": BROWSER_VIEW_URL
}
return json.dumps(response, ensure_ascii=False)
except Exception as e:
return json.dumps({
"success": False,
"error": f"Browser automation failed: {str(e)}"
}, ensure_ascii=False)
finally:
if browser:
try:
await browser.close()
except Exception:
pass
with request.urlopen(req, timeout=timeout_sec) as resp:
body = resp.read().decode("utf-8")
return body
except error.HTTPError as http_err:
body = http_err.read().decode("utf-8", errors="replace")
return json.dumps(
{
"success": False,
"error": f"browser-use RPC returned HTTP {http_err.code}",
"details": body,
},
ensure_ascii=False,
)
except Exception as err:
return json.dumps(
{
"success": False,
"error": f"browser-use RPC request failed: {err}",
},
ensure_ascii=False,
)
registry.register(
@ -81,6 +60,6 @@ registry.register(
}
},
handler=lambda args, **kw: asyncio.run(run_browser_task(args.get("task"))),
handler=lambda args, **kw: run_browser_task(args.get("task")),
emoji="🌐",
)

3716
hermes_code/uv.lock generated

File diff suppressed because it is too large Load diff