feat(web): add Parallel as alternative web search/extract backend (#1696)
* feat(web): add Parallel as alternative web search/extract backend Adds Parallel (parallel.ai) as a drop-in alternative to Firecrawl for web_search and web_extract tools using the official parallel-web SDK. - Backend selection via WEB_SEARCH_BACKEND env var (auto/parallel/firecrawl) - Auto mode prefers Firecrawl when both keys present; Parallel when sole backend - web_crawl remains Firecrawl-only with clear error when unavailable - Lazy SDK imports, interrupt support, singleton clients - 16 new unit tests for backend selection and client config Co-authored-by: s-jag <s-jag@users.noreply.github.com> * fix: add PARALLEL_API_KEY to config registry and fix web_crawl policy tests Follow-up for Parallel backend integration: - Add PARALLEL_API_KEY to OPTIONAL_ENV_VARS (hermes doctor, env blocklist) - Add to set_config_value api_keys list (hermes config set) - Add to doctor keys display - Fix 2 web_crawl policy tests that didn't set FIRECRAWL_API_KEY (needed now that web_crawl has a Firecrawl availability guard) * refactor: explicit backend selection via hermes tools, not auto-detect Replace the auto-detect backend selection with explicit user choice: - hermes tools saves WEB_SEARCH_BACKEND to .env when user picks a provider - _get_backend() reads the explicit choice first - Fallback only for manual/legacy config (uses whichever key is present) - _is_provider_active() shows [active] for the selected web backend - Updated tests, docs, and .env.example to remove 'auto' mode language * refactor: use config.yaml for web backend, not env var Match the TTS/browser pattern — web.backend is stored in config.yaml (set by hermes tools), not as a WEB_SEARCH_BACKEND env var. 
- _load_web_config() reads web: section from config.yaml - _get_backend() reads web.backend from config, falls back to key detection - _configure_provider() saves to config dict (saved to config.yaml) - _is_provider_active() reads from config dict - Removed WEB_SEARCH_BACKEND from .env.example, set_config_value, docs - Updated all tests to mock _load_web_config instead of env vars --------- Co-authored-by: s-jag <s-jag@users.noreply.github.com>
This commit is contained in:
parent
6405d389aa
commit
4433b83378
14 changed files with 548 additions and 154 deletions
|
|
@ -65,10 +65,15 @@ OPENCODE_GO_API_KEY=
|
||||||
# TOOL API KEYS
|
# TOOL API KEYS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
# Parallel API Key - AI-native web search and extract
|
||||||
|
# Get at: https://parallel.ai
|
||||||
|
PARALLEL_API_KEY=
|
||||||
|
|
||||||
# Firecrawl API Key - Web search, extract, and crawl
|
# Firecrawl API Key - Web search, extract, and crawl
|
||||||
# Get at: https://firecrawl.dev/
|
# Get at: https://firecrawl.dev/
|
||||||
FIRECRAWL_API_KEY=
|
FIRECRAWL_API_KEY=
|
||||||
|
|
||||||
|
|
||||||
# FAL.ai API Key - Image generation
|
# FAL.ai API Key - Image generation
|
||||||
# Get at: https://fal.ai/
|
# Get at: https://fal.ai/
|
||||||
FAL_KEY=
|
FAL_KEY=
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ hermes-agent/
|
||||||
│ ├── terminal_tool.py # Terminal orchestration
|
│ ├── terminal_tool.py # Terminal orchestration
|
||||||
│ ├── process_registry.py # Background process management
|
│ ├── process_registry.py # Background process management
|
||||||
│ ├── file_tools.py # File read/write/search/patch
|
│ ├── file_tools.py # File read/write/search/patch
|
||||||
│ ├── web_tools.py # Firecrawl search/extract
|
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||||
│ ├── browser_tool.py # Browserbase browser automation
|
│ ├── browser_tool.py # Browserbase browser automation
|
||||||
│ ├── code_execution_tool.py # execute_code sandbox
|
│ ├── code_execution_tool.py # execute_code sandbox
|
||||||
│ ├── delegate_tool.py # Subagent delegation
|
│ ├── delegate_tool.py # Subagent delegation
|
||||||
|
|
|
||||||
|
|
@ -147,7 +147,7 @@ hermes-agent/
|
||||||
│ ├── approval.py # Dangerous command detection + per-session approval
|
│ ├── approval.py # Dangerous command detection + per-session approval
|
||||||
│ ├── terminal_tool.py # Terminal orchestration (sudo, env lifecycle, backends)
|
│ ├── terminal_tool.py # Terminal orchestration (sudo, env lifecycle, backends)
|
||||||
│ ├── file_operations.py # read_file, write_file, search, patch, etc.
|
│ ├── file_operations.py # read_file, write_file, search, patch, etc.
|
||||||
│ ├── web_tools.py # web_search, web_extract (Firecrawl + Gemini summarization)
|
│ ├── web_tools.py # web_search, web_extract (Parallel/Firecrawl + Gemini summarization)
|
||||||
│ ├── vision_tools.py # Image analysis via multimodal models
|
│ ├── vision_tools.py # Image analysis via multimodal models
|
||||||
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
||||||
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
||||||
|
|
|
||||||
|
|
@ -550,6 +550,14 @@ OPTIONAL_ENV_VARS = {
|
||||||
},
|
},
|
||||||
|
|
||||||
# ── Tool API keys ──
|
# ── Tool API keys ──
|
||||||
|
"PARALLEL_API_KEY": {
|
||||||
|
"description": "Parallel API key for AI-native web search and extract",
|
||||||
|
"prompt": "Parallel API key",
|
||||||
|
"url": "https://parallel.ai/",
|
||||||
|
"tools": ["web_search", "web_extract"],
|
||||||
|
"password": True,
|
||||||
|
"category": "tool",
|
||||||
|
},
|
||||||
"FIRECRAWL_API_KEY": {
|
"FIRECRAWL_API_KEY": {
|
||||||
"description": "Firecrawl API key for web search and scraping",
|
"description": "Firecrawl API key for web search and scraping",
|
||||||
"prompt": "Firecrawl API key",
|
"prompt": "Firecrawl API key",
|
||||||
|
|
@ -1506,6 +1514,7 @@ def show_config():
|
||||||
keys = [
|
keys = [
|
||||||
("OPENROUTER_API_KEY", "OpenRouter"),
|
("OPENROUTER_API_KEY", "OpenRouter"),
|
||||||
("VOICE_TOOLS_OPENAI_KEY", "OpenAI (STT/TTS)"),
|
("VOICE_TOOLS_OPENAI_KEY", "OpenAI (STT/TTS)"),
|
||||||
|
("PARALLEL_API_KEY", "Parallel"),
|
||||||
("FIRECRAWL_API_KEY", "Firecrawl"),
|
("FIRECRAWL_API_KEY", "Firecrawl"),
|
||||||
("BROWSERBASE_API_KEY", "Browserbase"),
|
("BROWSERBASE_API_KEY", "Browserbase"),
|
||||||
("BROWSER_USE_API_KEY", "Browser Use"),
|
("BROWSER_USE_API_KEY", "Browser Use"),
|
||||||
|
|
@ -1655,7 +1664,7 @@ def set_config_value(key: str, value: str):
|
||||||
# Check if it's an API key (goes to .env)
|
# Check if it's an API key (goes to .env)
|
||||||
api_keys = [
|
api_keys = [
|
||||||
'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
|
'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
|
||||||
'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
|
'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
|
||||||
'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
|
'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
|
||||||
'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
|
'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
|
||||||
'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
|
'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
|
||||||
|
|
|
||||||
|
|
@ -444,11 +444,11 @@ def _print_setup_summary(config: dict, hermes_home):
|
||||||
else:
|
else:
|
||||||
tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
|
tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
|
||||||
|
|
||||||
# Firecrawl (web tools)
|
# Web tools (Parallel or Firecrawl)
|
||||||
if get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"):
|
if get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"):
|
||||||
tool_status.append(("Web Search & Extract", True, None))
|
tool_status.append(("Web Search & Extract", True, None))
|
||||||
else:
|
else:
|
||||||
tool_status.append(("Web Search & Extract", False, "FIRECRAWL_API_KEY"))
|
tool_status.append(("Web Search & Extract", False, "PARALLEL_API_KEY or FIRECRAWL_API_KEY"))
|
||||||
|
|
||||||
# Browser tools (local Chromium or Browserbase cloud)
|
# Browser tools (local Chromium or Browserbase cloud)
|
||||||
import shutil
|
import shutil
|
||||||
|
|
|
||||||
|
|
@ -151,19 +151,29 @@ TOOL_CATEGORIES = {
|
||||||
"web": {
|
"web": {
|
||||||
"name": "Web Search & Extract",
|
"name": "Web Search & Extract",
|
||||||
"setup_title": "Select Search Provider",
|
"setup_title": "Select Search Provider",
|
||||||
"setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need Firecrawl.",
|
"setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.",
|
||||||
"icon": "🔍",
|
"icon": "🔍",
|
||||||
"providers": [
|
"providers": [
|
||||||
{
|
{
|
||||||
"name": "Firecrawl Cloud",
|
"name": "Firecrawl Cloud",
|
||||||
"tag": "Recommended - hosted service",
|
"tag": "Hosted service - search, extract, and crawl",
|
||||||
|
"web_backend": "firecrawl",
|
||||||
"env_vars": [
|
"env_vars": [
|
||||||
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
|
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Parallel",
|
||||||
|
"tag": "AI-native search and extract",
|
||||||
|
"web_backend": "parallel",
|
||||||
|
"env_vars": [
|
||||||
|
{"key": "PARALLEL_API_KEY", "prompt": "Parallel API key", "url": "https://parallel.ai"},
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Firecrawl Self-Hosted",
|
"name": "Firecrawl Self-Hosted",
|
||||||
"tag": "Free - run your own instance",
|
"tag": "Free - run your own instance",
|
||||||
|
"web_backend": "firecrawl",
|
||||||
"env_vars": [
|
"env_vars": [
|
||||||
{"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
|
{"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
|
||||||
],
|
],
|
||||||
|
|
@ -618,6 +628,9 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
|
||||||
if "browser_provider" in provider:
|
if "browser_provider" in provider:
|
||||||
current = config.get("browser", {}).get("cloud_provider")
|
current = config.get("browser", {}).get("cloud_provider")
|
||||||
return provider["browser_provider"] == current
|
return provider["browser_provider"] == current
|
||||||
|
if provider.get("web_backend"):
|
||||||
|
current = config.get("web", {}).get("backend")
|
||||||
|
return current == provider["web_backend"]
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -650,6 +663,11 @@ def _configure_provider(provider: dict, config: dict):
|
||||||
else:
|
else:
|
||||||
config.get("browser", {}).pop("cloud_provider", None)
|
config.get("browser", {}).pop("cloud_provider", None)
|
||||||
|
|
||||||
|
# Set web search backend in config if applicable
|
||||||
|
if provider.get("web_backend"):
|
||||||
|
config.setdefault("web", {})["backend"] = provider["web_backend"]
|
||||||
|
_print_success(f" Web backend set to: {provider['web_backend']}")
|
||||||
|
|
||||||
if not env_vars:
|
if not env_vars:
|
||||||
_print_success(f" {provider['name']} - no configuration needed!")
|
_print_success(f" {provider['name']} - no configuration needed!")
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ dependencies = [
|
||||||
"prompt_toolkit",
|
"prompt_toolkit",
|
||||||
# Tools
|
# Tools
|
||||||
"firecrawl-py",
|
"firecrawl-py",
|
||||||
|
"parallel-web>=0.4.2",
|
||||||
"fal-client",
|
"fal-client",
|
||||||
# Text-to-speech (Edge TTS is free, no API key needed)
|
# Text-to-speech (Edge TTS is free, no API key needed)
|
||||||
"edge-tts",
|
"edge-tts",
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ PyJWT[crypto]
|
||||||
|
|
||||||
# Web tools
|
# Web tools
|
||||||
firecrawl-py
|
firecrawl-py
|
||||||
|
parallel-web>=0.4.2
|
||||||
|
|
||||||
# Image generation
|
# Image generation
|
||||||
fal-client
|
fal-client
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
Comprehensive Test Suite for Web Tools Module
|
Comprehensive Test Suite for Web Tools Module
|
||||||
|
|
||||||
This script tests all web tools functionality to ensure they work correctly.
|
This script tests all web tools functionality to ensure they work correctly.
|
||||||
Run this after any updates to the web_tools.py module or Firecrawl library.
|
Run this after any updates to the web_tools.py module or backend libraries.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python test_web_tools.py # Run all tests
|
python test_web_tools.py # Run all tests
|
||||||
|
|
@ -11,7 +11,7 @@ Usage:
|
||||||
python test_web_tools.py --verbose # Show detailed output
|
python test_web_tools.py --verbose # Show detailed output
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
- FIRECRAWL_API_KEY environment variable must be set
|
- PARALLEL_API_KEY or FIRECRAWL_API_KEY environment variable must be set
|
||||||
- An auxiliary LLM provider (OPENROUTER_API_KEY or Nous Portal auth) (optional, for LLM tests)
|
- An auxiliary LLM provider (OPENROUTER_API_KEY or Nous Portal auth) (optional, for LLM tests)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -32,8 +32,10 @@ from tools.web_tools import (
|
||||||
web_extract_tool,
|
web_extract_tool,
|
||||||
web_crawl_tool,
|
web_crawl_tool,
|
||||||
check_firecrawl_api_key,
|
check_firecrawl_api_key,
|
||||||
|
check_web_api_key,
|
||||||
check_auxiliary_model,
|
check_auxiliary_model,
|
||||||
get_debug_session_info
|
get_debug_session_info,
|
||||||
|
_get_backend,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -121,12 +123,13 @@ class WebToolsTester:
|
||||||
"""Test environment setup and API keys"""
|
"""Test environment setup and API keys"""
|
||||||
print_section("Environment Check")
|
print_section("Environment Check")
|
||||||
|
|
||||||
# Check Firecrawl API key
|
# Check web backend API key (Parallel or Firecrawl)
|
||||||
if not check_firecrawl_api_key():
|
if not check_web_api_key():
|
||||||
self.log_result("Firecrawl API Key", "failed", "FIRECRAWL_API_KEY not set")
|
self.log_result("Web Backend API Key", "failed", "PARALLEL_API_KEY or FIRECRAWL_API_KEY not set")
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
self.log_result("Firecrawl API Key", "passed", "Found")
|
backend = _get_backend()
|
||||||
|
self.log_result("Web Backend API Key", "passed", f"Using {backend} backend")
|
||||||
|
|
||||||
# Check auxiliary LLM provider (optional)
|
# Check auxiliary LLM provider (optional)
|
||||||
if not check_auxiliary_model():
|
if not check_auxiliary_model():
|
||||||
|
|
@ -578,7 +581,9 @@ class WebToolsTester:
|
||||||
},
|
},
|
||||||
"results": self.test_results,
|
"results": self.test_results,
|
||||||
"environment": {
|
"environment": {
|
||||||
|
"web_backend": _get_backend() if check_web_api_key() else None,
|
||||||
"firecrawl_api_key": check_firecrawl_api_key(),
|
"firecrawl_api_key": check_firecrawl_api_key(),
|
||||||
|
"parallel_api_key": bool(os.getenv("PARALLEL_API_KEY")),
|
||||||
"auxiliary_model": check_auxiliary_model(),
|
"auxiliary_model": check_auxiliary_model(),
|
||||||
"debug_mode": get_debug_session_info()["enabled"]
|
"debug_mode": get_debug_session_info()["enabled"]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
"""Tests for Firecrawl client configuration and singleton behavior.
|
"""Tests for web backend client configuration and singleton behavior.
|
||||||
|
|
||||||
Coverage:
|
Coverage:
|
||||||
_get_firecrawl_client() — configuration matrix, singleton caching,
|
_get_firecrawl_client() — configuration matrix, singleton caching,
|
||||||
constructor failure recovery, return value verification, edge cases.
|
constructor failure recovery, return value verification, edge cases.
|
||||||
|
_get_backend() — backend selection logic with env var combinations.
|
||||||
|
_get_parallel_client() — Parallel client configuration, singleton caching.
|
||||||
|
check_web_api_key() — unified availability check.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
@ -117,3 +120,157 @@ class TestFirecrawlClientConfig:
|
||||||
from tools.web_tools import _get_firecrawl_client
|
from tools.web_tools import _get_firecrawl_client
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
_get_firecrawl_client()
|
_get_firecrawl_client()
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackendSelection:
    """Test suite for _get_backend() backend selection logic.

    The backend is configured via config.yaml (web.backend), set by
    ``hermes tools``. Falls back to key-based detection for legacy/manual
    setups.

    Each test starts with the web-backend environment variables cleared;
    whatever values were present beforehand are restored afterwards so the
    suite does not leak environment changes into the rest of the session.
    """

    _ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")

    def setup_method(self):
        # Snapshot and clear in one pass; None marks "was not set".
        self._saved_env = {key: os.environ.pop(key, None) for key in self._ENV_KEYS}

    def teardown_method(self):
        # Put the environment back exactly as it was before the test.
        for key, value in getattr(self, "_saved_env", {}).items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

    # ── Config-based selection (web.backend in config.yaml) ───────────

    def test_config_parallel(self):
        """web.backend=parallel in config → 'parallel' regardless of keys."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}):
            assert _get_backend() == "parallel"

    def test_config_firecrawl(self):
        """web.backend=firecrawl in config → 'firecrawl' even if Parallel key set."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}), \
             patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
            assert _get_backend() == "firecrawl"

    def test_config_case_insensitive(self):
        """web.backend=Parallel (mixed case) → 'parallel'."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={"backend": "Parallel"}):
            assert _get_backend() == "parallel"

    # ── Fallback (no web.backend in config) ───────────────────────────

    def test_fallback_parallel_only_key(self):
        """Only PARALLEL_API_KEY set → 'parallel'."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={}), \
             patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
            assert _get_backend() == "parallel"

    def test_fallback_both_keys_defaults_to_firecrawl(self):
        """Both keys set, no config → 'firecrawl' (backward compat)."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={}), \
             patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key", "FIRECRAWL_API_KEY": "fc-test"}):
            assert _get_backend() == "firecrawl"

    def test_fallback_firecrawl_only_key(self):
        """Only FIRECRAWL_API_KEY set → 'firecrawl'."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={}), \
             patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
            assert _get_backend() == "firecrawl"

    def test_fallback_no_keys_defaults_to_firecrawl(self):
        """No keys, no config → 'firecrawl' (will fail at client init)."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={}):
            assert _get_backend() == "firecrawl"

    def test_invalid_config_falls_through_to_fallback(self):
        """web.backend=invalid → ignored, uses key-based fallback."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={"backend": "tavily"}), \
             patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
            assert _get_backend() == "parallel"
|
||||||
|
|
||||||
|
|
||||||
|
class TestParallelClientConfig:
    """Test suite for Parallel client initialization.

    The cached module-level client is reset around every test, and any
    pre-existing PARALLEL_API_KEY value is restored afterwards so the suite
    does not leak environment changes into the rest of the session.
    """

    def setup_method(self):
        import tools.web_tools
        tools.web_tools._parallel_client = None
        # Remember the caller's key (None if unset) before clearing it.
        self._saved_key = os.environ.pop("PARALLEL_API_KEY", None)

    def teardown_method(self):
        import tools.web_tools
        tools.web_tools._parallel_client = None
        saved_key = getattr(self, "_saved_key", None)
        if saved_key is None:
            os.environ.pop("PARALLEL_API_KEY", None)
        else:
            os.environ["PARALLEL_API_KEY"] = saved_key

    def test_creates_client_with_key(self):
        """PARALLEL_API_KEY set → creates Parallel client."""
        with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
            from tools.web_tools import _get_parallel_client
            from parallel import Parallel
            client = _get_parallel_client()
            assert client is not None
            assert isinstance(client, Parallel)

    def test_no_key_raises_with_helpful_message(self):
        """No PARALLEL_API_KEY → ValueError with guidance."""
        from tools.web_tools import _get_parallel_client
        with pytest.raises(ValueError, match="PARALLEL_API_KEY"):
            _get_parallel_client()

    def test_singleton_returns_same_instance(self):
        """Second call returns cached client."""
        with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
            from tools.web_tools import _get_parallel_client
            client1 = _get_parallel_client()
            client2 = _get_parallel_client()
            assert client1 is client2
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckWebApiKey:
    """Test suite for check_web_api_key() unified availability check.

    Each test starts with the web-backend environment variables cleared;
    whatever values were present beforehand are restored afterwards so the
    suite does not leak environment changes into the rest of the session.
    """

    _ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")

    def setup_method(self):
        # Snapshot and clear in one pass; None marks "was not set".
        self._saved_env = {key: os.environ.pop(key, None) for key in self._ENV_KEYS}

    def teardown_method(self):
        # Put the environment back exactly as it was before the test.
        for key, value in getattr(self, "_saved_env", {}).items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

    def test_parallel_key_only(self):
        """Only PARALLEL_API_KEY set → available."""
        with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
            from tools.web_tools import check_web_api_key
            assert check_web_api_key() is True

    def test_firecrawl_key_only(self):
        """Only FIRECRAWL_API_KEY set → available."""
        with patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
            from tools.web_tools import check_web_api_key
            assert check_web_api_key() is True

    def test_firecrawl_url_only(self):
        """Only FIRECRAWL_API_URL set → available."""
        with patch.dict(os.environ, {"FIRECRAWL_API_URL": "http://localhost:3002"}):
            from tools.web_tools import check_web_api_key
            assert check_web_api_key() is True

    def test_no_keys_returns_false(self):
        """No backend variables set → unavailable."""
        from tools.web_tools import check_web_api_key
        assert check_web_api_key() is False

    def test_both_keys_returns_true(self):
        """Both backend keys set → available."""
        with patch.dict(os.environ, {
            "PARALLEL_API_KEY": "test-key",
            "FIRECRAWL_API_KEY": "fc-test",
        }):
            from tools.web_tools import check_web_api_key
            assert check_web_api_key() is True
|
||||||
|
|
|
||||||
|
|
@ -426,6 +426,8 @@ async def test_web_extract_blocks_redirected_final_url(monkeypatch):
|
||||||
async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
||||||
from tools import web_tools
|
from tools import web_tools
|
||||||
|
|
||||||
|
# web_crawl_tool checks for Firecrawl env before website policy
|
||||||
|
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
web_tools,
|
web_tools,
|
||||||
"check_website_access",
|
"check_website_access",
|
||||||
|
|
@ -453,6 +455,9 @@ async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
||||||
async def test_web_crawl_blocks_redirected_final_url(monkeypatch):
|
async def test_web_crawl_blocks_redirected_final_url(monkeypatch):
|
||||||
from tools import web_tools
|
from tools import web_tools
|
||||||
|
|
||||||
|
# web_crawl_tool checks for Firecrawl env before website policy
|
||||||
|
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
|
||||||
|
|
||||||
def fake_check(url):
|
def fake_check(url):
|
||||||
if url == "https://allowed.test":
|
if url == "https://allowed.test":
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,9 @@ def _build_provider_env_blocklist() -> frozenset:
|
||||||
"FIREWORKS_API_KEY", # Fireworks AI
|
"FIREWORKS_API_KEY", # Fireworks AI
|
||||||
"XAI_API_KEY", # xAI (Grok)
|
"XAI_API_KEY", # xAI (Grok)
|
||||||
"HELICONE_API_KEY", # LLM Observability proxy
|
"HELICONE_API_KEY", # LLM Observability proxy
|
||||||
|
"PARALLEL_API_KEY",
|
||||||
|
"FIRECRAWL_API_KEY",
|
||||||
|
"FIRECRAWL_API_URL",
|
||||||
# Gateway/runtime config not represented in OPTIONAL_ENV_VARS.
|
# Gateway/runtime config not represented in OPTIONAL_ENV_VARS.
|
||||||
"TELEGRAM_HOME_CHANNEL",
|
"TELEGRAM_HOME_CHANNEL",
|
||||||
"TELEGRAM_HOME_CHANNEL_NAME",
|
"TELEGRAM_HOME_CHANNEL_NAME",
|
||||||
|
|
|
||||||
|
|
@ -3,16 +3,16 @@
|
||||||
Standalone Web Tools Module
|
Standalone Web Tools Module
|
||||||
|
|
||||||
This module provides generic web tools that work with multiple backend providers.
|
This module provides generic web tools that work with multiple backend providers.
|
||||||
Currently uses Firecrawl as the backend, and the interface makes it easy to swap
|
Backend is selected during ``hermes tools`` setup (web.backend in config.yaml).
|
||||||
providers without changing the function signatures.
|
|
||||||
|
|
||||||
Available tools:
|
Available tools:
|
||||||
- web_search_tool: Search the web for information
|
- web_search_tool: Search the web for information
|
||||||
- web_extract_tool: Extract content from specific web pages
|
- web_extract_tool: Extract content from specific web pages
|
||||||
- web_crawl_tool: Crawl websites with specific instructions
|
- web_crawl_tool: Crawl websites with specific instructions (Firecrawl only)
|
||||||
|
|
||||||
Backend compatibility:
|
Backend compatibility:
|
||||||
- Firecrawl: https://docs.firecrawl.dev/introduction
|
- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl)
|
||||||
|
- Parallel: https://docs.parallel.ai (search, extract)
|
||||||
|
|
||||||
LLM Processing:
|
LLM Processing:
|
||||||
- Uses OpenRouter API with Gemini 3 Flash Preview for intelligent content extraction
|
- Uses OpenRouter API with Gemini 3 Flash Preview for intelligent content extraction
|
||||||
|
|
@ -53,6 +53,39 @@ from tools.website_policy import check_website_access
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Backend Selection ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _load_web_config() -> dict:
|
||||||
|
"""Load the ``web:`` section from ~/.hermes/config.yaml."""
|
||||||
|
try:
|
||||||
|
from hermes_cli.config import load_config
|
||||||
|
return load_config().get("web", {})
|
||||||
|
except (ImportError, Exception):
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_backend() -> str:
    """Determine which web backend to use.

    Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
    Falls back to whichever API key is present for users who configured
    keys manually without running setup.

    Returns:
        ``"parallel"`` or ``"firecrawl"``. A configured value is matched
        case-insensitively with surrounding whitespace ignored; any other
        value (including a null or non-string entry) is ignored and the
        key-based fallback applies.
    """
    web_cfg = _load_web_config()
    raw = web_cfg.get("backend") if isinstance(web_cfg, dict) else None
    # An empty ``backend:`` key or a non-string value must not raise here.
    configured = raw.strip().lower() if isinstance(raw, str) else ""
    if configured in ("parallel", "firecrawl"):
        return configured

    # Fallback for manual / legacy config — use whichever key is present.
    has_firecrawl = bool(os.getenv("FIRECRAWL_API_KEY") or os.getenv("FIRECRAWL_API_URL"))
    has_parallel = bool(os.getenv("PARALLEL_API_KEY"))
    if has_parallel and not has_firecrawl:
        return "parallel"

    # Default to firecrawl (backward compat, or when both are set)
    return "firecrawl"
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Firecrawl Client ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_firecrawl_client = None
|
_firecrawl_client = None
|
||||||
|
|
||||||
def _get_firecrawl_client():
|
def _get_firecrawl_client():
|
||||||
|
|
@ -81,6 +114,47 @@ def _get_firecrawl_client():
|
||||||
_firecrawl_client = Firecrawl(**kwargs)
|
_firecrawl_client = Firecrawl(**kwargs)
|
||||||
return _firecrawl_client
|
return _firecrawl_client
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Parallel Client ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_parallel_client = None
|
||||||
|
_async_parallel_client = None
|
||||||
|
|
||||||
|
def _get_parallel_client():
    """Get or create the Parallel sync client (lazy initialization).

    Requires PARALLEL_API_KEY environment variable.

    Returns:
        The module-level cached ``Parallel`` client, created on first use.

    Raises:
        ValueError: If PARALLEL_API_KEY is unset or blank.
    """
    global _parallel_client
    if _parallel_client is None:
        api_key = (os.getenv("PARALLEL_API_KEY") or "").strip()
        if not api_key:
            raise ValueError(
                "PARALLEL_API_KEY environment variable not set. "
                "Get your API key at https://parallel.ai"
            )
        # Import only when a client must actually be built, and only after
        # the key check, so a missing key always yields the guidance above.
        from parallel import Parallel

        _parallel_client = Parallel(api_key=api_key)
    return _parallel_client
|
||||||
|
|
||||||
|
|
||||||
|
def _get_async_parallel_client():
    """Get or create the Parallel async client (lazy initialization).

    Requires PARALLEL_API_KEY environment variable.

    Returns:
        The module-level cached ``AsyncParallel`` client, created on first use.

    Raises:
        ValueError: If PARALLEL_API_KEY is unset or blank.
    """
    global _async_parallel_client
    if _async_parallel_client is None:
        api_key = (os.getenv("PARALLEL_API_KEY") or "").strip()
        if not api_key:
            raise ValueError(
                "PARALLEL_API_KEY environment variable not set. "
                "Get your API key at https://parallel.ai"
            )
        # Import only when a client must actually be built, and only after
        # the key check, so a missing key always yields the guidance above.
        from parallel import AsyncParallel

        _async_parallel_client = AsyncParallel(api_key=api_key)
    return _async_parallel_client
|
||||||
|
|
||||||
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
||||||
|
|
||||||
# Allow per-task override via env var
|
# Allow per-task override via env var
|
||||||
|
|
@ -428,12 +502,88 @@ def clean_base64_images(text: str) -> str:
|
||||||
return cleaned_text
|
return cleaned_text
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Parallel Search & Extract Helpers ────────────────────────────────────────
|
||||||
|
|
||||||
|
def _parallel_search(query: str, limit: int = 5) -> dict:
    """Search using the Parallel SDK and return results as a dict.

    Args:
        query: Search text; sent both as the single search query and as the
            objective.
        limit: Desired number of results. Clamped to the range 1..20 before
            being sent as ``max_results``.

    Returns:
        ``{"success": True, "data": {"web": [...]}}`` where each entry has
        ``url``, ``title``, ``description`` (excerpts joined with spaces) and
        a 1-based ``position``; or ``{"error": "Interrupted", "success":
        False}`` when an interrupt is already pending.
    """
    from tools.interrupt import is_interrupted
    if is_interrupted():
        return {"error": "Interrupted", "success": False}

    # Search mode comes from the environment; unknown values fall back to agentic.
    mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip()
    if mode not in ("fast", "one-shot", "agentic"):
        mode = "agentic"

    # Never forward a zero or negative result count to the API.
    max_results = max(1, min(limit, 20))

    logger.info("Parallel search: '%s' (mode=%s, limit=%d)", query, mode, limit)
    response = _get_parallel_client().beta.search(
        search_queries=[query],
        objective=query,
        mode=mode,
        max_results=max_results,
    )

    web_results = [
        {
            "url": result.url or "",
            "title": result.title or "",
            "description": " ".join(result.excerpts or []),
            "position": position,
        }
        for position, result in enumerate(response.results or [], start=1)
    ]

    return {"success": True, "data": {"web": web_results}}
|
||||||
|
|
||||||
|
|
||||||
|
async def _parallel_extract(urls: List[str]) -> List[Dict[str, Any]]:
|
||||||
|
"""Extract content from URLs using the Parallel async SDK.
|
||||||
|
|
||||||
|
Returns a list of result dicts matching the structure expected by the
|
||||||
|
LLM post-processing pipeline (url, title, content, metadata).
|
||||||
|
"""
|
||||||
|
from tools.interrupt import is_interrupted
|
||||||
|
if is_interrupted():
|
||||||
|
return [{"url": u, "error": "Interrupted", "title": ""} for u in urls]
|
||||||
|
|
||||||
|
logger.info("Parallel extract: %d URL(s)", len(urls))
|
||||||
|
response = await _get_async_parallel_client().beta.extract(
|
||||||
|
urls=urls,
|
||||||
|
full_content=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for result in response.results or []:
|
||||||
|
content = result.full_content or ""
|
||||||
|
if not content:
|
||||||
|
content = "\n\n".join(result.excerpts or [])
|
||||||
|
url = result.url or ""
|
||||||
|
title = result.title or ""
|
||||||
|
results.append({
|
||||||
|
"url": url,
|
||||||
|
"title": title,
|
||||||
|
"content": content,
|
||||||
|
"raw_content": content,
|
||||||
|
"metadata": {"sourceURL": url, "title": title},
|
||||||
|
})
|
||||||
|
|
||||||
|
for error in response.errors or []:
|
||||||
|
results.append({
|
||||||
|
"url": error.url or "",
|
||||||
|
"title": "",
|
||||||
|
"content": "",
|
||||||
|
"error": error.content or error.error_type or "extraction failed",
|
||||||
|
"metadata": {"sourceURL": error.url or ""},
|
||||||
|
})
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
def web_search_tool(query: str, limit: int = 5) -> str:
|
def web_search_tool(query: str, limit: int = 5) -> str:
|
||||||
"""
|
"""
|
||||||
Search the web for information using available search API backend.
|
Search the web for information using available search API backend.
|
||||||
|
|
||||||
This function provides a generic interface for web search that can work
|
This function provides a generic interface for web search that can work
|
||||||
with multiple backends. Currently uses Firecrawl.
|
with multiple backends (Parallel or Firecrawl).
|
||||||
|
|
||||||
Note: This function returns search result metadata only (URLs, titles, descriptions).
|
Note: This function returns search result metadata only (URLs, titles, descriptions).
|
||||||
Use web_extract_tool to get full content from specific URLs.
|
Use web_extract_tool to get full content from specific URLs.
|
||||||
|
|
@ -478,6 +628,17 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
||||||
if is_interrupted():
|
if is_interrupted():
|
||||||
return json.dumps({"error": "Interrupted", "success": False})
|
return json.dumps({"error": "Interrupted", "success": False})
|
||||||
|
|
||||||
|
# Dispatch to the configured backend
|
||||||
|
backend = _get_backend()
|
||||||
|
if backend == "parallel":
|
||||||
|
response_data = _parallel_search(query, limit)
|
||||||
|
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
|
||||||
|
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
|
||||||
|
debug_call_data["final_response_size"] = len(result_json)
|
||||||
|
_debug.log_call("web_search_tool", debug_call_data)
|
||||||
|
_debug.save()
|
||||||
|
return result_json
|
||||||
|
|
||||||
logger.info("Searching the web for: '%s' (limit: %d)", query, limit)
|
logger.info("Searching the web for: '%s' (limit: %d)", query, limit)
|
||||||
|
|
||||||
response = _get_firecrawl_client().search(
|
response = _get_firecrawl_client().search(
|
||||||
|
|
@ -597,6 +758,13 @@ async def web_extract_tool(
|
||||||
try:
|
try:
|
||||||
logger.info("Extracting content from %d URL(s)", len(urls))
|
logger.info("Extracting content from %d URL(s)", len(urls))
|
||||||
|
|
||||||
|
# Dispatch to the configured backend
|
||||||
|
backend = _get_backend()
|
||||||
|
|
||||||
|
if backend == "parallel":
|
||||||
|
results = await _parallel_extract(urls)
|
||||||
|
else:
|
||||||
|
# ── Firecrawl extraction ──
|
||||||
# Determine requested formats for Firecrawl v2
|
# Determine requested formats for Firecrawl v2
|
||||||
formats: List[str] = []
|
formats: List[str] = []
|
||||||
if format == "markdown":
|
if format == "markdown":
|
||||||
|
|
@ -887,6 +1055,14 @@ async def web_crawl_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# web_crawl requires Firecrawl — Parallel has no crawl API
|
||||||
|
if not (os.getenv("FIRECRAWL_API_KEY") or os.getenv("FIRECRAWL_API_URL")):
|
||||||
|
return json.dumps({
|
||||||
|
"error": "web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, "
|
||||||
|
"or use web_search + web_extract instead.",
|
||||||
|
"success": False,
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
# Ensure URL has protocol
|
# Ensure URL has protocol
|
||||||
if not url.startswith(('http://', 'https://')):
|
if not url.startswith(('http://', 'https://')):
|
||||||
url = f'https://{url}'
|
url = f'https://{url}'
|
||||||
|
|
@ -1158,6 +1334,15 @@ def check_firecrawl_api_key() -> bool:
|
||||||
return bool(os.getenv("FIRECRAWL_API_KEY"))
|
return bool(os.getenv("FIRECRAWL_API_KEY"))
|
||||||
|
|
||||||
|
|
||||||
|
def check_web_api_key() -> bool:
    """Report whether any web backend is configured through the environment.

    Returns:
        True when PARALLEL_API_KEY, FIRECRAWL_API_KEY or FIRECRAWL_API_URL
        is set to a non-empty value, otherwise False.
    """
    backend_vars = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")
    return any(os.getenv(name) for name in backend_vars)
|
||||||
|
|
||||||
|
|
||||||
def check_auxiliary_model() -> bool:
|
def check_auxiliary_model() -> bool:
|
||||||
"""Check if an auxiliary text model is available for LLM content processing."""
|
"""Check if an auxiliary text model is available for LLM content processing."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -1184,15 +1369,19 @@ if __name__ == "__main__":
|
||||||
print("=" * 40)
|
print("=" * 40)
|
||||||
|
|
||||||
# Check if API keys are available
|
# Check if API keys are available
|
||||||
firecrawl_available = check_firecrawl_api_key()
|
web_available = check_web_api_key()
|
||||||
nous_available = check_auxiliary_model()
|
nous_available = check_auxiliary_model()
|
||||||
|
|
||||||
if not firecrawl_available:
|
if web_available:
|
||||||
print("❌ FIRECRAWL_API_KEY environment variable not set")
|
backend = _get_backend()
|
||||||
print("Please set your API key: export FIRECRAWL_API_KEY='your-key-here'")
|
print(f"✅ Web backend: {backend}")
|
||||||
print("Get API key at: https://firecrawl.dev/")
|
if backend == "parallel":
|
||||||
|
print(" Using Parallel API (https://parallel.ai)")
|
||||||
else:
|
else:
|
||||||
print("✅ Firecrawl API key found")
|
print(" Using Firecrawl API (https://firecrawl.dev)")
|
||||||
|
else:
|
||||||
|
print("❌ No web search backend configured")
|
||||||
|
print("Set PARALLEL_API_KEY (https://parallel.ai) or FIRECRAWL_API_KEY (https://firecrawl.dev)")
|
||||||
|
|
||||||
if not nous_available:
|
if not nous_available:
|
||||||
print("❌ No auxiliary model available for LLM content processing")
|
print("❌ No auxiliary model available for LLM content processing")
|
||||||
|
|
@ -1201,7 +1390,7 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
|
print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
|
||||||
|
|
||||||
if not firecrawl_available:
|
if not web_available:
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
print("🛠️ Web tools ready for use!")
|
print("🛠️ Web tools ready for use!")
|
||||||
|
|
@ -1301,8 +1490,8 @@ registry.register(
|
||||||
toolset="web",
|
toolset="web",
|
||||||
schema=WEB_SEARCH_SCHEMA,
|
schema=WEB_SEARCH_SCHEMA,
|
||||||
handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=5),
|
handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=5),
|
||||||
check_fn=check_firecrawl_api_key,
|
check_fn=check_web_api_key,
|
||||||
requires_env=["FIRECRAWL_API_KEY"],
|
requires_env=["PARALLEL_API_KEY", "FIRECRAWL_API_KEY"],
|
||||||
emoji="🔍",
|
emoji="🔍",
|
||||||
)
|
)
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|
@ -1311,8 +1500,8 @@ registry.register(
|
||||||
schema=WEB_EXTRACT_SCHEMA,
|
schema=WEB_EXTRACT_SCHEMA,
|
||||||
handler=lambda args, **kw: web_extract_tool(
|
handler=lambda args, **kw: web_extract_tool(
|
||||||
args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
|
args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
|
||||||
check_fn=check_firecrawl_api_key,
|
check_fn=check_web_api_key,
|
||||||
requires_env=["FIRECRAWL_API_KEY"],
|
requires_env=["PARALLEL_API_KEY", "FIRECRAWL_API_KEY"],
|
||||||
is_async=True,
|
is_async=True,
|
||||||
emoji="📄",
|
emoji="📄",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||||
|
|
||||||
| Variable | Description |
|
| Variable | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
|
| `PARALLEL_API_KEY` | AI-native web search and content extraction ([parallel.ai](https://parallel.ai/)) |
|
||||||
| `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) |
|
| `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) |
|
||||||
| `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) |
|
| `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) |
|
||||||
| `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) |
|
| `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) |
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue