feat(web): add Parallel as alternative web search/extract backend (#1696)
* feat(web): add Parallel as alternative web search/extract backend Adds Parallel (parallel.ai) as a drop-in alternative to Firecrawl for web_search and web_extract tools using the official parallel-web SDK. - Backend selection via WEB_SEARCH_BACKEND env var (auto/parallel/firecrawl) - Auto mode prefers Firecrawl when both keys present; Parallel when sole backend - web_crawl remains Firecrawl-only with clear error when unavailable - Lazy SDK imports, interrupt support, singleton clients - 16 new unit tests for backend selection and client config Co-authored-by: s-jag <s-jag@users.noreply.github.com> * fix: add PARALLEL_API_KEY to config registry and fix web_crawl policy tests Follow-up for Parallel backend integration: - Add PARALLEL_API_KEY to OPTIONAL_ENV_VARS (hermes doctor, env blocklist) - Add to set_config_value api_keys list (hermes config set) - Add to doctor keys display - Fix 2 web_crawl policy tests that didn't set FIRECRAWL_API_KEY (needed now that web_crawl has a Firecrawl availability guard) * refactor: explicit backend selection via hermes tools, not auto-detect Replace the auto-detect backend selection with explicit user choice: - hermes tools saves WEB_SEARCH_BACKEND to .env when user picks a provider - _get_backend() reads the explicit choice first - Fallback only for manual/legacy config (uses whichever key is present) - _is_provider_active() shows [active] for the selected web backend - Updated tests, docs, and .env.example to remove 'auto' mode language * refactor: use config.yaml for web backend, not env var Match the TTS/browser pattern — web.backend is stored in config.yaml (set by hermes tools), not as a WEB_SEARCH_BACKEND env var. 
- _load_web_config() reads web: section from config.yaml - _get_backend() reads web.backend from config, falls back to key detection - _configure_provider() saves to config dict (saved to config.yaml) - _is_provider_active() reads from config dict - Removed WEB_SEARCH_BACKEND from .env.example, set_config_value, docs - Updated all tests to mock _load_web_config instead of env vars --------- Co-authored-by: s-jag <s-jag@users.noreply.github.com>
This commit is contained in:
parent
6405d389aa
commit
4433b83378
14 changed files with 548 additions and 154 deletions
|
|
@ -1,8 +1,11 @@
|
|||
"""Tests for Firecrawl client configuration and singleton behavior.
|
||||
"""Tests for web backend client configuration and singleton behavior.
|
||||
|
||||
Coverage:
|
||||
_get_firecrawl_client() — configuration matrix, singleton caching,
|
||||
constructor failure recovery, return value verification, edge cases.
|
||||
_get_backend() — backend selection via web.backend in config.yaml, with API-key fallback.
|
||||
_get_parallel_client() — Parallel client configuration, singleton caching.
|
||||
check_web_api_key() — unified availability check.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
|
@ -117,3 +120,157 @@ class TestFirecrawlClientConfig:
|
|||
from tools.web_tools import _get_firecrawl_client
|
||||
with pytest.raises(ValueError):
|
||||
_get_firecrawl_client()
|
||||
|
||||
|
||||
class TestBackendSelection:
    """Exercise ``_get_backend()`` backend resolution.

    ``hermes tools`` records the chosen backend under ``web.backend`` in
    config.yaml. When that entry is missing or not a recognised backend,
    these tests expect selection to fall back to key-based detection
    (legacy/manual setups).
    """

    _ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")

    def _scrub_env(self):
        """Remove every backend-related variable from ``os.environ``."""
        for name in self._ENV_KEYS:
            os.environ.pop(name, None)

    @staticmethod
    def _web_config(values):
        """Return a patcher making ``_load_web_config`` return *values*."""
        return patch("tools.web_tools._load_web_config", return_value=values)

    def setup_method(self):
        # Start each test from an environment with no backend keys set.
        self._scrub_env()

    def teardown_method(self):
        # Leave no backend keys behind for the next test.
        self._scrub_env()

    # ── Explicit choice: web.backend present in config.yaml ───────────

    def test_config_parallel(self):
        """Config says parallel (no keys set here) → 'parallel'."""
        from tools.web_tools import _get_backend

        with self._web_config({"backend": "parallel"}):
            chosen = _get_backend()
            assert chosen == "parallel"

    def test_config_firecrawl(self):
        """Config says firecrawl → 'firecrawl', even with a Parallel key set."""
        from tools.web_tools import _get_backend

        config = self._web_config({"backend": "firecrawl"})
        env = patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"})
        with config, env:
            chosen = _get_backend()
            assert chosen == "firecrawl"

    def test_config_case_insensitive(self):
        """A mixed-case 'Parallel' in config resolves to 'parallel'."""
        from tools.web_tools import _get_backend

        with self._web_config({"backend": "Parallel"}):
            chosen = _get_backend()
            assert chosen == "parallel"

    # ── Fallback: config has no web.backend entry ─────────────────────

    def test_fallback_parallel_only_key(self):
        """Empty config with only PARALLEL_API_KEY → 'parallel'."""
        from tools.web_tools import _get_backend

        config = self._web_config({})
        env = patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"})
        with config, env:
            chosen = _get_backend()
            assert chosen == "parallel"

    def test_fallback_both_keys_defaults_to_firecrawl(self):
        """Empty config with both keys → 'firecrawl' (backward compat)."""
        from tools.web_tools import _get_backend

        config = self._web_config({})
        env = patch.dict(
            os.environ,
            {"PARALLEL_API_KEY": "test-key", "FIRECRAWL_API_KEY": "fc-test"},
        )
        with config, env:
            chosen = _get_backend()
            assert chosen == "firecrawl"

    def test_fallback_firecrawl_only_key(self):
        """Empty config with only FIRECRAWL_API_KEY → 'firecrawl'."""
        from tools.web_tools import _get_backend

        config = self._web_config({})
        env = patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"})
        with config, env:
            chosen = _get_backend()
            assert chosen == "firecrawl"

    def test_fallback_no_keys_defaults_to_firecrawl(self):
        """Empty config and no keys → 'firecrawl' (will fail at client init)."""
        from tools.web_tools import _get_backend

        with self._web_config({}):
            chosen = _get_backend()
            assert chosen == "firecrawl"

    def test_invalid_config_falls_through_to_fallback(self):
        """An unrecognised web.backend value is ignored; key fallback applies."""
        from tools.web_tools import _get_backend

        config = self._web_config({"backend": "tavily"})
        env = patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"})
        with config, env:
            chosen = _get_backend()
            assert chosen == "parallel"
|
||||
|
||||
|
||||
class TestParallelClientConfig:
    """Checks for ``_get_parallel_client()`` construction and caching."""

    @staticmethod
    def _reset_state():
        """Clear the module-level cached client and the API key variable."""
        import tools.web_tools as web_tools

        web_tools._parallel_client = None
        os.environ.pop("PARALLEL_API_KEY", None)

    def setup_method(self):
        # Each test starts with no cached client and no key in the env.
        self._reset_state()

    def teardown_method(self):
        # Do not leak a cached client or key into later tests.
        self._reset_state()

    def test_creates_client_with_key(self):
        """With PARALLEL_API_KEY set, a ``Parallel`` instance is returned."""
        key_env = patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"})
        with key_env:
            from tools.web_tools import _get_parallel_client
            from parallel import Parallel

            created = _get_parallel_client()
            assert created is not None
            assert isinstance(created, Parallel)

    def test_no_key_raises_with_helpful_message(self):
        """Without the key, ValueError mentioning PARALLEL_API_KEY is raised."""
        from tools.web_tools import _get_parallel_client

        expected_error = pytest.raises(ValueError, match="PARALLEL_API_KEY")
        with expected_error:
            _get_parallel_client()

    def test_singleton_returns_same_instance(self):
        """Two consecutive calls hand back the very same client object."""
        key_env = patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"})
        with key_env:
            from tools.web_tools import _get_parallel_client

            first = _get_parallel_client()
            second = _get_parallel_client()
            assert first is second
|
||||
|
||||
|
||||
class TestCheckWebApiKey:
    """Checks for ``check_web_api_key()``, the unified availability check."""

    _ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")

    def _scrub_env(self):
        """Remove every backend-related variable from ``os.environ``."""
        for name in self._ENV_KEYS:
            os.environ.pop(name, None)

    @staticmethod
    def _available_with(env_vars):
        """Return ``check_web_api_key()`` evaluated with *env_vars* patched in."""
        with patch.dict(os.environ, env_vars):
            from tools.web_tools import check_web_api_key

            return check_web_api_key()

    def setup_method(self):
        # Start each test from an environment with no backend keys set.
        self._scrub_env()

    def teardown_method(self):
        # Leave no backend keys behind for the next test.
        self._scrub_env()

    def test_parallel_key_only(self):
        """Only PARALLEL_API_KEY set → True."""
        outcome = self._available_with({"PARALLEL_API_KEY": "test-key"})
        assert outcome is True

    def test_firecrawl_key_only(self):
        """Only FIRECRAWL_API_KEY set → True."""
        outcome = self._available_with({"FIRECRAWL_API_KEY": "fc-test"})
        assert outcome is True

    def test_firecrawl_url_only(self):
        """Only FIRECRAWL_API_URL set → True."""
        outcome = self._available_with({"FIRECRAWL_API_URL": "http://localhost:3002"})
        assert outcome is True

    def test_no_keys_returns_false(self):
        """None of the variables set → False."""
        from tools.web_tools import check_web_api_key

        outcome = check_web_api_key()
        assert outcome is False

    def test_both_keys_returns_true(self):
        """Both PARALLEL_API_KEY and FIRECRAWL_API_KEY set → True."""
        both_keys = {
            "PARALLEL_API_KEY": "test-key",
            "FIRECRAWL_API_KEY": "fc-test",
        }
        outcome = self._available_with(both_keys)
        assert outcome is True
|
||||
|
|
|
|||
|
|
@ -426,6 +426,8 @@ async def test_web_extract_blocks_redirected_final_url(monkeypatch):
|
|||
async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
# web_crawl_tool checks for Firecrawl env before website policy
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
|
||||
monkeypatch.setattr(
|
||||
web_tools,
|
||||
"check_website_access",
|
||||
|
|
@ -453,6 +455,9 @@ async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
|||
async def test_web_crawl_blocks_redirected_final_url(monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
# web_crawl_tool checks for Firecrawl env before website policy
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
|
||||
|
||||
def fake_check(url):
|
||||
if url == "https://allowed.test":
|
||||
return None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue