Merge pull request #740 from NousResearch/hermes/hermes-3cd7c62d
feat: simple fallback model for provider resilience (#737)
This commit is contained in:
commit
315f3ea429
6 changed files with 442 additions and 0 deletions
5
cli.py
5
cli.py
|
|
@ -1118,6 +1118,10 @@ class HermesCLI:
|
||||||
self._provider_require_params = pr.get("require_parameters", False)
|
self._provider_require_params = pr.get("require_parameters", False)
|
||||||
self._provider_data_collection = pr.get("data_collection")
|
self._provider_data_collection = pr.get("data_collection")
|
||||||
|
|
||||||
|
# Fallback model config — tried when primary provider fails after retries
|
||||||
|
fb = CLI_CONFIG.get("fallback_model") or {}
|
||||||
|
self._fallback_model = fb if fb.get("provider") and fb.get("model") else None
|
||||||
|
|
||||||
# Agent will be initialized on first use
|
# Agent will be initialized on first use
|
||||||
self.agent: Optional[AIAgent] = None
|
self.agent: Optional[AIAgent] = None
|
||||||
self._app = None # prompt_toolkit Application (set in run())
|
self._app = None # prompt_toolkit Application (set in run())
|
||||||
|
|
@ -1349,6 +1353,7 @@ class HermesCLI:
|
||||||
session_db=self._session_db,
|
session_db=self._session_db,
|
||||||
clarify_callback=self._clarify_callback,
|
clarify_callback=self._clarify_callback,
|
||||||
honcho_session_key=self.session_id,
|
honcho_session_key=self.session_id,
|
||||||
|
fallback_model=self._fallback_model,
|
||||||
)
|
)
|
||||||
# Apply any pending title now that the session exists in the DB
|
# Apply any pending title now that the session exists in the DB
|
||||||
if self._pending_title and self._session_db:
|
if self._pending_title and self._session_db:
|
||||||
|
|
|
||||||
|
|
@ -194,6 +194,7 @@ class GatewayRunner:
|
||||||
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
||||||
self._reasoning_config = self._load_reasoning_config()
|
self._reasoning_config = self._load_reasoning_config()
|
||||||
self._provider_routing = self._load_provider_routing()
|
self._provider_routing = self._load_provider_routing()
|
||||||
|
self._fallback_model = self._load_fallback_model()
|
||||||
|
|
||||||
# Wire process registry into session store for reset protection
|
# Wire process registry into session store for reset protection
|
||||||
from tools.process_registry import process_registry
|
from tools.process_registry import process_registry
|
||||||
|
|
@ -393,6 +394,26 @@ class GatewayRunner:
|
||||||
pass
|
pass
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
@staticmethod
def _load_fallback_model() -> dict | None:
    """Load the fallback model config from ``config.yaml``.

    Reads the ``fallback_model`` mapping from ``config.yaml`` under
    ``_hermes_home``.

    Returns:
        The ``fallback_model`` mapping when it has non-empty ``provider``
        AND ``model`` values; ``None`` when the file is missing, the section
        is absent or not a mapping, either field is empty, or the file
        cannot be read or parsed.
    """
    try:
        import yaml as _y

        cfg_path = _hermes_home / "config.yaml"
        if not cfg_path.exists():
            return None
        # Explicit encoding so parsing does not depend on the locale default.
        with open(cfg_path, encoding="utf-8") as _f:
            cfg = _y.safe_load(_f) or {}
        # A YAML document whose root (or whose fallback_model value) is not
        # a mapping is treated as "not configured" rather than an error.
        fb = cfg.get("fallback_model") if isinstance(cfg, dict) else None
        if isinstance(fb, dict) and fb.get("provider") and fb.get("model"):
            return fb
    except Exception as exc:
        # Loading stays best-effort (a broken config must not prevent
        # startup), but the failure is reported instead of being silently
        # discarded, so a disabled fallback is diagnosable.
        import logging

        logging.getLogger(__name__).warning(
            "Could not load fallback_model from config.yaml: %s", exc
        )
    return None
|
||||||
|
|
||||||
async def start(self) -> bool:
|
async def start(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Start the gateway and all configured platform adapters.
|
Start the gateway and all configured platform adapters.
|
||||||
|
|
@ -2632,6 +2653,7 @@ class GatewayRunner:
|
||||||
platform=platform_key,
|
platform=platform_key,
|
||||||
honcho_session_key=session_key,
|
honcho_session_key=session_key,
|
||||||
session_db=self._session_db,
|
session_db=self._session_db,
|
||||||
|
fallback_model=self._fallback_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Store agent reference for interrupt support
|
# Store agent reference for interrupt support
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,27 @@ DEFAULT_CONFIG = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
# Fallback model — used when the primary model/provider fails after retries.
|
||||||
|
# When the primary hits rate limits (429), overload (529), or service errors (503),
|
||||||
|
# Hermes will automatically switch to this model for the remainder of the session.
|
||||||
|
# Set to None / omit to disable fallback.
|
||||||
|
#
|
||||||
|
# Supported providers (auto-resolve base_url and API key from env):
|
||||||
|
# openrouter (OPENROUTER_API_KEY) — routes to any model
|
||||||
|
# zai (ZAI_API_KEY) — Z.AI / GLM
|
||||||
|
# kimi-coding (KIMI_API_KEY) — Kimi / Moonshot
|
||||||
|
# minimax (MINIMAX_API_KEY) — MiniMax
|
||||||
|
# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China)
|
||||||
|
#
|
||||||
|
# For any other OpenAI-compatible endpoint, use base_url + api_key_env.
|
||||||
|
"fallback_model": {
|
||||||
|
"provider": "", # provider name from the list above
|
||||||
|
"model": "", # model slug, e.g. "anthropic/claude-sonnet-4"
|
||||||
|
# Optional overrides (usually auto-resolved from provider):
|
||||||
|
# "base_url": "", # custom endpoint URL
|
||||||
|
# "api_key_env": "", # env var name for API key (e.g. "MY_CUSTOM_KEY")
|
||||||
|
},
|
||||||
|
|
||||||
"display": {
|
"display": {
|
||||||
"compact": False,
|
"compact": False,
|
||||||
"personality": "kawaii",
|
"personality": "kawaii",
|
||||||
|
|
|
||||||
122
run_agent.py
122
run_agent.py
|
|
@ -183,6 +183,7 @@ class AIAgent:
|
||||||
session_db=None,
|
session_db=None,
|
||||||
honcho_session_key: str = None,
|
honcho_session_key: str = None,
|
||||||
iteration_budget: "IterationBudget" = None,
|
iteration_budget: "IterationBudget" = None,
|
||||||
|
fallback_model: Dict[str, Any] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the AI Agent.
|
Initialize the AI Agent.
|
||||||
|
|
@ -406,6 +407,17 @@ class AIAgent:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
||||||
|
|
||||||
|
# Provider fallback — a single backup model/provider tried when the
|
||||||
|
# primary is exhausted (rate-limit, overload, connection failure).
|
||||||
|
# Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
|
||||||
|
self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
|
||||||
|
self._fallback_activated = False
|
||||||
|
if self._fallback_model:
|
||||||
|
fb_p = self._fallback_model.get("provider", "")
|
||||||
|
fb_m = self._fallback_model.get("model", "")
|
||||||
|
if fb_p and fb_m and not self.quiet_mode:
|
||||||
|
print(f"🔄 Fallback model: {fb_m} ({fb_p})")
|
||||||
|
|
||||||
# Get available tools with filtering
|
# Get available tools with filtering
|
||||||
self.tools = get_tool_definitions(
|
self.tools = get_tool_definitions(
|
||||||
enabled_toolsets=enabled_toolsets,
|
enabled_toolsets=enabled_toolsets,
|
||||||
|
|
@ -2146,6 +2158,103 @@ class AIAgent:
|
||||||
raise result["error"]
|
raise result["error"]
|
||||||
return result["response"]
|
return result["response"]
|
||||||
|
|
||||||
|
# ── Provider fallback ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Lookup table used when activating the fallback provider:
#   provider id -> (default base URL, env var names checked in order for a key)
# Providers missing from this table must supply base_url + api_key_env
# in the fallback_model config instead.
_FALLBACK_PROVIDERS = {
    "openrouter": (
        OPENROUTER_BASE_URL,
        ["OPENROUTER_API_KEY"],
    ),
    "zai": (
        "https://api.z.ai/api/paas/v4",
        ["ZAI_API_KEY", "Z_AI_API_KEY"],  # second name is the alternate spelling
    ),
    "kimi-coding": (
        "https://api.moonshot.ai/v1",
        ["KIMI_API_KEY"],
    ),
    "minimax": (
        "https://api.minimax.io/v1",
        ["MINIMAX_API_KEY"],
    ),
    "minimax-cn": (
        "https://api.minimaxi.com/v1",
        ["MINIMAX_CN_API_KEY"],
    ),
}
|
||||||
|
|
||||||
|
def _try_activate_fallback(self) -> bool:
|
||||||
|
"""Switch to the configured fallback model/provider.
|
||||||
|
|
||||||
|
Called when the primary model is failing after retries. Swaps the
|
||||||
|
OpenAI client, model slug, and provider in-place so the retry loop
|
||||||
|
can continue with the new backend. One-shot: returns False if
|
||||||
|
already activated or not configured.
|
||||||
|
"""
|
||||||
|
if self._fallback_activated or not self._fallback_model:
|
||||||
|
return False
|
||||||
|
|
||||||
|
fb = self._fallback_model
|
||||||
|
fb_provider = (fb.get("provider") or "").strip().lower()
|
||||||
|
fb_model = (fb.get("model") or "").strip()
|
||||||
|
if not fb_provider or not fb_model:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Resolve API key
|
||||||
|
fb_key = (fb.get("api_key") or "").strip()
|
||||||
|
if not fb_key:
|
||||||
|
key_env = (fb.get("api_key_env") or "").strip()
|
||||||
|
if key_env:
|
||||||
|
fb_key = os.getenv(key_env, "")
|
||||||
|
elif fb_provider in self._FALLBACK_PROVIDERS:
|
||||||
|
for env_var in self._FALLBACK_PROVIDERS[fb_provider][1]:
|
||||||
|
fb_key = os.getenv(env_var, "")
|
||||||
|
if fb_key:
|
||||||
|
break
|
||||||
|
if not fb_key:
|
||||||
|
logging.warning(
|
||||||
|
"Fallback model configured but no API key found for provider '%s'",
|
||||||
|
fb_provider,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Resolve base URL
|
||||||
|
fb_base_url = (fb.get("base_url") or "").strip()
|
||||||
|
if not fb_base_url and fb_provider in self._FALLBACK_PROVIDERS:
|
||||||
|
fb_base_url = self._FALLBACK_PROVIDERS[fb_provider][0]
|
||||||
|
if not fb_base_url:
|
||||||
|
fb_base_url = OPENROUTER_BASE_URL
|
||||||
|
|
||||||
|
# Build new client
|
||||||
|
try:
|
||||||
|
client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
|
||||||
|
if "openrouter" in fb_base_url.lower():
|
||||||
|
client_kwargs["default_headers"] = {
|
||||||
|
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||||
|
"X-OpenRouter-Title": "Hermes Agent",
|
||||||
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||||
|
}
|
||||||
|
elif "api.kimi.com" in fb_base_url.lower():
|
||||||
|
client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||||
|
|
||||||
|
self.client = OpenAI(**client_kwargs)
|
||||||
|
self._client_kwargs = client_kwargs
|
||||||
|
old_model = self.model
|
||||||
|
self.model = fb_model
|
||||||
|
self.provider = fb_provider
|
||||||
|
self.base_url = fb_base_url
|
||||||
|
self.api_mode = "chat_completions"
|
||||||
|
self._fallback_activated = True
|
||||||
|
|
||||||
|
# Re-evaluate prompt caching for the new provider/model
|
||||||
|
self._use_prompt_caching = (
|
||||||
|
"openrouter" in fb_base_url.lower()
|
||||||
|
and "claude" in fb_model.lower()
|
||||||
|
)
|
||||||
|
|
||||||
|
print(
|
||||||
|
f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
|
||||||
|
f"{fb_model} via {fb_provider}"
|
||||||
|
)
|
||||||
|
logging.info(
|
||||||
|
"Fallback activated: %s → %s (%s)",
|
||||||
|
old_model, fb_model, fb_provider,
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logging.error("Failed to activate fallback model: %s", e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ── End provider fallback ──────────────────────────────────────────────
|
||||||
|
|
||||||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||||
"""Build the keyword arguments dict for the active API mode."""
|
"""Build the keyword arguments dict for the active API mode."""
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode == "codex_responses":
|
||||||
|
|
@ -3252,6 +3361,10 @@ class AIAgent:
|
||||||
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
|
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
|
||||||
|
|
||||||
if retry_count >= max_retries:
|
if retry_count >= max_retries:
|
||||||
|
# Try fallback before giving up
|
||||||
|
if self._try_activate_fallback():
|
||||||
|
retry_count = 0
|
||||||
|
continue
|
||||||
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
|
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
|
||||||
logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
|
logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
|
||||||
self._persist_session(messages, conversation_history)
|
self._persist_session(messages, conversation_history)
|
||||||
|
|
@ -3576,6 +3689,11 @@ class AIAgent:
|
||||||
])) and not is_context_length_error
|
])) and not is_context_length_error
|
||||||
|
|
||||||
if is_client_error:
|
if is_client_error:
|
||||||
|
# Try fallback before aborting — a different provider
|
||||||
|
# may not have the same issue (rate limit, auth, etc.)
|
||||||
|
if self._try_activate_fallback():
|
||||||
|
retry_count = 0
|
||||||
|
continue
|
||||||
self._dump_api_request_debug(
|
self._dump_api_request_debug(
|
||||||
api_kwargs, reason="non_retryable_client_error", error=api_error,
|
api_kwargs, reason="non_retryable_client_error", error=api_error,
|
||||||
)
|
)
|
||||||
|
|
@ -3593,6 +3711,10 @@ class AIAgent:
|
||||||
}
|
}
|
||||||
|
|
||||||
if retry_count >= max_retries:
|
if retry_count >= max_retries:
|
||||||
|
# Try fallback before giving up entirely
|
||||||
|
if self._try_activate_fallback():
|
||||||
|
retry_count = 0
|
||||||
|
continue
|
||||||
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
|
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
|
||||||
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
|
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
|
||||||
logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
|
logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
|
||||||
|
|
|
||||||
|
|
@ -149,6 +149,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
|
||||||
runner._prefill_messages = []
|
runner._prefill_messages = []
|
||||||
runner._reasoning_config = None
|
runner._reasoning_config = None
|
||||||
runner._provider_routing = {}
|
runner._provider_routing = {}
|
||||||
|
runner._fallback_model = None
|
||||||
runner._running_agents = {}
|
runner._running_agents = {}
|
||||||
from unittest.mock import MagicMock, AsyncMock
|
from unittest.mock import MagicMock, AsyncMock
|
||||||
runner.hooks = MagicMock()
|
runner.hooks = MagicMock()
|
||||||
|
|
|
||||||
271
tests/test_fallback_model.py
Normal file
271
tests/test_fallback_model.py
Normal file
|
|
@ -0,0 +1,271 @@
|
||||||
|
"""Tests for the provider fallback model feature.
|
||||||
|
|
||||||
|
Verifies that AIAgent can switch to a configured fallback model/provider
|
||||||
|
when the primary fails after retries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from run_agent import AIAgent
|
||||||
|
|
||||||
|
|
||||||
|
def _make_tool_defs(*names: str) -> list:
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": n,
|
||||||
|
"description": f"{n} tool",
|
||||||
|
"parameters": {"type": "object", "properties": {}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for n in names
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _make_agent(fallback_model=None):
    """Build a bare-bones AIAgent for tests, optionally with a fallback config.

    Tool discovery, toolset requirement checks and the OpenAI client class
    are patched for the duration of construction; the resulting agent's
    ``client`` is then replaced with a MagicMock.
    """
    stub_tools = _make_tool_defs("web_search")
    with patch("run_agent.get_tool_definitions", return_value=stub_tools):
        with patch("run_agent.check_toolset_requirements", return_value={}):
            with patch("run_agent.OpenAI"):
                agent = AIAgent(
                    api_key="test-key-primary",
                    quiet_mode=True,
                    skip_context_files=True,
                    skip_memory=True,
                    fallback_model=fallback_model,
                )
                agent.client = MagicMock()
                return agent
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# _try_activate_fallback()
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class TestTryActivateFallback:
    """Behaviour of ``AIAgent._try_activate_fallback()``."""

    def test_returns_false_when_not_configured(self):
        agent = _make_agent(fallback_model=None)
        assert agent._try_activate_fallback() is False
        assert agent._fallback_activated is False

    def test_returns_false_for_empty_config(self):
        agent = _make_agent(fallback_model={"provider": "", "model": ""})
        assert agent._try_activate_fallback() is False

    def test_returns_false_for_missing_provider(self):
        agent = _make_agent(fallback_model={"model": "gpt-4.1"})
        assert agent._try_activate_fallback() is False

    def test_returns_false_for_missing_model(self):
        agent = _make_agent(fallback_model={"provider": "openrouter"})
        assert agent._try_activate_fallback() is False

    def test_activates_openrouter_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        with (
            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-fallback-key"}),
            patch("run_agent.OpenAI") as mock_openai,
        ):
            result = agent._try_activate_fallback()
            assert result is True
            assert agent._fallback_activated is True
            assert agent.model == "anthropic/claude-sonnet-4"
            assert agent.provider == "openrouter"
            assert agent.api_mode == "chat_completions"
            mock_openai.assert_called_once()
            call_kwargs = mock_openai.call_args[1]
            assert call_kwargs["api_key"] == "sk-or-fallback-key"
            assert "openrouter" in call_kwargs["base_url"].lower()
            # OpenRouter should get attribution headers
            assert "default_headers" in call_kwargs

    def test_activates_zai_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "zai", "model": "glm-5"},
        )
        with (
            patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}),
            patch("run_agent.OpenAI") as mock_openai,
        ):
            result = agent._try_activate_fallback()
            assert result is True
            assert agent.model == "glm-5"
            assert agent.provider == "zai"
            call_kwargs = mock_openai.call_args[1]
            assert call_kwargs["api_key"] == "sk-zai-key"
            assert "z.ai" in call_kwargs["base_url"].lower()

    def test_activates_kimi_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"},
        )
        with (
            patch.dict("os.environ", {"KIMI_API_KEY": "sk-kimi-key"}),
            patch("run_agent.OpenAI") as mock_openai,
        ):
            assert agent._try_activate_fallback() is True
            assert agent.model == "kimi-k2.5"
            assert agent.provider == "kimi-coding"
            # Pin the provider's default endpoint as the other provider tests do.
            call_kwargs = mock_openai.call_args[1]
            assert "moonshot.ai" in call_kwargs["base_url"].lower()

    def test_activates_minimax_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
        )
        with (
            patch.dict("os.environ", {"MINIMAX_API_KEY": "sk-mm-key"}),
            patch("run_agent.OpenAI") as mock_openai,
        ):
            assert agent._try_activate_fallback() is True
            assert agent.model == "MiniMax-M2.5"
            assert agent.provider == "minimax"
            call_kwargs = mock_openai.call_args[1]
            assert "minimax.io" in call_kwargs["base_url"]

    def test_only_fires_once(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        with (
            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
            patch("run_agent.OpenAI"),
        ):
            assert agent._try_activate_fallback() is True
            # Second attempt should return False
            assert agent._try_activate_fallback() is False

    def test_returns_false_when_no_api_key(self):
        """Fallback should fail gracefully when the API key env var is unset."""
        agent = _make_agent(
            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
        )
        # Ensure MINIMAX_API_KEY is not in the environment
        env = {k: v for k, v in os.environ.items() if k != "MINIMAX_API_KEY"}
        with patch.dict("os.environ", env, clear=True):
            assert agent._try_activate_fallback() is False
            assert agent._fallback_activated is False

    def test_custom_base_url(self):
        """Custom base_url in config should override the provider default."""
        agent = _make_agent(
            fallback_model={
                "provider": "custom",
                "model": "my-model",
                "base_url": "http://localhost:8080/v1",
                "api_key_env": "MY_CUSTOM_KEY",
            },
        )
        with (
            patch.dict("os.environ", {"MY_CUSTOM_KEY": "custom-secret"}),
            patch("run_agent.OpenAI") as mock_openai,
        ):
            assert agent._try_activate_fallback() is True
            call_kwargs = mock_openai.call_args[1]
            assert call_kwargs["base_url"] == "http://localhost:8080/v1"
            assert call_kwargs["api_key"] == "custom-secret"

    def test_prompt_caching_enabled_for_claude_on_openrouter(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        with (
            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
            patch("run_agent.OpenAI"),
        ):
            agent._try_activate_fallback()
            assert agent._use_prompt_caching is True

    def test_prompt_caching_disabled_for_non_claude(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
        )
        with (
            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
            patch("run_agent.OpenAI"),
        ):
            agent._try_activate_fallback()
            assert agent._use_prompt_caching is False

    def test_prompt_caching_disabled_for_non_openrouter(self):
        agent = _make_agent(
            fallback_model={"provider": "zai", "model": "glm-5"},
        )
        with (
            patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}),
            patch("run_agent.OpenAI"),
        ):
            agent._try_activate_fallback()
            assert agent._use_prompt_caching is False

    def test_zai_alt_env_var(self):
        """Z.AI should also check Z_AI_API_KEY as fallback env var."""
        agent = _make_agent(
            fallback_model={"provider": "zai", "model": "glm-5"},
        )
        # ZAI_API_KEY is checked first, so a value leaking in from the real
        # environment would shadow the alternate variable under test. Rebuild
        # the environment without it to keep the test hermetic.
        env = {k: v for k, v in os.environ.items() if k != "ZAI_API_KEY"}
        env["Z_AI_API_KEY"] = "sk-alt-key"
        with (
            patch.dict("os.environ", env, clear=True),
            patch("run_agent.OpenAI") as mock_openai,
        ):
            assert agent._try_activate_fallback() is True
            call_kwargs = mock_openai.call_args[1]
            assert call_kwargs["api_key"] == "sk-alt-key"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Fallback config init
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class TestFallbackInit:
    """How the AIAgent constructor stores the ``fallback_model`` argument."""

    def test_fallback_stored_when_configured(self):
        config = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
        agent = _make_agent(fallback_model=config)
        stored = agent._fallback_model
        assert stored is not None
        assert stored["provider"] == "openrouter"
        assert agent._fallback_activated is False

    def test_fallback_none_when_not_configured(self):
        agent = _make_agent(fallback_model=None)
        assert agent._fallback_model is None
        assert agent._fallback_activated is False

    def test_fallback_none_for_non_dict(self):
        # Anything that is not a dict is discarded at construction time.
        agent = _make_agent(fallback_model="not-a-dict")
        assert agent._fallback_model is None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Provider credential resolution
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class TestProviderCredentials:
    """Verify that each supported provider resolves its API key correctly."""

    @pytest.mark.parametrize("provider,env_var,base_url_fragment", [
        ("openrouter", "OPENROUTER_API_KEY", "openrouter"),
        ("zai", "ZAI_API_KEY", "z.ai"),
        ("kimi-coding", "KIMI_API_KEY", "moonshot.ai"),
        ("minimax", "MINIMAX_API_KEY", "minimax.io"),
        ("minimax-cn", "MINIMAX_CN_API_KEY", "minimaxi.com"),
    ])
    def test_provider_resolves(self, provider, env_var, base_url_fragment):
        config = {"provider": provider, "model": "test-model"}
        agent = _make_agent(fallback_model=config)
        with patch.dict("os.environ", {env_var: "test-key-123"}):
            with patch("run_agent.OpenAI") as openai_cls:
                activated = agent._try_activate_fallback()
                assert activated is True, f"Failed to activate fallback for {provider}"
                # The key and endpoint handed to the client constructor must
                # come from the provider's env var and default base URL.
                passed = openai_cls.call_args[1]
                assert passed["api_key"] == "test-key-123"
                assert base_url_fragment in passed["base_url"].lower()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue