feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
This commit is contained in:
parent
4d7d9d9715
commit
161436cfdd
6 changed files with 410 additions and 0 deletions
|
|
@ -194,6 +194,7 @@ class GatewayRunner:
|
|||
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
||||
self._reasoning_config = self._load_reasoning_config()
|
||||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
|
||||
# Wire process registry into session store for reset protection
|
||||
from tools.process_registry import process_registry
|
||||
|
|
@ -393,6 +394,26 @@ class GatewayRunner:
|
|||
pass
|
||||
return {}
|
||||
|
||||
@staticmethod
|
||||
def _load_fallback_model() -> dict | None:
|
||||
"""Load fallback model config from config.yaml.
|
||||
|
||||
Returns a dict with 'provider' and 'model' keys, or None if
|
||||
not configured / both fields empty.
|
||||
"""
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path) as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
fb = cfg.get("fallback_model", {}) or {}
|
||||
if fb.get("provider") and fb.get("model"):
|
||||
return fb
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
async def start(self) -> bool:
|
||||
"""
|
||||
Start the gateway and all configured platform adapters.
|
||||
|
|
@ -2623,6 +2644,7 @@ class GatewayRunner:
|
|||
platform=platform_key,
|
||||
honcho_session_key=session_key,
|
||||
session_db=self._session_db,
|
||||
fallback_model=self._fallback_model,
|
||||
)
|
||||
|
||||
# Store agent reference for interrupt support
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue