Merge pull request #1375 from NousResearch/hermes/hermes-dd253d81

feat: add direct endpoint overrides for auxiliary and delegation
This commit is contained in:
Teknium 2026-03-14 21:17:42 -07:00 committed by GitHub
commit cb7690b2b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 526 additions and 99 deletions

View file

@ -30,6 +30,10 @@ Default "auto" follows the chains above.
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL, Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
than the provider's default. than the provider's default.
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
custom OpenAI-compatible endpoint without touching the main model settings.
""" """
import json import json
@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
return "auto" return "auto"
def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
    """Read a per-task auxiliary override from the environment.

    ``AUXILIARY_{TASK}_{suffix}`` is consulted first, then
    ``CONTEXT_{TASK}_{suffix}``, where ``{TASK}`` is *task* upper-cased.

    Args:
        task: Task name (e.g. ``"vision"``). A falsy value disables the lookup.
        suffix: Trailing part of the variable name (e.g. ``"MODEL"``).

    Returns:
        The first non-blank value found, with surrounding whitespace
        stripped, or ``None`` when *task* is empty or neither variable
        holds a non-blank value.
    """
    if not task:
        return None
    task_token = task.upper()
    # Prefix order matters: an AUXILIARY_* value shadows a CONTEXT_* one.
    for prefix in ("AUXILIARY_", "CONTEXT_"):
        value = os.getenv(f"{prefix}{task_token}_{suffix}", "").strip()
        if value:
            return value
    return None
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
or_key = os.getenv("OPENROUTER_API_KEY") or_key = os.getenv("OPENROUTER_API_KEY")
if not or_key: if not or_key:
@ -599,6 +614,8 @@ def resolve_provider_client(
model: str = None, model: str = None,
async_mode: bool = False, async_mode: bool = False,
raw_codex: bool = False, raw_codex: bool = False,
explicit_base_url: str = None,
explicit_api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]: ) -> Tuple[Optional[Any], Optional[str]]:
"""Central router: given a provider name and optional model, return a """Central router: given a provider name and optional model, return a
configured client with the correct auth, base URL, and API format. configured client with the correct auth, base URL, and API format.
@ -620,6 +637,8 @@ def resolve_provider_client(
instead of wrapping in CodexAuxiliaryClient. Use this when instead of wrapping in CodexAuxiliaryClient. Use this when
the caller needs direct access to responses.stream() (e.g., the caller needs direct access to responses.stream() (e.g.,
the main agent loop). the main agent loop).
explicit_base_url: Optional direct OpenAI-compatible endpoint.
explicit_api_key: Optional API key paired with explicit_base_url.
Returns: Returns:
(client, resolved_model) or (None, None) if auth is unavailable. (client, resolved_model) or (None, None) if auth is unavailable.
@ -696,6 +715,22 @@ def resolve_provider_client(
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ─────────── # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom": if provider == "custom":
if explicit_base_url:
custom_base = explicit_base_url.strip()
custom_key = (
(explicit_api_key or "").strip()
or os.getenv("OPENAI_API_KEY", "").strip()
)
if not custom_base or not custom_key:
logger.warning(
"resolve_provider_client: explicit custom endpoint requested "
"but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
)
return None, None
final_model = model or _read_main_model() or "gpt-4o-mini"
client = OpenAI(api_key=custom_key, base_url=custom_base)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Try custom first, then codex, then API-key providers # Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex, for try_fn in (_try_custom_endpoint, _try_codex,
_resolve_api_key_provider): _resolve_api_key_provider):
@ -784,10 +819,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
Callers may override the returned model with a per-task env var Callers may override the returned model with a per-task env var
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL). (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
""" """
forced = _get_auxiliary_provider(task) provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
if forced != "auto": return resolve_provider_client(
return resolve_provider_client(forced) provider,
return resolve_provider_client("auto") model=model,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
def get_async_text_auxiliary_client(task: str = ""): def get_async_text_auxiliary_client(task: str = ""):
@ -797,10 +835,14 @@ def get_async_text_auxiliary_client(task: str = ""):
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API. (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
Returns (None, None) when no provider is available. Returns (None, None) when no provider is available.
""" """
forced = _get_auxiliary_provider(task) provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
if forced != "auto": return resolve_provider_client(
return resolve_provider_client(forced, async_mode=True) provider,
return resolve_provider_client("auto", async_mode=True) model=model,
async_mode=True,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
_VISION_AUTO_PROVIDER_ORDER = ( _VISION_AUTO_PROVIDER_ORDER = (
@ -856,26 +898,43 @@ def resolve_vision_provider_client(
provider: Optional[str] = None, provider: Optional[str] = None,
model: Optional[str] = None, model: Optional[str] = None,
*, *,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
async_mode: bool = False, async_mode: bool = False,
) -> Tuple[Optional[str], Optional[Any], Optional[str]]: ) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
"""Resolve the client actually used for vision tasks. """Resolve the client actually used for vision tasks.
Explicit provider overrides still use the generic provider router for Direct endpoint overrides take precedence over provider selection. Explicit
non-standard backends, so users can intentionally force experimental provider overrides still use the generic provider router for non-standard
providers. Auto mode stays conservative and only tries vision backends backends, so users can intentionally force experimental providers. Auto mode
known to work today. stays conservative and only tries vision backends known to work today.
""" """
requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision")) requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
"vision", provider, model, base_url, api_key
)
requested = _normalize_vision_provider(requested)
def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]): def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
if sync_client is None: if sync_client is None:
return resolved_provider, None, None return resolved_provider, None, None
final_model = model or default_model final_model = resolved_model or default_model
if async_mode: if async_mode:
async_client, async_model = _to_async_client(sync_client, final_model) async_client, async_model = _to_async_client(sync_client, final_model)
return resolved_provider, async_client, async_model return resolved_provider, async_client, async_model
return resolved_provider, sync_client, final_model return resolved_provider, sync_client, final_model
if resolved_base_url:
client, final_model = resolve_provider_client(
"custom",
model=resolved_model,
async_mode=async_mode,
explicit_base_url=resolved_base_url,
explicit_api_key=resolved_api_key,
)
if client is None:
return "custom", None, None
return "custom", client, final_model
if requested == "auto": if requested == "auto":
for candidate in get_available_vision_backends(): for candidate in get_available_vision_backends():
sync_client, default_model = _resolve_strict_vision_backend(candidate) sync_client, default_model = _resolve_strict_vision_backend(candidate)
@ -888,7 +947,7 @@ def resolve_vision_provider_client(
sync_client, default_model = _resolve_strict_vision_backend(requested) sync_client, default_model = _resolve_strict_vision_backend(requested)
return _finalize(requested, sync_client, default_model) return _finalize(requested, sync_client, default_model)
client, final_model = _get_cached_client(requested, model, async_mode) client, final_model = _get_cached_client(requested, resolved_model, async_mode)
if client is None: if client is None:
return requested, None, None return requested, None, None
return requested, client, final_model return requested, client, final_model
@ -945,19 +1004,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Every auxiliary LLM consumer should use these instead of manually # Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create(). # constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode) -> (client, default_model) # Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
_client_cache: Dict[tuple, tuple] = {} _client_cache: Dict[tuple, tuple] = {}
def _get_cached_client( def _get_cached_client(
provider: str, model: str = None, async_mode: bool = False, provider: str,
model: str = None,
async_mode: bool = False,
base_url: str = None,
api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]: ) -> Tuple[Optional[Any], Optional[str]]:
"""Get or create a cached client for the given provider.""" """Get or create a cached client for the given provider."""
cache_key = (provider, async_mode) cache_key = (provider, async_mode, base_url or "", api_key or "")
if cache_key in _client_cache: if cache_key in _client_cache:
cached_client, cached_default = _client_cache[cache_key] cached_client, cached_default = _client_cache[cache_key]
return cached_client, model or cached_default return cached_client, model or cached_default
client, default_model = resolve_provider_client(provider, model, async_mode) client, default_model = resolve_provider_client(
provider,
model,
async_mode,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
if client is not None: if client is not None:
_client_cache[cache_key] = (client, default_model) _client_cache[cache_key] = (client, default_model)
return client, model or default_model return client, model or default_model
@ -967,57 +1036,75 @@ def _resolve_task_provider_model(
task: str = None, task: str = None,
provider: str = None, provider: str = None,
model: str = None, model: str = None,
) -> Tuple[str, Optional[str]]: base_url: str = None,
api_key: str = None,
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
"""Determine provider + model for a call. """Determine provider + model for a call.
Priority: Priority:
1. Explicit provider/model args (always win) 1. Explicit provider/model/base_url/api_key args (always win)
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.) 2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
3. Config file (auxiliary.{task}.provider/model or compression.*) 3. Config file (auxiliary.{task}.* or compression.*)
4. "auto" (full auto-detection chain) 4. "auto" (full auto-detection chain)
Returns (provider, model) where model may be None (use provider default). Returns (provider, model, base_url, api_key) where model may be None
(use provider default). When base_url is set, provider is forced to
"custom" and the task uses that direct endpoint.
""" """
if provider: config = {}
return provider, model cfg_provider = None
cfg_model = None
cfg_base_url = None
cfg_api_key = None
if task: if task:
# Check env var overrides first
env_provider = _get_auxiliary_provider(task)
if env_provider != "auto":
# Check for env var model override too
env_model = None
for prefix in ("AUXILIARY_", "CONTEXT_"):
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
if val:
env_model = val
break
return env_provider, model or env_model
# Read from config file
try: try:
from hermes_cli.config import load_config from hermes_cli.config import load_config
config = load_config() config = load_config()
except ImportError: except ImportError:
return "auto", model config = {}
# Check auxiliary.{task} section aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
aux = config.get("auxiliary", {}) task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
task_config = aux.get(task, {}) if not isinstance(task_config, dict):
cfg_provider = task_config.get("provider", "").strip() or None task_config = {}
cfg_model = task_config.get("model", "").strip() or None cfg_provider = str(task_config.get("provider", "")).strip() or None
cfg_model = str(task_config.get("model", "")).strip() or None
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
# Backwards compat: compression section has its own keys # Backwards compat: compression section has its own keys
if task == "compression" and not cfg_provider: if task == "compression" and not cfg_provider:
comp = config.get("compression", {}) comp = config.get("compression", {}) if isinstance(config, dict) else {}
cfg_provider = comp.get("summary_provider", "").strip() or None if isinstance(comp, dict):
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None cfg_provider = comp.get("summary_provider", "").strip() or None
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
resolved_model = model or env_model or cfg_model
if base_url:
return "custom", resolved_model, base_url, api_key
if provider:
return provider, resolved_model, base_url, api_key
if task:
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
if env_base_url:
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
env_provider = _get_auxiliary_provider(task)
if env_provider != "auto":
return env_provider, resolved_model, None, None
if cfg_base_url:
return "custom", resolved_model, cfg_base_url, cfg_api_key
if cfg_provider and cfg_provider != "auto": if cfg_provider and cfg_provider != "auto":
return cfg_provider, model or cfg_model return cfg_provider, resolved_model, None, None
return "auto", model or cfg_model return "auto", resolved_model, None, None
return "auto", model return "auto", resolved_model, None, None
def _build_call_kwargs( def _build_call_kwargs(
@ -1029,6 +1116,7 @@ def _build_call_kwargs(
tools: Optional[list] = None, tools: Optional[list] = None,
timeout: float = 30.0, timeout: float = 30.0,
extra_body: Optional[dict] = None, extra_body: Optional[dict] = None,
base_url: Optional[str] = None,
) -> dict: ) -> dict:
"""Build kwargs for .chat.completions.create() with model/provider adjustments.""" """Build kwargs for .chat.completions.create() with model/provider adjustments."""
kwargs: Dict[str, Any] = { kwargs: Dict[str, Any] = {
@ -1044,7 +1132,7 @@ def _build_call_kwargs(
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens. # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens. # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom": if provider == "custom":
custom_base = _current_custom_base_url() custom_base = base_url or _current_custom_base_url()
if "api.openai.com" in custom_base.lower(): if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens kwargs["max_completion_tokens"] = max_tokens
else: else:
@ -1070,6 +1158,8 @@ def call_llm(
*, *,
provider: str = None, provider: str = None,
model: str = None, model: str = None,
base_url: str = None,
api_key: str = None,
messages: list, messages: list,
temperature: float = None, temperature: float = None,
max_tokens: int = None, max_tokens: int = None,
@ -1101,16 +1191,18 @@ def call_llm(
Raises: Raises:
RuntimeError: If no provider is configured. RuntimeError: If no provider is configured.
""" """
resolved_provider, resolved_model = _resolve_task_provider_model( resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
task, provider, model) task, provider, model, base_url, api_key)
if task == "vision": if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client( effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider, provider=provider,
model=resolved_model, model=model,
base_url=base_url,
api_key=api_key,
async_mode=False, async_mode=False,
) )
if client is None and resolved_provider != "auto": if client is None and resolved_provider != "auto" and not resolved_base_url:
logger.warning( logger.warning(
"Vision provider %s unavailable, falling back to auto vision backends", "Vision provider %s unavailable, falling back to auto vision backends",
resolved_provider, resolved_provider,
@ -1127,10 +1219,15 @@ def call_llm(
) )
resolved_provider = effective_provider or resolved_provider resolved_provider = effective_provider or resolved_provider
else: else:
client, final_model = _get_cached_client(resolved_provider, resolved_model) client, final_model = _get_cached_client(
resolved_provider,
resolved_model,
base_url=resolved_base_url,
api_key=resolved_api_key,
)
if client is None: if client is None:
# Fallback: try openrouter # Fallback: try openrouter
if resolved_provider != "openrouter": if resolved_provider != "openrouter" and not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter", logger.warning("Provider %s unavailable, falling back to openrouter",
resolved_provider) resolved_provider)
client, final_model = _get_cached_client( client, final_model = _get_cached_client(
@ -1143,7 +1240,8 @@ def call_llm(
kwargs = _build_call_kwargs( kwargs = _build_call_kwargs(
resolved_provider, final_model, messages, resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens, temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=timeout, extra_body=extra_body) tools=tools, timeout=timeout, extra_body=extra_body,
base_url=resolved_base_url)
# Handle max_tokens vs max_completion_tokens retry # Handle max_tokens vs max_completion_tokens retry
try: try:
@ -1162,6 +1260,8 @@ async def async_call_llm(
*, *,
provider: str = None, provider: str = None,
model: str = None, model: str = None,
base_url: str = None,
api_key: str = None,
messages: list, messages: list,
temperature: float = None, temperature: float = None,
max_tokens: int = None, max_tokens: int = None,
@ -1173,16 +1273,18 @@ async def async_call_llm(
Same as call_llm() but async. See call_llm() for full documentation. Same as call_llm() but async. See call_llm() for full documentation.
""" """
resolved_provider, resolved_model = _resolve_task_provider_model( resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
task, provider, model) task, provider, model, base_url, api_key)
if task == "vision": if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client( effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider, provider=provider,
model=resolved_model, model=model,
base_url=base_url,
api_key=api_key,
async_mode=True, async_mode=True,
) )
if client is None and resolved_provider != "auto": if client is None and resolved_provider != "auto" and not resolved_base_url:
logger.warning( logger.warning(
"Vision provider %s unavailable, falling back to auto vision backends", "Vision provider %s unavailable, falling back to auto vision backends",
resolved_provider, resolved_provider,
@ -1200,9 +1302,14 @@ async def async_call_llm(
resolved_provider = effective_provider or resolved_provider resolved_provider = effective_provider or resolved_provider
else: else:
client, final_model = _get_cached_client( client, final_model = _get_cached_client(
resolved_provider, resolved_model, async_mode=True) resolved_provider,
resolved_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
)
if client is None: if client is None:
if resolved_provider != "openrouter": if resolved_provider != "openrouter" and not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter", logger.warning("Provider %s unavailable, falling back to openrouter",
resolved_provider) resolved_provider)
client, final_model = _get_cached_client( client, final_model = _get_cached_client(
@ -1216,7 +1323,8 @@ async def async_call_llm(
kwargs = _build_call_kwargs( kwargs = _build_call_kwargs(
resolved_provider, final_model, messages, resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens, temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=timeout, extra_body=extra_body) tools=tools, timeout=timeout, extra_body=extra_body,
base_url=resolved_base_url)
try: try:
return await client.chat.completions.create(**kwargs) return await client.chat.completions.create(**kwargs)

48
cli.py
View file

@ -218,11 +218,27 @@ def load_cli_config() -> Dict[str, Any]:
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min) "timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
"max_tool_calls": 50, # Max RPC tool calls per execution "max_tool_calls": 50, # Max RPC tool calls per execution
}, },
"auxiliary": {
"vision": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"web_extract": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
},
"delegation": { "delegation": {
"max_iterations": 45, # Max tool-calling turns per child agent "max_iterations": 45, # Max tool-calling turns per child agent
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
"model": "", # Subagent model override (empty = inherit parent model) "model": "", # Subagent model override (empty = inherit parent model)
"provider": "", # Subagent provider override (empty = inherit parent provider) "provider": "", # Subagent provider override (empty = inherit parent provider)
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
}, },
} }
@ -363,28 +379,44 @@ def load_cli_config() -> Dict[str, Any]:
if config_key in compression_config: if config_key in compression_config:
os.environ[env_var] = str(compression_config[config_key]) os.environ[env_var] = str(compression_config[config_key])
# Apply auxiliary model overrides to environment variables. # Apply auxiliary model/direct-endpoint overrides to environment variables.
# Vision and web_extract each have their own provider + model pair. # Vision and web_extract each have their own provider/model/base_url/api_key tuple.
# (Compression is handled in the compression section above.) # (Compression is handled in the compression section above.)
# Only set env vars for non-empty / non-default values so auto-detection # Only set env vars for non-empty / non-default values so auto-detection
# still works. # still works.
auxiliary_config = defaults.get("auxiliary", {}) auxiliary_config = defaults.get("auxiliary", {})
auxiliary_task_env = { auxiliary_task_env = {
# config key → (provider env var, model env var) # config key → env var mapping
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), "vision": {
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), "provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
} }
for task_key, (prov_env, model_env) in auxiliary_task_env.items(): for task_key, env_map in auxiliary_task_env.items():
task_cfg = auxiliary_config.get(task_key, {}) task_cfg = auxiliary_config.get(task_key, {})
if not isinstance(task_cfg, dict): if not isinstance(task_cfg, dict):
continue continue
prov = str(task_cfg.get("provider", "")).strip() prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip() model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto": if prov and prov != "auto":
os.environ[prov_env] = prov os.environ[env_map["provider"]] = prov
if model: if model:
os.environ[model_env] = model os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# Security settings # Security settings
security_config = defaults.get("security", {}) security_config = defaults.get("security", {})

View file

@ -100,24 +100,40 @@ if _config_path.exists():
for _cfg_key, _env_var in _compression_env_map.items(): for _cfg_key, _env_var in _compression_env_map.items():
if _cfg_key in _compression_cfg: if _cfg_key in _compression_cfg:
os.environ[_env_var] = str(_compression_cfg[_cfg_key]) os.environ[_env_var] = str(_compression_cfg[_cfg_key])
# Auxiliary model overrides (vision, web_extract). # Auxiliary model/direct-endpoint overrides (vision, web_extract).
# Each task has provider + model; bridge non-default values to env vars. # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
_auxiliary_cfg = _cfg.get("auxiliary", {}) _auxiliary_cfg = _cfg.get("auxiliary", {})
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict): if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
_aux_task_env = { _aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), "vision": {
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), "provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
} }
for _task_key, (_prov_env, _model_env) in _aux_task_env.items(): for _task_key, _env_map in _aux_task_env.items():
_task_cfg = _auxiliary_cfg.get(_task_key, {}) _task_cfg = _auxiliary_cfg.get(_task_key, {})
if not isinstance(_task_cfg, dict): if not isinstance(_task_cfg, dict):
continue continue
_prov = str(_task_cfg.get("provider", "")).strip() _prov = str(_task_cfg.get("provider", "")).strip()
_model = str(_task_cfg.get("model", "")).strip() _model = str(_task_cfg.get("model", "")).strip()
_base_url = str(_task_cfg.get("base_url", "")).strip()
_api_key = str(_task_cfg.get("api_key", "")).strip()
if _prov and _prov != "auto": if _prov and _prov != "auto":
os.environ[_prov_env] = _prov os.environ[_env_map["provider"]] = _prov
if _model: if _model:
os.environ[_model_env] = _model os.environ[_env_map["model"]] = _model
if _base_url:
os.environ[_env_map["base_url"]] = _base_url
if _api_key:
os.environ[_env_map["api_key"]] = _api_key
_agent_cfg = _cfg.get("agent", {}) _agent_cfg = _cfg.get("agent", {})
if _agent_cfg and isinstance(_agent_cfg, dict): if _agent_cfg and isinstance(_agent_cfg, dict):
if "max_turns" in _agent_cfg: if "max_turns" in _agent_cfg:

View file

@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
"vision": { "vision": {
"provider": "auto", # auto | openrouter | nous | codex | custom "provider": "auto", # auto | openrouter | nous | codex | custom
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o" "model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
}, },
"web_extract": { "web_extract": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"compression": { "compression": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"session_search": { "session_search": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"skills_hub": { "skills_hub": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"mcp": { "mcp": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
"flush_memories": { "flush_memories": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
"base_url": "",
"api_key": "",
}, },
}, },
@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
"delegation": { "delegation": {
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model) "model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials) "provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
"base_url": "", # direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
}, },
# Ephemeral prefill messages file — JSON list of {role, content} dicts # Ephemeral prefill messages file — JSON list of {role, content} dicts

View file

@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
for key in ( for key in (
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
# Per-task provider/model overrides # Per-task provider/model/direct-endpoint overrides
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
): ):
monkeypatch.delenv(key, raising=False) monkeypatch.delenv(key, raising=False)
@ -142,6 +144,27 @@ class TestGetTextAuxiliaryClient:
call_kwargs = mock_openai.call_args call_kwargs = mock_openai.call_args
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_task_direct_endpoint_override(self, monkeypatch):
    """A per-task BASE_URL/API_KEY/MODEL trio is used for web_extract.

    OPENROUTER_API_KEY is also set, so the assertions show the task-level
    endpoint and key are the ones handed to the OpenAI constructor.
    """
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
    monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
    monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
    monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
    with patch("agent.auxiliary_client.OpenAI") as mock_openai:
        # Only the resolved model is asserted on; the client is not inspected.
        _client, model = get_text_auxiliary_client("web_extract")
    assert model == "task-model"
    assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
    assert mock_openai.call_args.kwargs["api_key"] == "task-key"
def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
    """A task base_url with no task API key must not yield a client.

    Only OPENROUTER_API_KEY is provided; the test expects ``(None, None)``
    and that the OpenAI constructor is never invoked.
    NOTE(review): relies on OPENAI_API_KEY being unset, presumably via the
    ``_clean_env`` fixture — confirm it applies to this class.
    """
    for name, value in (
        ("OPENROUTER_API_KEY", "or-key"),
        ("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1"),
        ("AUXILIARY_WEB_EXTRACT_MODEL", "task-model"),
    ):
        monkeypatch.setenv(name, value)

    with patch("agent.auxiliary_client.OpenAI") as mock_openai:
        resolved = get_text_auxiliary_client("web_extract")

    client, model = resolved
    assert client is None
    assert model is None
    mock_openai.assert_not_called()
def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch): def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
config = { config = {
"model": { "model": {
@ -217,6 +240,27 @@ class TestVisionClientFallback:
client, model = get_vision_auxiliary_client() client, model = get_vision_auxiliary_client()
assert client is not None # Custom endpoint picked up as fallback assert client is not None # Custom endpoint picked up as fallback
def test_vision_direct_endpoint_override(self, monkeypatch):
    """AUXILIARY_VISION_* endpoint env vars drive the vision client.

    OPENROUTER_API_KEY is set as well; the test checks that the OpenAI
    client is built with the vision-specific base URL and API key and that
    the vision model override is returned.
    """
    vision_env = {
        "OPENROUTER_API_KEY": "or-key",
        "AUXILIARY_VISION_BASE_URL": "http://localhost:4567/v1",
        "AUXILIARY_VISION_API_KEY": "vision-key",
        "AUXILIARY_VISION_MODEL": "vision-model",
    }
    for name, value in vision_env.items():
        monkeypatch.setenv(name, value)

    with patch("agent.auxiliary_client.OpenAI") as mock_openai:
        _client, model = get_vision_auxiliary_client()

    assert model == "vision-model"
    # The mock keeps its recorded call after the patch context exits.
    ctor_kwargs = mock_openai.call_args.kwargs
    assert ctor_kwargs["base_url"] == "http://localhost:4567/v1"
    assert ctor_kwargs["api_key"] == "vision-key"
def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
    """A vision base_url without any matching API key yields no client.

    Only OPENROUTER_API_KEY is provided; the test expects ``(None, None)``
    and that the OpenAI constructor is never invoked.
    NOTE(review): relies on OPENAI_API_KEY being unset, presumably via the
    ``_clean_env`` fixture — confirm it applies to this class.
    """
    for name, value in (
        ("OPENROUTER_API_KEY", "or-key"),
        ("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1"),
        ("AUXILIARY_VISION_MODEL", "vision-model"),
    ):
        monkeypatch.setenv(name, value)

    with patch("agent.auxiliary_client.OpenAI") as mock_openai:
        resolved = get_vision_auxiliary_client()

    client, model = resolved
    assert client is None
    assert model is None
    mock_openai.assert_not_called()
def test_vision_uses_openrouter_when_available(self, monkeypatch): def test_vision_uses_openrouter_when_available(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
with patch("agent.auxiliary_client.OpenAI") as mock_openai: with patch("agent.auxiliary_client.OpenAI") as mock_openai:
@ -434,6 +478,24 @@ class TestTaskSpecificOverrides:
client, model = get_text_auxiliary_client("web_extract") client, model = get_text_auxiliary_client("web_extract")
assert model == "google/gemini-3-flash-preview" assert model == "google/gemini-3-flash-preview"
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
    """A direct endpoint under ``auxiliary.web_extract`` in config.yaml is used.

    Writes a config.yaml into a temporary HERMES_HOME, then checks that the
    OpenAI client is constructed with the configured base URL and API key
    and that the configured model is returned.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)

    # YAML nesting is expressed through indentation: the endpoint settings
    # must sit under auxiliary -> web_extract, not at the top level.
    config_yaml = (
        "auxiliary:\n"
        "  web_extract:\n"
        "    base_url: http://localhost:3456/v1\n"
        "    api_key: config-key\n"
        "    model: config-model\n"
    )
    (hermes_home / "config.yaml").write_text(config_yaml, encoding="utf-8")
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    with patch("agent.auxiliary_client.OpenAI") as mock_openai:
        _client, model = get_text_auxiliary_client("web_extract")

    assert model == "config-model"
    # The mock keeps its recorded call after the patch context exits.
    ctor_kwargs = mock_openai.call_args.kwargs
    assert ctor_kwargs["base_url"] == "http://localhost:3456/v1"
    assert ctor_kwargs["api_key"] == "config-key"
def test_task_without_override_uses_auto(self, monkeypatch): def test_task_without_override_uses_auto(self, monkeypatch):
"""A task with no provider env var falls through to auto chain.""" """A task with no provider env var falls through to auto chain."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

View file

@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
(fake_home / "memories").mkdir() (fake_home / "memories").mkdir()
(fake_home / "skills").mkdir() (fake_home / "skills").mkdir()
monkeypatch.setenv("HERMES_HOME", str(fake_home)) monkeypatch.setenv("HERMES_HOME", str(fake_home))
# Tests should not inherit the agent's current gateway/messaging surface.
# Individual tests that need gateway behavior set these explicitly.
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@pytest.fixture() @pytest.fixture()

View file

@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
# Clear env vars # Clear env vars
for key in ( for key in (
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
): ):
monkeypatch.delenv(key, raising=False) monkeypatch.delenv(key, raising=False)
@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
auxiliary_cfg = config_dict.get("auxiliary", {}) auxiliary_cfg = config_dict.get("auxiliary", {})
if auxiliary_cfg and isinstance(auxiliary_cfg, dict): if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
aux_task_env = { aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), "vision": {
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), "provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
} }
for task_key, (prov_env, model_env) in aux_task_env.items(): for task_key, env_map in aux_task_env.items():
task_cfg = auxiliary_cfg.get(task_key, {}) task_cfg = auxiliary_cfg.get(task_key, {})
if not isinstance(task_cfg, dict): if not isinstance(task_cfg, dict):
continue continue
prov = str(task_cfg.get("provider", "")).strip() prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip() model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto": if prov and prov != "auto":
os.environ[prov_env] = prov os.environ[env_map["provider"]] = prov
if model: if model:
os.environ[model_env] = model os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# ── Config bridging tests ──────────────────────────────────────────────────── # ── Config bridging tests ────────────────────────────────────────────────────
@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous" assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash" assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
def test_direct_endpoint_bridged(self, monkeypatch):
    """``auxiliary.vision`` base_url / api_key / model are copied into env vars."""
    vision_cfg = {
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
        "model": "qwen2.5-vl",
    }
    _run_auxiliary_bridge({"auxiliary": {"vision": vision_cfg}}, monkeypatch)

    # Each config field should surface under its AUXILIARY_VISION_* name.
    expected_env = (
        ("AUXILIARY_VISION_BASE_URL", "http://localhost:1234/v1"),
        ("AUXILIARY_VISION_API_KEY", "local-key"),
        ("AUXILIARY_VISION_MODEL", "qwen2.5-vl"),
    )
    for env_name, expected in expected_env:
        assert os.environ.get(env_name) == expected
def test_compression_provider_bridged(self, monkeypatch): def test_compression_provider_bridged(self, monkeypatch):
config = { config = {
"compression": { "compression": {
@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
# Check for key patterns that indicate the bridge is present # Check for key patterns that indicate the bridge is present
assert "AUXILIARY_VISION_PROVIDER" in content assert "AUXILIARY_VISION_PROVIDER" in content
assert "AUXILIARY_VISION_MODEL" in content assert "AUXILIARY_VISION_MODEL" in content
assert "AUXILIARY_VISION_BASE_URL" in content
assert "AUXILIARY_VISION_API_KEY" in content
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
def test_gateway_has_compression_provider(self): def test_gateway_has_compression_provider(self):
"""Gateway must bridge compression.summary_provider.""" """Gateway must bridge compression.summary_provider."""

View file

@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
""" """
import json import json
import os
import sys import sys
import unittest import unittest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase):
self.assertEqual(creds["api_mode"], "chat_completions") self.assertEqual(creds["api_mode"], "chat_completions")
mock_resolve.assert_called_once_with(requested="openrouter") mock_resolve.assert_called_once_with(requested="openrouter")
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
    """A configured base_url/api_key wins even when a provider is also set.

    The config names ``provider: openrouter``, yet the resolved bundle is
    expected to report the ``custom`` provider with the configured endpoint.
    """
    delegation_cfg = {
        "model": "qwen2.5-coder",
        "provider": "openrouter",
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
    }
    resolved = _resolve_delegation_credentials(
        delegation_cfg, _make_mock_parent(depth=0)
    )

    for field, expected in (
        ("model", "qwen2.5-coder"),
        ("provider", "custom"),
        ("base_url", "http://localhost:1234/v1"),
        ("api_key", "local-key"),
        ("api_mode", "chat_completions"),
    ):
        self.assertEqual(resolved[field], expected)
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
    """Without ``api_key`` in config, OPENAI_API_KEY from the environment is used."""
    delegation_cfg = {
        "model": "qwen2.5-coder",
        "base_url": "http://localhost:1234/v1",
    }
    mock_parent = _make_mock_parent(depth=0)
    env_overlay = {"OPENAI_API_KEY": "env-openai-key"}

    with patch.dict(os.environ, env_overlay, clear=False):
        resolved = _resolve_delegation_credentials(delegation_cfg, mock_parent)

    self.assertEqual(resolved["api_key"], "env-openai-key")
    self.assertEqual(resolved["provider"], "custom")
def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
    """OPENROUTER_API_KEY alone must not satisfy a direct endpoint.

    With a base_url but no api_key in config, and only OPENROUTER_API_KEY in
    the environment, credential resolution is expected to raise ValueError
    with a message that mentions OPENAI_API_KEY.
    """
    parent = _make_mock_parent(depth=0)
    cfg = {
        "model": "qwen2.5-coder",
        "base_url": "http://localhost:1234/v1",
    }
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
        # clear=False keeps the ambient environment, so an OPENAI_API_KEY set
        # on the developer/CI machine would satisfy the fallback and hide the
        # failure under test. patch.dict restores the whole mapping on exit,
        # so removing the key here does not leak outside this block.
        os.environ.pop("OPENAI_API_KEY", None)
        with self.assertRaises(ValueError) as ctx:
            _resolve_delegation_credentials(cfg, parent)
    self.assertIn("OPENAI_API_KEY", str(ctx.exception))
@patch("hermes_cli.runtime_provider.resolve_runtime_provider") @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_nous_provider_resolves_nous_credentials(self, mock_resolve): def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
"""Nous provider resolves Nous Portal base_url and api_key.""" """Nous provider resolves Nous Portal base_url and api_key."""
@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
self.assertNotEqual(kwargs["base_url"], parent.base_url) self.assertNotEqual(kwargs["base_url"], parent.base_url)
self.assertNotEqual(kwargs["api_key"], parent.api_key) self.assertNotEqual(kwargs["api_key"], parent.api_key)
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
    """Resolved direct-endpoint credentials are passed to the child AIAgent.

    Config loading and credential resolution are both mocked; the test then
    checks the keyword arguments the child agent class was constructed with.
    """
    resolved_creds = {
        "model": "qwen2.5-coder",
        "provider": "custom",
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
        "api_mode": "chat_completions",
    }
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "qwen2.5-coder",
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
    }
    mock_creds.return_value = dict(resolved_creds)
    parent = _make_mock_parent(depth=0)

    with patch("run_agent.AIAgent") as MockAgent:
        child_agent = MagicMock()
        child_agent.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1
        }
        MockAgent.return_value = child_agent

        delegate_task(goal="Direct endpoint test", parent_agent=parent)

        # call_args is an (args, kwargs) pair; only the keywords matter here.
        child_kwargs = MockAgent.call_args[1]
        for field in ("model", "provider", "base_url", "api_key", "api_mode"):
            self.assertEqual(child_kwargs[field], resolved_creds[field])
@patch("tools.delegate_tool._load_config") @patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials") @patch("tools.delegate_tool._resolve_delegation_credentials")
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg): def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):

View file

@ -540,18 +540,51 @@ def delegate_task(
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
"""Resolve credentials for subagent delegation. """Resolve credentials for subagent delegation.
If ``delegation.provider`` is configured, resolves the full credential If ``delegation.base_url`` is configured, subagents use that direct
bundle (base_url, api_key, api_mode, provider) via the runtime provider OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
system the same path used by CLI/gateway startup. This lets subagents configured, the full credential bundle (base_url, api_key, api_mode,
run on a completely different provider:model pair. provider) is resolved via the runtime provider system the same path used
by CLI/gateway startup. This lets subagents run on a completely different
provider:model pair.
If no provider is configured, returns None values so the child inherits If neither base_url nor provider is configured, returns None values so the
everything from the parent agent. child inherits everything from the parent agent.
Raises ValueError with a user-friendly message on credential failure. Raises ValueError with a user-friendly message on credential failure.
""" """
configured_model = cfg.get("model") or None configured_model = str(cfg.get("model") or "").strip() or None
configured_provider = cfg.get("provider") or None configured_provider = str(cfg.get("provider") or "").strip() or None
configured_base_url = str(cfg.get("base_url") or "").strip() or None
configured_api_key = str(cfg.get("api_key") or "").strip() or None
if configured_base_url:
api_key = (
configured_api_key
or os.getenv("OPENAI_API_KEY", "").strip()
)
if not api_key:
raise ValueError(
"Delegation base_url is configured but no API key was found. "
"Set delegation.api_key or OPENAI_API_KEY."
)
base_lower = configured_base_url.lower()
provider = "custom"
api_mode = "chat_completions"
if "chatgpt.com/backend-api/codex" in base_lower:
provider = "openai-codex"
api_mode = "codex_responses"
elif "api.anthropic.com" in base_lower:
provider = "anthropic"
api_mode = "anthropic_messages"
return {
"model": configured_model,
"provider": provider,
"base_url": configured_base_url,
"api_key": api_key,
"api_mode": api_mode,
}
if not configured_provider: if not configured_provider:
# No provider override — child inherits everything from parent # No provider override — child inherits everything from parent
@ -570,7 +603,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
except Exception as exc: except Exception as exc:
raise ValueError( raise ValueError(
f"Cannot resolve delegation provider '{configured_provider}': {exc}. " f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
f"Check that the provider is configured (API key set, valid provider name). " f"Check that the provider is configured (API key set, valid provider name), "
f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
f"Available providers: openrouter, nous, zai, kimi-coding, minimax." f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
) from exc ) from exc

View file

@ -180,6 +180,23 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) | | `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries | | `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
## Auxiliary Task Overrides
| Variable | Description |
|----------|-------------|
| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
| `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
For task-specific direct endpoints, Hermes uses the task's configured API key or `OPENAI_API_KEY`. It does not reuse `OPENROUTER_API_KEY` for those custom endpoints.
## Provider Routing (config.yaml only) ## Provider Routing (config.yaml only)
These go in `~/.hermes/config.yaml` under the `provider_routing` section: These go in `~/.hermes/config.yaml` under the `provider_routing` section:

View file

@ -571,11 +571,15 @@ auxiliary:
vision: vision:
provider: "auto" # "auto", "openrouter", "nous", "main" provider: "auto" # "auto", "openrouter", "nous", "main"
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash" model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
base_url: "" # direct OpenAI-compatible endpoint (takes precedence over provider)
api_key: "" # API key for base_url (falls back to OPENAI_API_KEY)
# Web page summarization + browser page text extraction # Web page summarization + browser page text extraction
web_extract: web_extract:
provider: "auto" provider: "auto"
model: "" # e.g. "google/gemini-2.5-flash" model: "" # e.g. "google/gemini-2.5-flash"
base_url: ""
api_key: ""
``` ```
### Changing the Vision Model ### Changing the Vision Model
@ -606,6 +610,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
### Common Setups ### Common Setups
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
```yaml
auxiliary:
vision:
base_url: "http://localhost:1234/v1"
api_key: "local-key"
model: "qwen2.5-vl"
```
`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint. For direct endpoint overrides, Hermes uses the configured `api_key` or falls back to `OPENAI_API_KEY`; it does not reuse `OPENROUTER_API_KEY` for that custom endpoint.
**Using OpenAI API key for vision:** **Using OpenAI API key for vision:**
```yaml ```yaml
# In ~/.hermes/.env: # In ~/.hermes/.env:
@ -852,13 +867,17 @@ delegation:
- web - web
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent) # model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
# provider: "openrouter" # Override provider (empty = inherit parent) # provider: "openrouter" # Override provider (empty = inherit parent)
# base_url: "http://localhost:1234/v1" # Direct OpenAI-compatible endpoint (takes precedence over provider)
# api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY)
``` ```
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model. **Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
**Direct endpoint override:** To send subagents straight to a custom OpenAI-compatible endpoint, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. This takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back to `OPENAI_API_KEY` only; it does not reuse `OPENROUTER_API_KEY`.
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed. The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter). **Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
## Clarify ## Clarify

View file

@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
delegation: delegation:
max_iterations: 50 # Max turns per child (default: 50) max_iterations: 50 # Max turns per child (default: 50)
default_toolsets: ["terminal", "file", "web"] # Default toolsets default_toolsets: ["terminal", "file", "web"] # Default toolsets
model: "google/gemini-3-flash-preview" # Optional provider/model override
provider: "openrouter" # Optional built-in provider
# Or, as an alternative to the block above (do not combine both), use a direct custom endpoint instead of provider:
delegation:
model: "qwen2.5-coder"
base_url: "http://localhost:1234/v1"
api_key: "local-key"
``` ```
:::tip :::tip