Merge origin/main into hermes/hermes-7ef7cb6a

teknium1 2026-03-14 21:39:01 -07:00
commit 9c322f7f59
31 changed files with 956 additions and 121 deletions


@ -30,6 +30,10 @@ Default "auto" follows the chains above.
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
than the provider's default.
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
custom OpenAI-compatible endpoint without touching the main model settings.
"""
import json
@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
return "auto"
def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
"""Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
if not task:
return None
for prefix in ("AUXILIARY_", "CONTEXT_"):
val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
if val:
return val
return None
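# Example (sketch): for task="web_extract" and suffix="BASE_URL", this checks
# AUXILIARY_WEB_EXTRACT_BASE_URL first, then CONTEXT_WEB_EXTRACT_BASE_URL,
# returning the first non-empty value:
#   _get_auxiliary_env_override("web_extract", "BASE_URL")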
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
or_key = os.getenv("OPENROUTER_API_KEY")
if not or_key:
@ -599,6 +614,8 @@ def resolve_provider_client(
model: str = None,
async_mode: bool = False,
raw_codex: bool = False,
explicit_base_url: str = None,
explicit_api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]:
"""Central router: given a provider name and optional model, return a
configured client with the correct auth, base URL, and API format.
@ -620,6 +637,8 @@ def resolve_provider_client(
instead of wrapping in CodexAuxiliaryClient. Use this when
the caller needs direct access to responses.stream() (e.g.,
the main agent loop).
explicit_base_url: Optional direct OpenAI-compatible endpoint.
explicit_api_key: Optional API key paired with explicit_base_url.
Returns:
(client, resolved_model) or (None, None) if auth is unavailable.
@ -696,6 +715,22 @@ def resolve_provider_client(
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom":
if explicit_base_url:
custom_base = explicit_base_url.strip()
custom_key = (
(explicit_api_key or "").strip()
or os.getenv("OPENAI_API_KEY", "").strip()
)
if not custom_base or not custom_key:
logger.warning(
"resolve_provider_client: explicit custom endpoint requested "
"but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
)
return None, None
final_model = model or _read_main_model() or "gpt-4o-mini"
client = OpenAI(api_key=custom_key, base_url=custom_base)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
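# Example (sketch, hypothetical endpoint and key): route one call to a local
# OpenAI-compatible server without touching OPENAI_BASE_URL:
#   client, model = resolve_provider_client(
#       "custom",
#       model="qwen2.5-vl",
#       explicit_base_url="http://localhost:1234/v1",
#       explicit_api_key="local-key",
#   )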
# Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex,
_resolve_api_key_provider):
@ -784,10 +819,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
Callers may override the returned model with a per-task env var
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
"""
forced = _get_auxiliary_provider(task)
if forced != "auto":
return resolve_provider_client(forced)
return resolve_provider_client("auto")
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
return resolve_provider_client(
provider,
model=model,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
def get_async_text_auxiliary_client(task: str = ""):
@ -797,10 +835,14 @@ def get_async_text_auxiliary_client(task: str = ""):
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
Returns (None, None) when no provider is available.
"""
forced = _get_auxiliary_provider(task)
if forced != "auto":
return resolve_provider_client(forced, async_mode=True)
return resolve_provider_client("auto", async_mode=True)
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
return resolve_provider_client(
provider,
model=model,
async_mode=True,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
_VISION_AUTO_PROVIDER_ORDER = (
@ -856,26 +898,43 @@ def resolve_vision_provider_client(
provider: Optional[str] = None,
model: Optional[str] = None,
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
async_mode: bool = False,
) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
"""Resolve the client actually used for vision tasks.
Explicit provider overrides still use the generic provider router for
non-standard backends, so users can intentionally force experimental
providers. Auto mode stays conservative and only tries vision backends
known to work today.
Direct endpoint overrides take precedence over provider selection. Explicit
provider overrides still use the generic provider router for non-standard
backends, so users can intentionally force experimental providers. Auto mode
stays conservative and only tries vision backends known to work today.
"""
requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision"))
requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
"vision", provider, model, base_url, api_key
)
requested = _normalize_vision_provider(requested)
def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
if sync_client is None:
return resolved_provider, None, None
final_model = model or default_model
final_model = resolved_model or default_model
if async_mode:
async_client, async_model = _to_async_client(sync_client, final_model)
return resolved_provider, async_client, async_model
return resolved_provider, sync_client, final_model
if resolved_base_url:
client, final_model = resolve_provider_client(
"custom",
model=resolved_model,
async_mode=async_mode,
explicit_base_url=resolved_base_url,
explicit_api_key=resolved_api_key,
)
if client is None:
return "custom", None, None
return "custom", client, final_model
if requested == "auto":
for candidate in get_available_vision_backends():
sync_client, default_model = _resolve_strict_vision_backend(candidate)
@ -888,7 +947,7 @@ def resolve_vision_provider_client(
sync_client, default_model = _resolve_strict_vision_backend(requested)
return _finalize(requested, sync_client, default_model)
client, final_model = _get_cached_client(requested, model, async_mode)
client, final_model = _get_cached_client(requested, resolved_model, async_mode)
if client is None:
return requested, None, None
return requested, client, final_model
@ -945,19 +1004,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode) -> (client, default_model)
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
_client_cache: Dict[tuple, tuple] = {}
def _get_cached_client(
provider: str, model: str = None, async_mode: bool = False,
provider: str,
model: str = None,
async_mode: bool = False,
base_url: str = None,
api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]:
"""Get or create a cached client for the given provider."""
cache_key = (provider, async_mode)
cache_key = (provider, async_mode, base_url or "", api_key or "")
if cache_key in _client_cache:
cached_client, cached_default = _client_cache[cache_key]
return cached_client, model or cached_default
client, default_model = resolve_provider_client(provider, model, async_mode)
client, default_model = resolve_provider_client(
provider,
model,
async_mode,
explicit_base_url=base_url,
explicit_api_key=api_key,
)
if client is not None:
_client_cache[cache_key] = (client, default_model)
return client, model or default_model
@ -967,57 +1036,75 @@ def _resolve_task_provider_model(
task: str = None,
provider: str = None,
model: str = None,
) -> Tuple[str, Optional[str]]:
base_url: str = None,
api_key: str = None,
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
"""Determine provider + model for a call.
Priority:
1. Explicit provider/model args (always win)
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
3. Config file (auxiliary.{task}.provider/model or compression.*)
1. Explicit provider/model/base_url/api_key args (always win)
2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
3. Config file (auxiliary.{task}.* or compression.*)
4. "auto" (full auto-detection chain)
Returns (provider, model) where model may be None (use provider default).
Returns (provider, model, base_url, api_key) where model may be None
(use provider default). When base_url is set, provider is forced to
"custom" and the task uses that direct endpoint.
"""
if provider:
return provider, model
config = {}
cfg_provider = None
cfg_model = None
cfg_base_url = None
cfg_api_key = None
if task:
# Check env var overrides first
env_provider = _get_auxiliary_provider(task)
if env_provider != "auto":
# Check for env var model override too
env_model = None
for prefix in ("AUXILIARY_", "CONTEXT_"):
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
if val:
env_model = val
break
return env_provider, model or env_model
# Read from config file
try:
from hermes_cli.config import load_config
config = load_config()
except ImportError:
return "auto", model
config = {}
# Check auxiliary.{task} section
aux = config.get("auxiliary", {})
task_config = aux.get(task, {})
cfg_provider = task_config.get("provider", "").strip() or None
cfg_model = task_config.get("model", "").strip() or None
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
if not isinstance(task_config, dict):
task_config = {}
cfg_provider = str(task_config.get("provider", "")).strip() or None
cfg_model = str(task_config.get("model", "")).strip() or None
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
# Backwards compat: compression section has its own keys
if task == "compression" and not cfg_provider:
comp = config.get("compression", {})
cfg_provider = comp.get("summary_provider", "").strip() or None
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
comp = config.get("compression", {}) if isinstance(config, dict) else {}
if isinstance(comp, dict):
cfg_provider = comp.get("summary_provider", "").strip() or None
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
resolved_model = model or env_model or cfg_model
if base_url:
return "custom", resolved_model, base_url, api_key
if provider:
return provider, resolved_model, base_url, api_key
if task:
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
if env_base_url:
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
env_provider = _get_auxiliary_provider(task)
if env_provider != "auto":
return env_provider, resolved_model, None, None
if cfg_base_url:
return "custom", resolved_model, cfg_base_url, cfg_api_key
if cfg_provider and cfg_provider != "auto":
return cfg_provider, model or cfg_model
return "auto", model or cfg_model
return cfg_provider, resolved_model, None, None
return "auto", resolved_model, None, None
return "auto", model
return "auto", resolved_model, None, None
def _build_call_kwargs(
@ -1029,6 +1116,7 @@ def _build_call_kwargs(
tools: Optional[list] = None,
timeout: float = 30.0,
extra_body: Optional[dict] = None,
base_url: Optional[str] = None,
) -> dict:
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
kwargs: Dict[str, Any] = {
@ -1044,7 +1132,7 @@ def _build_call_kwargs(
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom":
custom_base = _current_custom_base_url()
custom_base = base_url or _current_custom_base_url()
if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens
else:
@ -1070,6 +1158,8 @@ def call_llm(
*,
provider: str = None,
model: str = None,
base_url: str = None,
api_key: str = None,
messages: list,
temperature: float = None,
max_tokens: int = None,
@ -1101,16 +1191,18 @@ def call_llm(
Raises:
RuntimeError: If no provider is configured.
"""
resolved_provider, resolved_model = _resolve_task_provider_model(
task, provider, model)
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
task, provider, model, base_url, api_key)
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider,
model=resolved_model,
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
async_mode=False,
)
if client is None and resolved_provider != "auto":
if client is None and resolved_provider != "auto" and not resolved_base_url:
logger.warning(
"Vision provider %s unavailable, falling back to auto vision backends",
resolved_provider,
@ -1127,10 +1219,15 @@ def call_llm(
)
resolved_provider = effective_provider or resolved_provider
else:
client, final_model = _get_cached_client(resolved_provider, resolved_model)
client, final_model = _get_cached_client(
resolved_provider,
resolved_model,
base_url=resolved_base_url,
api_key=resolved_api_key,
)
if client is None:
# Fallback: try openrouter
if resolved_provider != "openrouter":
if resolved_provider != "openrouter" and not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter",
resolved_provider)
client, final_model = _get_cached_client(
@ -1143,7 +1240,8 @@ def call_llm(
kwargs = _build_call_kwargs(
resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=timeout, extra_body=extra_body)
tools=tools, timeout=timeout, extra_body=extra_body,
base_url=resolved_base_url)
# Handle max_tokens vs max_completion_tokens retry
try:
@ -1162,6 +1260,8 @@ async def async_call_llm(
*,
provider: str = None,
model: str = None,
base_url: str = None,
api_key: str = None,
messages: list,
temperature: float = None,
max_tokens: int = None,
@ -1173,16 +1273,18 @@ async def async_call_llm(
Same as call_llm() but async. See call_llm() for full documentation.
"""
resolved_provider, resolved_model = _resolve_task_provider_model(
task, provider, model)
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
task, provider, model, base_url, api_key)
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider,
model=resolved_model,
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
async_mode=True,
)
if client is None and resolved_provider != "auto":
if client is None and resolved_provider != "auto" and not resolved_base_url:
logger.warning(
"Vision provider %s unavailable, falling back to auto vision backends",
resolved_provider,
@ -1200,9 +1302,14 @@ async def async_call_llm(
resolved_provider = effective_provider or resolved_provider
else:
client, final_model = _get_cached_client(
resolved_provider, resolved_model, async_mode=True)
resolved_provider,
resolved_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
)
if client is None:
if resolved_provider != "openrouter":
if resolved_provider != "openrouter" and not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter",
resolved_provider)
client, final_model = _get_cached_client(
@ -1216,7 +1323,8 @@ async def async_call_llm(
kwargs = _build_call_kwargs(
resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=timeout, extra_body=extra_body)
tools=tools, timeout=timeout, extra_body=extra_body,
base_url=resolved_base_url)
try:
return await client.chat.completions.create(**kwargs)


@ -1,17 +1,42 @@
"""Skill slash commands — scan installed skills and build invocation messages.
"""Shared slash command helpers for skills and built-in prompt-style modes.
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
can invoke skills via /skill-name commands.
can invoke skills via /skill-name commands and prompt-only built-ins like
/plan.
"""
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
def build_plan_path(
user_instruction: str = "",
*,
now: datetime | None = None,
) -> Path:
"""Return the default workspace-relative markdown path for a /plan invocation.
Relative paths are intentional: file tools are task/backend-aware and resolve
them against the active working directory for local, docker, ssh, modal,
daytona, and similar terminal backends. That keeps the plan with the active
workspace instead of the Hermes host's global home directory.
"""
slug_lines = (user_instruction or "").strip().splitlines()
slug_source = slug_lines[0] if slug_lines else ""
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
if slug:
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
slug = slug or "conversation-plan"
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
@ -56,6 +81,7 @@ def _build_skill_message(
skill_dir: Path | None,
activation_note: str,
user_instruction: str = "",
runtime_note: str = "",
) -> str:
"""Format a loaded skill into a user/system message payload."""
from tools.skills_tool import SKILLS_DIR
@ -115,6 +141,10 @@ def _build_skill_message(
parts.append("")
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
if runtime_note:
parts.append("")
parts.append(f"[Runtime note: {runtime_note}]")
return "\n".join(parts)
@ -172,6 +202,7 @@ def build_skill_invocation_message(
cmd_key: str,
user_instruction: str = "",
task_id: str | None = None,
runtime_note: str = "",
) -> Optional[str]:
"""Build the user message content for a skill slash command invocation.
@ -201,6 +232,7 @@ def build_skill_invocation_message(
skill_dir,
activation_note,
user_instruction=user_instruction,
runtime_note=runtime_note,
)

cli.py

@ -218,11 +218,27 @@ def load_cli_config() -> Dict[str, Any]:
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
"max_tool_calls": 50, # Max RPC tool calls per execution
},
"auxiliary": {
"vision": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"web_extract": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
},
"delegation": {
"max_iterations": 45, # Max tool-calling turns per child agent
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
"model": "", # Subagent model override (empty = inherit parent model)
"provider": "", # Subagent provider override (empty = inherit parent provider)
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
},
}
@ -363,28 +379,44 @@ def load_cli_config() -> Dict[str, Any]:
if config_key in compression_config:
os.environ[env_var] = str(compression_config[config_key])
# Apply auxiliary model overrides to environment variables.
# Vision and web_extract each have their own provider + model pair.
# Apply auxiliary model/direct-endpoint overrides to environment variables.
# Vision and web_extract each have their own provider/model/base_url/api_key tuple.
# (Compression is handled in the compression section above.)
# Only set env vars for non-empty / non-default values so auto-detection
# still works.
auxiliary_config = defaults.get("auxiliary", {})
auxiliary_task_env = {
# config key → (provider env var, model env var)
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
# config key → env var mapping
"vision": {
"provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
}
for task_key, (prov_env, model_env) in auxiliary_task_env.items():
for task_key, env_map in auxiliary_task_env.items():
task_cfg = auxiliary_config.get(task_key, {})
if not isinstance(task_cfg, dict):
continue
prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto":
os.environ[prov_env] = prov
os.environ[env_map["provider"]] = prov
if model:
os.environ[model_env] = model
os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# Security settings
security_config = defaults.get("security", {})
@ -1048,6 +1080,7 @@ from agent.skill_commands import (
scan_skill_commands,
get_skill_commands,
build_skill_invocation_message,
build_plan_path,
build_preloaded_skills_prompt,
)
@ -3161,6 +3194,8 @@ class HermesCLI:
elif cmd_lower.startswith("/personality"):
# Use original case (handler lowercases the personality name itself)
self._handle_personality_command(cmd_original)
elif cmd_lower == "/plan" or cmd_lower.startswith("/plan "):
self._handle_plan_command(cmd_original)
elif cmd_lower == "/retry":
retry_msg = self.retry_last()
if retry_msg and hasattr(self, '_pending_input'):
@ -3272,6 +3307,32 @@ class HermesCLI:
return True
def _handle_plan_command(self, cmd: str):
"""Handle /plan [request] — load the bundled plan skill."""
parts = cmd.strip().split(maxsplit=1)
user_instruction = parts[1].strip() if len(parts) > 1 else ""
plan_path = build_plan_path(user_instruction)
msg = build_skill_invocation_message(
"/plan",
user_instruction,
task_id=self.session_id,
runtime_note=(
"Save the markdown plan with write_file to this exact relative path "
f"inside the active workspace/backend cwd: {plan_path}"
),
)
if not msg:
self.console.print("[bold red]Failed to load the bundled /plan skill[/]")
return
_cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
if hasattr(self, '_pending_input'):
self._pending_input.put(msg)
else:
self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]")
def _handle_background_command(self, cmd: str):
"""Handle /background <prompt> — run a prompt in a separate background session.


@ -7,7 +7,8 @@ This module provides scheduled task execution, allowing the agent to:
- Execute tasks in isolated sessions (no prior context)
Cron jobs are executed automatically by the gateway daemon:
hermes gateway install # Install as system service (recommended)
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux servers: boot-time system service
hermes gateway # Or run in foreground
The gateway ticks the scheduler every 60 seconds. A file lock prevents


@ -100,24 +100,40 @@ if _config_path.exists():
for _cfg_key, _env_var in _compression_env_map.items():
if _cfg_key in _compression_cfg:
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
# Auxiliary model overrides (vision, web_extract).
# Each task has provider + model; bridge non-default values to env vars.
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
_auxiliary_cfg = _cfg.get("auxiliary", {})
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
_aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
"vision": {
"provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
}
for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
for _task_key, _env_map in _aux_task_env.items():
_task_cfg = _auxiliary_cfg.get(_task_key, {})
if not isinstance(_task_cfg, dict):
continue
_prov = str(_task_cfg.get("provider", "")).strip()
_model = str(_task_cfg.get("model", "")).strip()
_base_url = str(_task_cfg.get("base_url", "")).strip()
_api_key = str(_task_cfg.get("api_key", "")).strip()
if _prov and _prov != "auto":
os.environ[_prov_env] = _prov
os.environ[_env_map["provider"]] = _prov
if _model:
os.environ[_model_env] = _model
os.environ[_env_map["model"]] = _model
if _base_url:
os.environ[_env_map["base_url"]] = _base_url
if _api_key:
os.environ[_env_map["api_key"]] = _api_key
_agent_cfg = _cfg.get("agent", {})
if _agent_cfg and isinstance(_agent_cfg, dict):
if "max_turns" in _agent_cfg:
@ -1098,7 +1114,7 @@ class GatewayRunner:
# Emit command:* hook for any recognized slash command
_known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
"personality", "retry", "undo", "sethome", "set-home",
"personality", "plan", "retry", "undo", "sethome", "set-home",
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
"update", "title", "resume", "provider", "rollback",
"background", "reasoning", "voice"}
@ -1134,6 +1150,28 @@ class GatewayRunner:
if command == "personality":
return await self._handle_personality_command(event)
if command == "plan":
try:
from agent.skill_commands import build_plan_path, build_skill_invocation_message
user_instruction = event.get_command_args().strip()
plan_path = build_plan_path(user_instruction)
event.text = build_skill_invocation_message(
"/plan",
user_instruction,
task_id=_quick_key,
runtime_note=(
"Save the markdown plan with write_file to this exact relative path "
f"inside the active workspace/backend cwd: {plan_path}"
),
)
if not event.text:
return "Failed to load the bundled /plan skill."
command = None
except Exception as e:
logger.exception("Failed to prepare /plan command")
return f"Failed to enter plan mode: {e}"
if command == "retry":
return await self._handle_retry_command(event)


@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
"vision": {
"provider": "auto", # auto | openrouter | nous | codex | custom
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
},
"web_extract": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"compression": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"session_search": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"skills_hub": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"mcp": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
"flush_memories": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
},
},
@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
"delegation": {
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
"base_url": "", # direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
},
# Ephemeral prefill messages file — JSON list of {role, content} dicts


@ -96,6 +96,7 @@ def cron_list(show_all: bool = False):
if not find_gateway_pids():
print(color(" ⚠ Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
print(color(" Start it with: hermes gateway install", Colors.DIM))
print(color(" sudo hermes gateway install --system # Linux servers", Colors.DIM))
print()
@ -120,7 +121,8 @@ def cron_status():
print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
print()
print(" To enable automatic execution:")
print(" hermes gateway install # Install as system service (recommended)")
print(" hermes gateway install # Install as a user service")
print(" sudo hermes gateway install --system # Linux servers: boot-time system service")
print(" hermes gateway # Or run in foreground")
print()


@ -2313,7 +2313,7 @@ Examples:
hermes gateway Run messaging gateway
hermes -s hermes-agent-dev,github-auth
hermes -w Start in isolated git worktree
hermes gateway install Install as system service
hermes gateway install Install gateway background service
hermes sessions list List past sessions
hermes sessions browse Interactive session picker
hermes sessions rename ID T Rename/title a session


@ -0,0 +1,57 @@
---
name: plan
description: Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work.
version: 1.0.0
author: Hermes Agent
license: MIT
metadata:
hermes:
tags: [planning, plan-mode, implementation, workflow]
related_skills: [writing-plans, subagent-driven-development]
---
# Plan Mode
Use this skill when the user wants a plan instead of execution.
## Core behavior
For this turn, you are planning only.
- Do not implement code.
- Do not edit project files except the plan markdown file.
- Do not run mutating terminal commands, commit, push, or perform external actions.
- You may inspect the repo or other context with read-only commands/tools when needed.
- Your deliverable is a markdown plan saved inside the active workspace under `.hermes/plans/`.
## Output requirements
Write a markdown plan that is concrete and actionable.
Include, when relevant:
- Goal
- Current context / assumptions
- Proposed approach
- Step-by-step plan
- Files likely to change
- Tests / validation
- Risks, tradeoffs, and open questions
If the task is code-related, include exact file paths, likely test targets, and verification steps.
## Save location
Save the plan with `write_file` under:
- `.hermes/plans/YYYY-MM-DD_HHMMSS-<slug>.md`
Treat that as relative to the active working directory / backend workspace. Hermes file tools are backend-aware, so using this relative path keeps the plan with the workspace on local, docker, ssh, modal, and daytona backends.
If the runtime provides a specific target path, use that exact path.
If not, create a sensible timestamped filename yourself under `.hermes/plans/`.
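For example: `.hermes/plans/2026-03-15_093045-implement-oauth-login.md` (hypothetical timestamp and slug).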
## Interaction style
- If the request is clear enough, write the plan directly.
- If no explicit instruction accompanies `/plan`, infer the task from the current conversation context.
- If it is genuinely underspecified, ask a brief clarifying question instead of guessing.
- After saving the plan, reply briefly with what you planned and the saved path.


@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
for key in (
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
# Per-task provider/model overrides
# Per-task provider/model/direct-endpoint overrides
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
):
monkeypatch.delenv(key, raising=False)
@ -142,6 +144,27 @@ class TestGetTextAuxiliaryClient:
call_kwargs = mock_openai.call_args
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_task_direct_endpoint_override(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client("web_extract")
assert model == "task-model"
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
assert mock_openai.call_args.kwargs["api_key"] == "task-key"
def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client("web_extract")
assert client is None
assert model is None
mock_openai.assert_not_called()
def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
config = {
"model": {
@ -217,6 +240,27 @@ class TestVisionClientFallback:
client, model = get_vision_auxiliary_client()
assert client is not None # Custom endpoint picked up as fallback
def test_vision_direct_endpoint_override(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_vision_auxiliary_client()
assert model == "vision-model"
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_vision_auxiliary_client()
assert client is None
assert model is None
mock_openai.assert_not_called()
def test_vision_uses_openrouter_when_available(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
@ -434,6 +478,24 @@ class TestTaskSpecificOverrides:
client, model = get_text_auxiliary_client("web_extract")
assert model == "google/gemini-3-flash-preview"
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
(hermes_home / "config.yaml").write_text(
"""auxiliary:
web_extract:
base_url: http://localhost:3456/v1
api_key: config-key
model: config-model
"""
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client("web_extract")
assert model == "config-model"
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
assert mock_openai.call_args.kwargs["api_key"] == "config-key"
def test_task_without_override_uses_auto(self, monkeypatch):
"""A task with no provider env var falls through to auto chain."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")


@ -1,13 +1,16 @@
"""Tests for agent/skill_commands.py — skill slash command scanning and platform filtering."""
import os
from datetime import datetime
from pathlib import Path
from unittest.mock import patch
import tools.skills_tool as skills_tool_module
from agent.skill_commands import (
scan_skill_commands,
build_skill_invocation_message,
build_plan_path,
build_preloaded_skills_prompt,
build_skill_invocation_message,
scan_skill_commands,
)
@ -272,3 +275,37 @@ Generate some audio.
assert msg is not None
assert 'file_path="<path>"' in msg
class TestPlanSkillHelpers:
def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self):
path = build_plan_path(
"Implement OAuth login + refresh tokens!",
now=datetime(2026, 3, 15, 9, 30, 45),
)
assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md"
def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_skill(
tmp_path,
"plan",
body="Save plans under .hermes/plans in the active workspace and do not execute the work.",
)
scan_skill_commands()
msg = build_skill_invocation_message(
"/plan",
"Add a /plan command",
runtime_note=(
"Save the markdown plan with write_file to this exact relative path inside "
"the active workspace/backend cwd: .hermes/plans/plan.md"
),
)
assert msg is not None
assert "Save plans under $HERMES_HOME/plans" not in msg
assert ".hermes/plans" in msg
assert "Add a /plan command" in msg
assert ".hermes/plans/plan.md" in msg
assert "Runtime note:" in msg


@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
(fake_home / "memories").mkdir()
(fake_home / "skills").mkdir()
monkeypatch.setenv("HERMES_HOME", str(fake_home))
# Tests should not inherit the agent's current gateway/messaging surface.
# Individual tests that need gateway behavior set these explicitly.
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@pytest.fixture()


@ -0,0 +1,129 @@
"""Tests for the /plan gateway slash command."""
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from agent.skill_commands import scan_skill_commands
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource
def _make_runner():
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner.config = GatewayConfig(
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
)
runner.adapters = {}
runner._voice_mode = {}
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = SessionEntry(
session_key="agent:main:telegram:dm:c1:u1",
session_id="sess-1",
created_at=datetime.now(),
updated_at=datetime.now(),
platform=Platform.TELEGRAM,
chat_type="dm",
)
runner.session_store.load_transcript.return_value = []
runner.session_store.has_any_sessions.return_value = True
runner.session_store.append_to_transcript = MagicMock()
runner.session_store.rewrite_transcript = MagicMock()
runner._running_agents = {}
runner._pending_messages = {}
runner._pending_approvals = {}
runner._session_db = None
runner._reasoning_config = None
runner._provider_routing = {}
runner._fallback_model = None
runner._show_reasoning = False
runner._is_user_authorized = lambda _source: True
runner._set_session_env = lambda _context: None
runner._run_agent = AsyncMock(
return_value={
"final_response": "planned",
"messages": [],
"tools": [],
"history_offset": 0,
"last_prompt_tokens": 0,
}
)
return runner
def _make_event(text="/plan"):
return MessageEvent(
text=text,
source=SessionSource(
platform=Platform.TELEGRAM,
user_id="u1",
chat_id="c1",
user_name="tester",
chat_type="dm",
),
message_id="m1",
)
def _make_plan_skill(skills_dir):
skill_dir = skills_dir / "plan"
skill_dir.mkdir(parents=True, exist_ok=True)
(skill_dir / "SKILL.md").write_text(
"""---
name: plan
description: Plan mode skill.
---
# Plan
Use the current conversation context when no explicit instruction is provided.
Save plans under the active workspace's .hermes/plans directory.
"""
)
class TestGatewayPlanCommand:
@pytest.mark.asyncio
async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path):
import gateway.run as gateway_run
runner = _make_runner()
event = _make_event("/plan Add OAuth login")
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
monkeypatch.setattr(
"agent.model_metadata.get_model_context_length",
lambda *_args, **_kwargs: 100_000,
)
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_plan_skill(tmp_path)
scan_skill_commands()
result = await runner._handle_message(event)
assert result == "planned"
forwarded = runner._run_agent.call_args.kwargs["message"]
assert "Plan mode skill" in forwarded
assert "Add OAuth login" in forwarded
assert ".hermes/plans" in forwarded
assert str(tmp_path / "plans") not in forwarded
assert "active workspace/backend cwd" in forwarded
assert "Runtime note:" in forwarded
@pytest.mark.asyncio
async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path):
runner = _make_runner()
event = _make_event("/help")
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_plan_skill(tmp_path)
scan_skill_commands()
result = await runner._handle_help_command(event)
assert "/plan" in result


@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
# Clear env vars
for key in (
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
):
monkeypatch.delenv(key, raising=False)
@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
auxiliary_cfg = config_dict.get("auxiliary", {})
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
"vision": {
"provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
}
for task_key, (prov_env, model_env) in aux_task_env.items():
for task_key, env_map in aux_task_env.items():
task_cfg = auxiliary_cfg.get(task_key, {})
if not isinstance(task_cfg, dict):
continue
prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto":
os.environ[prov_env] = prov
os.environ[env_map["provider"]] = prov
if model:
os.environ[model_env] = model
os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# ── Config bridging tests ────────────────────────────────────────────────────
@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
def test_direct_endpoint_bridged(self, monkeypatch):
config = {
"auxiliary": {
"vision": {
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"model": "qwen2.5-vl",
}
}
}
_run_auxiliary_bridge(config, monkeypatch)
assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
def test_compression_provider_bridged(self, monkeypatch):
config = {
"compression": {
@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
# Check for key patterns that indicate the bridge is present
assert "AUXILIARY_VISION_PROVIDER" in content
assert "AUXILIARY_VISION_MODEL" in content
assert "AUXILIARY_VISION_BASE_URL" in content
assert "AUXILIARY_VISION_API_KEY" in content
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
def test_gateway_has_compression_provider(self):
"""Gateway must bridge compression.summary_provider."""


@ -0,0 +1,67 @@
"""Tests for the /plan CLI slash command."""
from unittest.mock import MagicMock, patch
from agent.skill_commands import scan_skill_commands
from cli import HermesCLI
def _make_cli():
cli_obj = HermesCLI.__new__(HermesCLI)
cli_obj.config = {}
cli_obj.console = MagicMock()
cli_obj.agent = None
cli_obj.conversation_history = []
cli_obj.session_id = "sess-123"
cli_obj._pending_input = MagicMock()
return cli_obj
def _make_plan_skill(skills_dir):
skill_dir = skills_dir / "plan"
skill_dir.mkdir(parents=True, exist_ok=True)
(skill_dir / "SKILL.md").write_text(
"""---
name: plan
description: Plan mode skill.
---
# Plan
Use the current conversation context when no explicit instruction is provided.
Save plans under the active workspace's .hermes/plans directory.
"""
)
class TestCLIPlanCommand:
def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch):
cli_obj = _make_cli()
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_plan_skill(tmp_path)
scan_skill_commands()
result = cli_obj.process_command("/plan Add OAuth login")
assert result is True
cli_obj._pending_input.put.assert_called_once()
queued = cli_obj._pending_input.put.call_args[0][0]
assert "Plan mode skill" in queued
assert "Add OAuth login" in queued
assert ".hermes/plans" in queued
assert str(tmp_path / "plans") not in queued
assert "active workspace/backend cwd" in queued
assert "Runtime note:" in queued
def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch):
cli_obj = _make_cli()
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_plan_skill(tmp_path)
scan_skill_commands()
cli_obj.process_command("/plan")
queued = cli_obj._pending_input.put.call_args[0][0]
assert "current conversation context" in queued
assert ".hermes/plans/" in queued
assert "conversation-plan.md" in queued


@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
"""
import json
import os
import sys
import unittest
from unittest.mock import MagicMock, patch
@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase):
self.assertEqual(creds["api_mode"], "chat_completions")
mock_resolve.assert_called_once_with(requested="openrouter")
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"provider": "openrouter",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["model"], "qwen2.5-coder")
self.assertEqual(creds["provider"], "custom")
self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
self.assertEqual(creds["api_key"], "local-key")
self.assertEqual(creds["api_mode"], "chat_completions")
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["api_key"], "env-openai-key")
self.assertEqual(creds["provider"], "custom")
def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
with self.assertRaises(ValueError) as ctx:
_resolve_delegation_credentials(cfg, parent)
self.assertIn("OPENAI_API_KEY", str(ctx.exception))
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
"""Nous provider resolves Nous Portal base_url and api_key."""
@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
self.assertNotEqual(kwargs["base_url"], parent.base_url)
self.assertNotEqual(kwargs["api_key"], parent.api_key)
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
mock_cfg.return_value = {
"max_iterations": 45,
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
mock_creds.return_value = {
"model": "qwen2.5-coder",
"provider": "custom",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Direct endpoint test", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["model"], "qwen2.5-coder")
self.assertEqual(kwargs["provider"], "custom")
self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
self.assertEqual(kwargs["api_key"], "local-key")
self.assertEqual(kwargs["api_mode"], "chat_completions")
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):


@ -540,18 +540,51 @@ def delegate_task(
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
"""Resolve credentials for subagent delegation.
If ``delegation.provider`` is configured, resolves the full credential
bundle (base_url, api_key, api_mode, provider) via the runtime provider
system, the same path used by CLI/gateway startup. This lets subagents
run on a completely different provider:model pair.
If ``delegation.base_url`` is configured, subagents use that direct
OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
configured, the full credential bundle (base_url, api_key, api_mode,
provider) is resolved via the runtime provider system, the same path used
by CLI/gateway startup. This lets subagents run on a completely different
provider:model pair.
If no provider is configured, returns None values so the child inherits
everything from the parent agent.
If neither base_url nor provider is configured, returns None values so the
child inherits everything from the parent agent.
Raises ValueError with a user-friendly message on credential failure.
"""
configured_model = cfg.get("model") or None
configured_provider = cfg.get("provider") or None
configured_model = str(cfg.get("model") or "").strip() or None
configured_provider = str(cfg.get("provider") or "").strip() or None
configured_base_url = str(cfg.get("base_url") or "").strip() or None
configured_api_key = str(cfg.get("api_key") or "").strip() or None
if configured_base_url:
api_key = (
configured_api_key
or os.getenv("OPENAI_API_KEY", "").strip()
)
if not api_key:
raise ValueError(
"Delegation base_url is configured but no API key was found. "
"Set delegation.api_key or OPENAI_API_KEY."
)
base_lower = configured_base_url.lower()
provider = "custom"
api_mode = "chat_completions"
if "chatgpt.com/backend-api/codex" in base_lower:
provider = "openai-codex"
api_mode = "codex_responses"
elif "api.anthropic.com" in base_lower:
provider = "anthropic"
api_mode = "anthropic_messages"
return {
"model": configured_model,
"provider": provider,
"base_url": configured_base_url,
"api_key": api_key,
"api_mode": api_mode,
}
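# Example (sketch): cfg {"base_url": "http://localhost:1234/v1",
# "api_key": "local-key", "model": "qwen2.5-coder"} resolves to
# provider "custom" with api_mode "chat_completions"; a
# chatgpt.com/backend-api/codex base_url resolves to
# "openai-codex"/"codex_responses" instead.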
if not configured_provider:
# No provider override — child inherits everything from parent
@ -570,7 +603,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
except Exception as exc:
raise ValueError(
f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
f"Check that the provider is configured (API key set, valid provider name). "
f"Check that the provider is configured (API key set, valid provider name), "
f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
) from exc


@ -29,7 +29,8 @@ Before starting, make sure you have:
- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
- **Gateway running** — the gateway daemon handles cron execution:
```bash
hermes gateway install # Install as system service (recommended)
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux servers: boot-time system service
# or
hermes gateway # Run in foreground
```
@ -242,10 +243,12 @@ Make sure the scheduler is actually running:
hermes cron status
```
If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability:
If the gateway isn't running, your jobs won't execute. Install it as a background service for reliability:
```bash
hermes gateway install
# or on Linux servers
sudo hermes gateway install --system
```
## Going Further


@ -143,12 +143,13 @@ For a persistent deployment that survives reboots:
```bash
hermes gateway install
sudo hermes gateway install --system # Linux only: boot-time system service
```
This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically.
This creates a background service: a user-level **systemd** service on Linux by default, a **launchd** service on macOS, or a boot-time Linux system service if you pass `--system`.
```bash
# Linux — manage the service
# Linux — manage the default user service
hermes gateway start
hermes gateway stop
hermes gateway status
@ -158,6 +159,11 @@ journalctl --user -u hermes-gateway -f
# Keep running after SSH logout
sudo loginctl enable-linger $USER
# Linux servers — explicit system-service commands
sudo hermes gateway start --system
sudo hermes gateway status --system
journalctl -u hermes-gateway -f
```
```bash


@ -180,6 +180,23 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
## Auxiliary Task Overrides
| Variable | Description |
|----------|-------------|
| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
| `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
For task-specific direct endpoints, Hermes uses the task's configured API key or `OPENAI_API_KEY`. It does not reuse `OPENROUTER_API_KEY` for those custom endpoints.
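For example, a minimal `.env` sketch (values are hypothetical) that routes web extraction to a self-hosted endpoint:

```bash
# In ~/.hermes/.env:
AUXILIARY_WEB_EXTRACT_BASE_URL=http://localhost:2345/v1
AUXILIARY_WEB_EXTRACT_API_KEY=task-key
AUXILIARY_WEB_EXTRACT_MODEL=task-model
```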
## Provider Routing (config.yaml only)
These go in `~/.hermes/config.yaml` under the `provider_routing` section:


@ -236,6 +236,7 @@ Skills for controlling smart home devices — lights, switches, sensors, and hom
| Skill | Description | Path |
|-------|-------------|------|
| `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. | `software-development/plan` |
| `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
| `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
| `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |

View file

@@ -11,7 +11,7 @@ Hermes has two slash-command surfaces:
- **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py`
- **Messaging slash commands** — handled by `gateway/run.py`
Installed skills are also exposed as dynamic slash commands on both surfaces.
Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `.hermes/plans/` relative to the active workspace/backend working directory.
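For example, on either surface (the request text is illustrative):
```bash
/plan migrate the login flow to the new auth provider
```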
## Interactive CLI slash commands
@@ -32,6 +32,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
| `/compress` | Manually compress conversation context (flush memories + summarize) |
| `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
| `/background` | Run a prompt in the background (usage: /background <prompt>) |
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
### Configuration
@@ -109,6 +110,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. |
| `/rollback [number]` | List or restore filesystem checkpoints. |
| `/background <prompt>` | Run a prompt in a separate background session. |
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
| `/reload-mcp` | Reload MCP servers from config. |
| `/update` | Update Hermes Agent to the latest version. |
| `/help` | Show messaging help. |

View file

@@ -571,11 +571,15 @@ auxiliary:
  vision:
    provider: "auto"  # "auto", "openrouter", "nous", "main"
    model: ""         # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
    base_url: ""      # direct OpenAI-compatible endpoint (takes precedence over provider)
    api_key: ""       # API key for base_url (falls back to OPENAI_API_KEY)
  # Web page summarization + browser page text extraction
  web_extract:
    provider: "auto"
    model: ""         # e.g. "google/gemini-2.5-flash"
    base_url: ""
    api_key: ""
```
### Changing the Vision Model
@@ -606,6 +610,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
### Common Setups
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
```yaml
auxiliary:
  vision:
    base_url: "http://localhost:1234/v1"
    api_key: "local-key"
    model: "qwen2.5-vl"
```
`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint. For direct endpoint overrides, Hermes uses the configured `api_key` or falls back to `OPENAI_API_KEY`; it does not reuse `OPENROUTER_API_KEY` for that custom endpoint.
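The same routing can be set per task through environment variables instead of `config.yaml`; for example, a sketch sending web extraction to a self-hosted endpoint (all values below are placeholders):
```bash
export AUXILIARY_WEB_EXTRACT_BASE_URL="http://localhost:8080/v1"
export AUXILIARY_WEB_EXTRACT_API_KEY="local-key"
export AUXILIARY_WEB_EXTRACT_MODEL="my-summarizer-model"
```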
**Using OpenAI API key for vision:**
```yaml
# In ~/.hermes/.env:
@@ -852,13 +867,17 @@ delegation:
    - web
  # model: "google/gemini-3-flash-preview"   # Override model (empty = inherit parent)
  # provider: "openrouter"                   # Override provider (empty = inherit parent)
  # base_url: "http://localhost:1234/v1"     # Direct OpenAI-compatible endpoint (takes precedence over provider)
  # api_key: "local-key"                     # API key for base_url (falls back to OPENAI_API_KEY)
```
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
**Direct endpoint override:** For an explicit custom-endpoint path, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. That sends subagents directly to that OpenAI-compatible endpoint and takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back only to `OPENAI_API_KEY`.
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
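As a concrete sketch, assuming `~/.hermes/config.yaml` has no existing `delegation` section yet (the provider and model values are illustrative):
```bash
# Subagents run on a cheap OpenRouter model; the parent keeps its own settings.
cat >> ~/.hermes/config.yaml <<'EOF'
delegation:
  provider: "openrouter"
  model: "google/gemini-2.5-flash"
EOF
```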
## Clarify

View file

@@ -156,7 +156,8 @@ What they do:
**Cron execution is handled by the gateway daemon.** The gateway ticks the scheduler every 60 seconds, running any due jobs in isolated agent sessions.
```bash
hermes gateway install # Install as system service (recommended)
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux: boot-time system service for servers
hermes gateway # Or run in foreground
hermes cron list

View file

@@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
delegation:
  max_iterations: 50                             # Max turns per child (default: 50)
  default_toolsets: ["terminal", "file", "web"]  # Default toolsets
  model: "google/gemini-3-flash-preview"         # Optional provider/model override
  provider: "openrouter"                         # Optional built-in provider
  # Or use a direct custom endpoint instead of provider:
  # model: "qwen2.5-coder"
  # base_url: "http://localhost:1234/v1"
  # api_key: "local-key"
```
:::tip

View file

@@ -24,11 +24,14 @@ Every installed skill is automatically available as a slash command:
/gif-search funny cats
/axolotl help me fine-tune Llama 3 on my dataset
/github-pr-workflow create a PR for the auth refactor
/plan design a rollout for migrating our auth provider
# Just the skill name loads it and lets the agent ask what you need:
/excalidraw
```
The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `.hermes/plans/` relative to the active workspace/backend working directory.
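Since the output location is fixed relative to the working directory, you can inspect the saved plans directly after a run:
```bash
ls -lt .hermes/plans/   # newest plan files first
```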
You can also interact with skills through natural conversation:
```bash

View file

@@ -80,7 +80,8 @@ EMAIL_HOME_ADDRESS=your@email.com # Default delivery target for cron jobs
```bash
hermes gateway # Run in foreground
hermes gateway install # Install as a system service
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux only: boot-time system service
```
On startup, the adapter:

View file

@@ -54,10 +54,12 @@ This walks you through configuring each platform with arrow-key selection, shows
```bash
hermes gateway # Run in foreground
hermes gateway setup # Configure messaging platforms interactively
hermes gateway install # Install as systemd service (Linux) / launchd (macOS)
hermes gateway start # Start the service
hermes gateway stop # Stop the service
hermes gateway status # Check service status
hermes gateway install # Install as a user service (Linux) / launchd service (macOS)
sudo hermes gateway install --system # Linux only: install a boot-time system service
hermes gateway start # Start the default service
hermes gateway stop # Stop the default service
hermes gateway status # Check default service status
hermes gateway status --system # Linux only: inspect the system service explicitly
```
## Chat Commands (Inside Messaging)
@@ -188,8 +190,18 @@ journalctl --user -u hermes-gateway -f
# Enable lingering (keeps running after logout)
sudo loginctl enable-linger $USER
# Or install a boot-time system service that still runs as your user
sudo hermes gateway install --system
sudo hermes gateway start --system
sudo hermes gateway status --system
journalctl -u hermes-gateway -f
```
Use the user service on laptops and dev boxes. Use the system service on VPS or headless hosts that should come back at boot without relying on systemd linger.
Avoid keeping both the user and system gateway units installed at once. Hermes warns when it detects both, because start/stop/status behavior becomes ambiguous.
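If you are unsure which units are installed, a quick check (assuming the documented unit name `hermes-gateway`):
```bash
systemctl --user list-unit-files | grep hermes-gateway   # user-level unit
systemctl list-unit-files | grep hermes-gateway          # system-level unit
```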
### macOS (launchd)
```bash

View file

@@ -127,7 +127,8 @@ Then start the gateway:
```bash
hermes gateway # Foreground
hermes gateway install # Install as a system service
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux only: boot-time system service
```
---

View file

@@ -168,7 +168,8 @@ Then start the gateway:
```bash
hermes gateway # Foreground
hermes gateway install # Install as a system service
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux only: boot-time system service
```
---

View file

@@ -101,7 +101,8 @@ Then start the gateway:
```bash
hermes gateway # Foreground
hermes gateway install # Install as a system service
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux only: boot-time system service
```
The gateway starts the WhatsApp bridge automatically using the saved session.