Merge origin/main into hermes/hermes-7ef7cb6a

2026-03-14 21:39:01 -07:00 · 2026-03-14 21:39:01 -07:00 · 9c322f7f59
commit 9c322f7f59
parent 4f4e2671ac b14a07315b
31 changed files with 956 additions and 121 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -30,6 +30,10 @@ Default "auto" follows the chains above.
 Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
 AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
 than the provider's default.
 Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
 AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
 custom OpenAI-compatible endpoint without touching the main model settings.
 """
 import json
@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
    return "auto"
 def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
    """Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
    if not task:
        return None
    for prefix in ("AUXILIARY_", "CONTEXT_"):
        val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
        if val:
            return val
    return None
 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
    or_key = os.getenv("OPENROUTER_API_KEY")
    if not or_key:
@ -599,6 +614,8 @@ def resolve_provider_client(
    model: str = None,
    async_mode: bool = False,
    raw_codex: bool = False,
    explicit_base_url: str = None,
    explicit_api_key: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@ -620,6 +637,8 @@ def resolve_provider_client(
            instead of wrapping in CodexAuxiliaryClient.  Use this when
            the caller needs direct access to responses.stream() (e.g.,
            the main agent loop).
        explicit_base_url: Optional direct OpenAI-compatible endpoint.
        explicit_api_key: Optional API key paired with explicit_base_url.
    Returns:
        (client, resolved_model) or (None, None) if auth is unavailable.
@ -696,6 +715,22 @@ def resolve_provider_client(
    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
            custom_base = explicit_base_url.strip()
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
            )
            if not custom_base or not custom_key:
                logger.warning(
                    "resolve_provider_client: explicit custom endpoint requested "
                    "but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
                )
                return None, None
            final_model = model or _read_main_model() or "gpt-4o-mini"
            client = OpenAI(api_key=custom_key, base_url=custom_base)
            return (_to_async_client(client, final_model) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
                       _resolve_api_key_provider):
@ -784,10 +819,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
    Callers may override the returned model with a per-task env var
    (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
    """
-    forced = _get_auxiliary_provider(task)
+    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
-    if forced != "auto":
+    return resolve_provider_client(
-        return resolve_provider_client(forced)
+        provider,
-    return resolve_provider_client("auto")
+        model=model,
        explicit_base_url=base_url,
        explicit_api_key=api_key,
    )
 def get_async_text_auxiliary_client(task: str = ""):
@ -797,10 +835,14 @@ def get_async_text_auxiliary_client(task: str = ""):
    (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
    Returns (None, None) when no provider is available.
    """
-    forced = _get_auxiliary_provider(task)
+    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
-    if forced != "auto":
+    return resolve_provider_client(
-        return resolve_provider_client(forced, async_mode=True)
+        provider,
-    return resolve_provider_client("auto", async_mode=True)
+        model=model,
        async_mode=True,
        explicit_base_url=base_url,
        explicit_api_key=api_key,
    )
 _VISION_AUTO_PROVIDER_ORDER = (
@ -856,26 +898,43 @@ def resolve_vision_provider_client(
    provider: Optional[str] = None,
    model: Optional[str] = None,
    *,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    async_mode: bool = False,
 ) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
    """Resolve the client actually used for vision tasks.
-    Explicit provider overrides still use the generic provider router for
+    Direct endpoint overrides take precedence over provider selection. Explicit
-    non-standard backends, so users can intentionally force experimental
+    provider overrides still use the generic provider router for non-standard
-    providers. Auto mode stays conservative and only tries vision backends
+    backends, so users can intentionally force experimental providers. Auto mode
-    known to work today.
+    stays conservative and only tries vision backends known to work today.
    """
-    requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision"))
+    requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
        "vision", provider, model, base_url, api_key
    )
    requested = _normalize_vision_provider(requested)
    def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
        if sync_client is None:
            return resolved_provider, None, None
-        final_model = model or default_model
+        final_model = resolved_model or default_model
        if async_mode:
            async_client, async_model = _to_async_client(sync_client, final_model)
            return resolved_provider, async_client, async_model
        return resolved_provider, sync_client, final_model
    if resolved_base_url:
        client, final_model = resolve_provider_client(
            "custom",
            model=resolved_model,
            async_mode=async_mode,
            explicit_base_url=resolved_base_url,
            explicit_api_key=resolved_api_key,
        )
        if client is None:
            return "custom", None, None
        return "custom", client, final_model
    if requested == "auto":
        for candidate in get_available_vision_backends():
            sync_client, default_model = _resolve_strict_vision_backend(candidate)
@ -888,7 +947,7 @@ def resolve_vision_provider_client(
        sync_client, default_model = _resolve_strict_vision_backend(requested)
        return _finalize(requested, sync_client, default_model)
-    client, final_model = _get_cached_client(requested, model, async_mode)
+    client, final_model = _get_cached_client(requested, resolved_model, async_mode)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@ -945,19 +1004,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
 # Every auxiliary LLM consumer should use these instead of manually
 # constructing clients and calling .chat.completions.create().
-# Client cache: (provider, async_mode) -> (client, default_model)
+# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
 _client_cache: Dict[tuple, tuple] = {}
 def _get_cached_client(
-    provider: str, model: str = None, async_mode: bool = False,
+    provider: str,
    model: str = None,
    async_mode: bool = False,
    base_url: str = None,
    api_key: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider."""
-    cache_key = (provider, async_mode)
+    cache_key = (provider, async_mode, base_url or "", api_key or "")
    if cache_key in _client_cache:
        cached_client, cached_default = _client_cache[cache_key]
        return cached_client, model or cached_default
-    client, default_model = resolve_provider_client(provider, model, async_mode)
+    client, default_model = resolve_provider_client(
        provider,
        model,
        async_mode,
        explicit_base_url=base_url,
        explicit_api_key=api_key,
    )
    if client is not None:
        _client_cache[cache_key] = (client, default_model)
    return client, model or default_model
@ -967,57 +1036,75 @@ def _resolve_task_provider_model(
    task: str = None,
    provider: str = None,
    model: str = None,
-) -> Tuple[str, Optional[str]]:
+    base_url: str = None,
    api_key: str = None,
 ) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
    """Determine provider + model for a call.
    Priority:
-      1. Explicit provider/model args (always win)
+      1. Explicit provider/model/base_url/api_key args (always win)
-      2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
+      2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
-      3. Config file (auxiliary.{task}.provider/model or compression.*)
+      3. Config file (auxiliary.{task}.* or compression.*)
      4. "auto" (full auto-detection chain)
-    Returns (provider, model) where model may be None (use provider default).
+    Returns (provider, model, base_url, api_key) where model may be None
    (use provider default). When base_url is set, provider is forced to
    "custom" and the task uses that direct endpoint.
    """
-    if provider:
+    config = {}
-        return provider, model
+    cfg_provider = None
    cfg_model = None
    cfg_base_url = None
    cfg_api_key = None
    if task:
        # Check env var overrides first
        env_provider = _get_auxiliary_provider(task)
        if env_provider != "auto":
            # Check for env var model override too
            env_model = None
            for prefix in ("AUXILIARY_", "CONTEXT_"):
                val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
                if val:
                    env_model = val
                    break
            return env_provider, model or env_model
        # Read from config file
        try:
            from hermes_cli.config import load_config
            config = load_config()
        except ImportError:
-            return "auto", model
+            config = {}
-        # Check auxiliary.{task} section
+        aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
-        aux = config.get("auxiliary", {})
+        task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
-        task_config = aux.get(task, {})
+        if not isinstance(task_config, dict):
-        cfg_provider = task_config.get("provider", "").strip() or None
+            task_config = {}
-        cfg_model = task_config.get("model", "").strip() or None
+        cfg_provider = str(task_config.get("provider", "")).strip() or None
        cfg_model = str(task_config.get("model", "")).strip() or None
        cfg_base_url = str(task_config.get("base_url", "")).strip() or None
        cfg_api_key = str(task_config.get("api_key", "")).strip() or None
        # Backwards compat: compression section has its own keys
        if task == "compression" and not cfg_provider:
-            comp = config.get("compression", {})
+            comp = config.get("compression", {}) if isinstance(config, dict) else {}
-            cfg_provider = comp.get("summary_provider", "").strip() or None
+            if isinstance(comp, dict):
-            cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
+                cfg_provider = comp.get("summary_provider", "").strip() or None
                cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
    env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
    resolved_model = model or env_model or cfg_model
    if base_url:
        return "custom", resolved_model, base_url, api_key
    if provider:
        return provider, resolved_model, base_url, api_key
    if task:
        env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
        env_api_key = _get_auxiliary_env_override(task, "API_KEY")
        if env_base_url:
            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
        env_provider = _get_auxiliary_provider(task)
        if env_provider != "auto":
            return env_provider, resolved_model, None, None
        if cfg_base_url:
            return "custom", resolved_model, cfg_base_url, cfg_api_key
        if cfg_provider and cfg_provider != "auto":
-            return cfg_provider, model or cfg_model
+            return cfg_provider, resolved_model, None, None
-        return "auto", model or cfg_model
+        return "auto", resolved_model, None, None
-    return "auto", model
+    return "auto", resolved_model, None, None
 def _build_call_kwargs(
@ -1029,6 +1116,7 @@ def _build_call_kwargs(
    tools: Optional[list] = None,
    timeout: float = 30.0,
    extra_body: Optional[dict] = None,
    base_url: Optional[str] = None,
 ) -> dict:
    """Build kwargs for .chat.completions.create() with model/provider adjustments."""
    kwargs: Dict[str, Any] = {
@ -1044,7 +1132,7 @@ def _build_call_kwargs(
        # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
-            custom_base = _current_custom_base_url()
+            custom_base = base_url or _current_custom_base_url()
            if "api.openai.com" in custom_base.lower():
                kwargs["max_completion_tokens"] = max_tokens
            else:
@ -1070,6 +1158,8 @@ def call_llm(
    *,
    provider: str = None,
    model: str = None,
    base_url: str = None,
    api_key: str = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@ -1101,16 +1191,18 @@ def call_llm(
    Raises:
        RuntimeError: If no provider is configured.
    """
-    resolved_provider, resolved_model = _resolve_task_provider_model(
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
-        task, provider, model)
+        task, provider, model, base_url, api_key)
    if task == "vision":
        effective_provider, client, final_model = resolve_vision_provider_client(
-            provider=resolved_provider,
+            provider=provider,
-            model=resolved_model,
+            model=model,
            base_url=base_url,
            api_key=api_key,
            async_mode=False,
        )
-        if client is None and resolved_provider != "auto":
+        if client is None and resolved_provider != "auto" and not resolved_base_url:
            logger.warning(
                "Vision provider %s unavailable, falling back to auto vision backends",
                resolved_provider,
@ -1127,10 +1219,15 @@ def call_llm(
            )
        resolved_provider = effective_provider or resolved_provider
    else:
-        client, final_model = _get_cached_client(resolved_provider, resolved_model)
+        client, final_model = _get_cached_client(
            resolved_provider,
            resolved_model,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
        )
        if client is None:
            # Fallback: try openrouter
-            if resolved_provider != "openrouter":
+            if resolved_provider != "openrouter" and not resolved_base_url:
                logger.warning("Provider %s unavailable, falling back to openrouter",
                               resolved_provider)
                client, final_model = _get_cached_client(
@ -1143,7 +1240,8 @@ def call_llm(
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body)
+        tools=tools, timeout=timeout, extra_body=extra_body,
        base_url=resolved_base_url)
    # Handle max_tokens vs max_completion_tokens retry
    try:
@ -1162,6 +1260,8 @@ async def async_call_llm(
    *,
    provider: str = None,
    model: str = None,
    base_url: str = None,
    api_key: str = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@ -1173,16 +1273,18 @@ async def async_call_llm(
    Same as call_llm() but async. See call_llm() for full documentation.
    """
-    resolved_provider, resolved_model = _resolve_task_provider_model(
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
-        task, provider, model)
+        task, provider, model, base_url, api_key)
    if task == "vision":
        effective_provider, client, final_model = resolve_vision_provider_client(
-            provider=resolved_provider,
+            provider=provider,
-            model=resolved_model,
+            model=model,
            base_url=base_url,
            api_key=api_key,
            async_mode=True,
        )
-        if client is None and resolved_provider != "auto":
+        if client is None and resolved_provider != "auto" and not resolved_base_url:
            logger.warning(
                "Vision provider %s unavailable, falling back to auto vision backends",
                resolved_provider,
@ -1200,9 +1302,14 @@ async def async_call_llm(
        resolved_provider = effective_provider or resolved_provider
    else:
        client, final_model = _get_cached_client(
-            resolved_provider, resolved_model, async_mode=True)
+            resolved_provider,
            resolved_model,
            async_mode=True,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
        )
        if client is None:
-            if resolved_provider != "openrouter":
+            if resolved_provider != "openrouter" and not resolved_base_url:
                logger.warning("Provider %s unavailable, falling back to openrouter",
                               resolved_provider)
                client, final_model = _get_cached_client(
@ -1216,7 +1323,8 @@ async def async_call_llm(
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body)
+        tools=tools, timeout=timeout, extra_body=extra_body,
        base_url=resolved_base_url)
    try:
        return await client.chat.completions.create(**kwargs)
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@ -1,17 +1,42 @@
-"""Skill slash commands — scan installed skills and build invocation messages.
+"""Shared slash command helpers for skills and built-in prompt-style modes.
 Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
-can invoke skills via /skill-name commands.
+can invoke skills via /skill-name commands and prompt-only built-ins like
 /plan.
 """
 import json
 import logging
 import re
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
 logger = logging.getLogger(__name__)
 _skill_commands: Dict[str, Dict[str, Any]] = {}
 _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 def build_plan_path(
    user_instruction: str = "",
    *,
    now: datetime | None = None,
 ) -> Path:
    """Return the default workspace-relative markdown path for a /plan invocation.
    Relative paths are intentional: file tools are task/backend-aware and resolve
    them against the active working directory for local, docker, ssh, modal,
    daytona, and similar terminal backends. That keeps the plan with the active
    workspace instead of the Hermes host's global home directory.
    """
    slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
    slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
    if slug:
        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
    slug = slug or "conversation-plan"
    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
@ -56,6 +81,7 @@ def _build_skill_message(
    skill_dir: Path | None,
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
    from tools.skills_tool import SKILLS_DIR
@ -115,6 +141,10 @@ def _build_skill_message(
        parts.append("")
        parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
    if runtime_note:
        parts.append("")
        parts.append(f"[Runtime note: {runtime_note}]")
    return "\n".join(parts)
@ -172,6 +202,7 @@ def build_skill_invocation_message(
    cmd_key: str,
    user_instruction: str = "",
    task_id: str | None = None,
    runtime_note: str = "",
 ) -> Optional[str]:
    """Build the user message content for a skill slash command invocation.
@ -201,6 +232,7 @@ def build_skill_invocation_message(
        skill_dir,
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
    )
--- a/cli.py
+++ b/cli.py
@ -218,11 +218,27 @@ def load_cli_config() -> Dict[str, Any]:
            "timeout": 300,    # Max seconds a sandbox script can run before being killed (5 min)
            "max_tool_calls": 50,  # Max RPC tool calls per execution
        },
        "auxiliary": {
            "vision": {
                "provider": "auto",
                "model": "",
                "base_url": "",
                "api_key": "",
            },
            "web_extract": {
                "provider": "auto",
                "model": "",
                "base_url": "",
                "api_key": "",
            },
        },
        "delegation": {
            "max_iterations": 45,  # Max tool-calling turns per child agent
            "default_toolsets": ["terminal", "file", "web"],  # Default toolsets for subagents
            "model": "",       # Subagent model override (empty = inherit parent model)
            "provider": "",    # Subagent provider override (empty = inherit parent provider)
            "base_url": "",    # Direct OpenAI-compatible endpoint for subagents
            "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
        },
    }
@ -363,28 +379,44 @@ def load_cli_config() -> Dict[str, Any]:
        if config_key in compression_config:
            os.environ[env_var] = str(compression_config[config_key])
-    # Apply auxiliary model overrides to environment variables.
+    # Apply auxiliary model/direct-endpoint overrides to environment variables.
-    # Vision and web_extract each have their own provider + model pair.
+    # Vision and web_extract each have their own provider/model/base_url/api_key tuple.
    # (Compression is handled in the compression section above.)
    # Only set env vars for non-empty / non-default values so auto-detection
    # still works.
    auxiliary_config = defaults.get("auxiliary", {})
    auxiliary_task_env = {
-        # config key → (provider env var, model env var)
+        # config key → env var mapping
-        "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+        "vision": {
-        "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+            "provider": "AUXILIARY_VISION_PROVIDER",
            "model": "AUXILIARY_VISION_MODEL",
            "base_url": "AUXILIARY_VISION_BASE_URL",
            "api_key": "AUXILIARY_VISION_API_KEY",
        },
        "web_extract": {
            "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
            "model": "AUXILIARY_WEB_EXTRACT_MODEL",
            "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
            "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
        },
    }
-    for task_key, (prov_env, model_env) in auxiliary_task_env.items():
+    for task_key, env_map in auxiliary_task_env.items():
        task_cfg = auxiliary_config.get(task_key, {})
        if not isinstance(task_cfg, dict):
            continue
        prov = str(task_cfg.get("provider", "")).strip()
        model = str(task_cfg.get("model", "")).strip()
        base_url = str(task_cfg.get("base_url", "")).strip()
        api_key = str(task_cfg.get("api_key", "")).strip()
        if prov and prov != "auto":
-            os.environ[prov_env] = prov
+            os.environ[env_map["provider"]] = prov
        if model:
-            os.environ[model_env] = model
+            os.environ[env_map["model"]] = model
        if base_url:
            os.environ[env_map["base_url"]] = base_url
        if api_key:
            os.environ[env_map["api_key"]] = api_key
    # Security settings
    security_config = defaults.get("security", {})
@ -1048,6 +1080,7 @@ from agent.skill_commands import (
    scan_skill_commands,
    get_skill_commands,
    build_skill_invocation_message,
    build_plan_path,
    build_preloaded_skills_prompt,
 )
@ -3161,6 +3194,8 @@ class HermesCLI:
        elif cmd_lower.startswith("/personality"):
            # Use original case (handler lowercases the personality name itself)
            self._handle_personality_command(cmd_original)
        elif cmd_lower == "/plan" or cmd_lower.startswith("/plan "):
            self._handle_plan_command(cmd_original)
        elif cmd_lower == "/retry":
            retry_msg = self.retry_last()
            if retry_msg and hasattr(self, '_pending_input'):
@ -3272,6 +3307,32 @@ class HermesCLI:
        return True
    def _handle_plan_command(self, cmd: str):
        """Handle /plan [request] — load the bundled plan skill."""
        parts = cmd.strip().split(maxsplit=1)
        user_instruction = parts[1].strip() if len(parts) > 1 else ""
        plan_path = build_plan_path(user_instruction)
        msg = build_skill_invocation_message(
            "/plan",
            user_instruction,
            task_id=self.session_id,
            runtime_note=(
                "Save the markdown plan with write_file to this exact relative path "
                f"inside the active workspace/backend cwd: {plan_path}"
            ),
        )
        if not msg:
            self.console.print("[bold red]Failed to load the bundled /plan skill[/]")
            return
        _cprint(f"  📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
        if hasattr(self, '_pending_input'):
            self._pending_input.put(msg)
        else:
            self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]")
    def _handle_background_command(self, cmd: str):
        """Handle /background <prompt> — run a prompt in a separate background session.
--- a/cron/init.py
+++ b/cron/init.py
@ -7,7 +7,8 @@ This module provides scheduled task execution, allowing the agent to:
 - Execute tasks in isolated sessions (no prior context)
 Cron jobs are executed automatically by the gateway daemon:
-    hermes gateway install    # Install as system service (recommended)
+    hermes gateway install    # Install as a user service
    sudo hermes gateway install --system  # Linux servers: boot-time system service
    hermes gateway            # Or run in foreground
 The gateway ticks the scheduler every 60 seconds. A file lock prevents
--- a/gateway/run.py
+++ b/gateway/run.py
@ -100,24 +100,40 @@ if _config_path.exists():
            for _cfg_key, _env_var in _compression_env_map.items():
                if _cfg_key in _compression_cfg:
                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
-        # Auxiliary model overrides (vision, web_extract).
+        # Auxiliary model/direct-endpoint overrides (vision, web_extract).
-        # Each task has provider + model; bridge non-default values to env vars.
+        # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
            _aux_task_env = {
-                "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+                "vision": {
-                "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+                    "provider": "AUXILIARY_VISION_PROVIDER",
                    "model": "AUXILIARY_VISION_MODEL",
                    "base_url": "AUXILIARY_VISION_BASE_URL",
                    "api_key": "AUXILIARY_VISION_API_KEY",
                },
                "web_extract": {
                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
                },
            }
-            for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
+            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
                _prov = str(_task_cfg.get("provider", "")).strip()
                _model = str(_task_cfg.get("model", "")).strip()
                _base_url = str(_task_cfg.get("base_url", "")).strip()
                _api_key = str(_task_cfg.get("api_key", "")).strip()
                if _prov and _prov != "auto":
-                    os.environ[_prov_env] = _prov
+                    os.environ[_env_map["provider"]] = _prov
                if _model:
-                    os.environ[_model_env] = _model
+                    os.environ[_env_map["model"]] = _model
                if _base_url:
                    os.environ[_env_map["base_url"]] = _base_url
                if _api_key:
                    os.environ[_env_map["api_key"]] = _api_key
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
@ -1098,7 +1114,7 @@ class GatewayRunner:
        # Emit command:* hook for any recognized slash command
        _known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
-                          "personality", "retry", "undo", "sethome", "set-home",
+                          "personality", "plan", "retry", "undo", "sethome", "set-home",
                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
                          "update", "title", "resume", "provider", "rollback",
                          "background", "reasoning", "voice"}
@ -1134,6 +1150,28 @@ class GatewayRunner:
        if command == "personality":
            return await self._handle_personality_command(event)
        if command == "plan":
            try:
                from agent.skill_commands import build_plan_path, build_skill_invocation_message
                user_instruction = event.get_command_args().strip()
                plan_path = build_plan_path(user_instruction)
                event.text = build_skill_invocation_message(
                    "/plan",
                    user_instruction,
                    task_id=_quick_key,
                    runtime_note=(
                        "Save the markdown plan with write_file to this exact relative path "
                        f"inside the active workspace/backend cwd: {plan_path}"
                    ),
                )
                if not event.text:
                    return "Failed to load the bundled /plan skill."
                command = None
            except Exception as e:
                logger.exception("Failed to prepare /plan command")
                return f"Failed to enter plan mode: {e}"
        if command == "retry":
            return await self._handle_retry_command(event)
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
        "vision": {
            "provider": "auto",    # auto | openrouter | nous | codex | custom
            "model": "",           # e.g. "google/gemini-2.5-flash", "gpt-4o"
            "base_url": "",        # direct OpenAI-compatible endpoint (takes precedence over provider)
            "api_key": "",         # API key for base_url (falls back to OPENAI_API_KEY)
        },
        "web_extract": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
        },
        "compression": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
        },
        "session_search": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
        },
        "skills_hub": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
        },
        "mcp": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
        },
        "flush_memories": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
        },
    },
@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
    "delegation": {
        "model": "",       # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
    },
    # Ephemeral prefill messages file — JSON list of {role, content} dicts
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@ -96,6 +96,7 @@ def cron_list(show_all: bool = False):
    if not find_gateway_pids():
        print(color("  ⚠  Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
        print(color("     Start it with: hermes gateway install", Colors.DIM))
        print(color("                    sudo hermes gateway install --system  # Linux servers", Colors.DIM))
        print()
@ -120,7 +121,8 @@ def cron_status():
        print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
        print()
        print("  To enable automatic execution:")
-        print("    hermes gateway install    # Install as system service (recommended)")
+        print("    hermes gateway install    # Install as a user service")
        print("    sudo hermes gateway install --system  # Linux servers: boot-time system service")
        print("    hermes gateway            # Or run in foreground")
    print()
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -2313,7 +2313,7 @@ Examples:
    hermes gateway                Run messaging gateway
    hermes -s hermes-agent-dev,github-auth
    hermes -w                     Start in isolated git worktree
-    hermes gateway install        Install as system service
+    hermes gateway install        Install gateway background service
    hermes sessions list          List past sessions
    hermes sessions browse        Interactive session picker
    hermes sessions rename ID T   Rename/title a session
--- a/skills/software-development/plan/SKILL.md
+++ b/skills/software-development/plan/SKILL.md
@ -0,0 +1,57 @@
 ---
 name: plan
 description: Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work.
 version: 1.0.0
 author: Hermes Agent
 license: MIT
 metadata:
  hermes:
    tags: [planning, plan-mode, implementation, workflow]
    related_skills: [writing-plans, subagent-driven-development]
 ---
 # Plan Mode
 Use this skill when the user wants a plan instead of execution.
 ## Core behavior
 For this turn, you are planning only.
 - Do not implement code.
 - Do not edit project files except the plan markdown file.
 - Do not run mutating terminal commands, commit, push, or perform external actions.
 - You may inspect the repo or other context with read-only commands/tools when needed.
 - Your deliverable is a markdown plan saved inside the active workspace under `.hermes/plans/`.
 ## Output requirements
 Write a markdown plan that is concrete and actionable.
 Include, when relevant:
 - Goal
 - Current context / assumptions
 - Proposed approach
 - Step-by-step plan
 - Files likely to change
 - Tests / validation
 - Risks, tradeoffs, and open questions
 If the task is code-related, include exact file paths, likely test targets, and verification steps.
 ## Save location
 Save the plan with `write_file` under:
 - `.hermes/plans/YYYY-MM-DD_HHMMSS-<slug>.md`
 Treat that as relative to the active working directory / backend workspace. Hermes file tools are backend-aware, so using this relative path keeps the plan with the workspace on local, docker, ssh, modal, and daytona backends.
 If the runtime provides a specific target path, use that exact path.
 If not, create a sensible timestamped filename yourself under `.hermes/plans/`.
 ## Interaction style
 - If the request is clear enough, write the plan directly.
 - If no explicit instruction accompanies `/plan`, infer the task from the current conversation context.
 - If it is genuinely underspecified, ask a brief clarifying question instead of guessing.
 - After saving the plan, reply briefly with what you planned and the saved path.
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
    for key in (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
-        # Per-task provider/model overrides
+        # Per-task provider/model/direct-endpoint overrides
        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)
@ -142,6 +144,27 @@ class TestGetTextAuxiliaryClient:
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
    def test_task_direct_endpoint_override(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "task-model"
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
        assert mock_openai.call_args.kwargs["api_key"] == "task-key"
    def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("web_extract")
        assert client is None
        assert model is None
        mock_openai.assert_not_called()
    def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
        config = {
            "model": {
@ -217,6 +240,27 @@ class TestVisionClientFallback:
            client, model = get_vision_auxiliary_client()
        assert client is not None  # Custom endpoint picked up as fallback
    def test_vision_direct_endpoint_override(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
        monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert model == "vision-model"
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
        assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
    def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None
        mock_openai.assert_not_called()
    def test_vision_uses_openrouter_when_available(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
@ -434,6 +478,24 @@ class TestTaskSpecificOverrides:
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "google/gemini-3-flash-preview"
    def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "config.yaml").write_text(
            """auxiliary:
  web_extract:
    base_url: http://localhost:3456/v1
    api_key: config-key
    model: config-model
 """
        )
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "config-model"
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
        assert mock_openai.call_args.kwargs["api_key"] == "config-key"
    def test_task_without_override_uses_auto(self, monkeypatch):
        """A task with no provider env var falls through to auto chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
--- a/tests/agent/test_skill_commands.py
+++ b/tests/agent/test_skill_commands.py
@ -1,13 +1,16 @@
 """Tests for agent/skill_commands.py — skill slash command scanning and platform filtering."""
 import os
 from datetime import datetime
 from pathlib import Path
 from unittest.mock import patch
 import tools.skills_tool as skills_tool_module
 from agent.skill_commands import (
-    scan_skill_commands,
+    build_plan_path,
    build_skill_invocation_message,
    build_preloaded_skills_prompt,
    build_skill_invocation_message,
    scan_skill_commands,
 )
@ -272,3 +275,37 @@ Generate some audio.
        assert msg is not None
        assert 'file_path="<path>"' in msg
 class TestPlanSkillHelpers:
    def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self):
        path = build_plan_path(
            "Implement OAuth login + refresh tokens!",
            now=datetime(2026, 3, 15, 9, 30, 45),
        )
        assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md"
    def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "plan",
                body="Save plans under .hermes/plans in the active workspace and do not execute the work.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message(
                "/plan",
                "Add a /plan command",
                runtime_note=(
                    "Save the markdown plan with write_file to this exact relative path inside "
                    "the active workspace/backend cwd: .hermes/plans/plan.md"
                ),
            )
        assert msg is not None
        assert "Save plans under $HERMES_HOME/plans" not in msg
        assert ".hermes/plans" in msg
        assert "Add a /plan command" in msg
        assert ".hermes/plans/plan.md" in msg
        assert "Runtime note:" in msg
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
    (fake_home / "memories").mkdir()
    (fake_home / "skills").mkdir()
    monkeypatch.setenv("HERMES_HOME", str(fake_home))
    # Tests should not inherit the agent's current gateway/messaging surface.
    # Individual tests that need gateway behavior set these explicitly.
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@pytest.fixture()
--- a/tests/gateway/test_plan_command.py
+++ b/tests/gateway/test_plan_command.py
@ -0,0 +1,129 @@
 """Tests for the /plan gateway slash command."""
 from datetime import datetime
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from agent.skill_commands import scan_skill_commands
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.base import MessageEvent
 from gateway.session import SessionEntry, SessionSource
 def _make_runner():
    from gateway.run import GatewayRunner
    runner = object.__new__(GatewayRunner)
    runner.config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
    )
    runner.adapters = {}
    runner._voice_mode = {}
    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
    runner.session_store = MagicMock()
    runner.session_store.get_or_create_session.return_value = SessionEntry(
        session_key="agent:main:telegram:dm:c1:u1",
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
    runner.session_store.load_transcript.return_value = []
    runner.session_store.has_any_sessions.return_value = True
    runner.session_store.append_to_transcript = MagicMock()
    runner.session_store.rewrite_transcript = MagicMock()
    runner._running_agents = {}
    runner._pending_messages = {}
    runner._pending_approvals = {}
    runner._session_db = None
    runner._reasoning_config = None
    runner._provider_routing = {}
    runner._fallback_model = None
    runner._show_reasoning = False
    runner._is_user_authorized = lambda _source: True
    runner._set_session_env = lambda _context: None
    runner._run_agent = AsyncMock(
        return_value={
            "final_response": "planned",
            "messages": [],
            "tools": [],
            "history_offset": 0,
            "last_prompt_tokens": 0,
        }
    )
    return runner
 def _make_event(text="/plan"):
    return MessageEvent(
        text=text,
        source=SessionSource(
            platform=Platform.TELEGRAM,
            user_id="u1",
            chat_id="c1",
            user_name="tester",
            chat_type="dm",
        ),
        message_id="m1",
    )
 def _make_plan_skill(skills_dir):
    skill_dir = skills_dir / "plan"
    skill_dir.mkdir(parents=True, exist_ok=True)
    (skill_dir / "SKILL.md").write_text(
        """---
 name: plan
 description: Plan mode skill.
 ---
 # Plan
 Use the current conversation context when no explicit instruction is provided.
 Save plans under the active workspace's .hermes/plans directory.
 """
    )
 class TestGatewayPlanCommand:
    @pytest.mark.asyncio
    async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path):
        import gateway.run as gateway_run
        runner = _make_runner()
        event = _make_event("/plan Add OAuth login")
        monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
        monkeypatch.setattr(
            "agent.model_metadata.get_model_context_length",
            lambda *_args, **_kwargs: 100_000,
        )
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            result = await runner._handle_message(event)
        assert result == "planned"
        forwarded = runner._run_agent.call_args.kwargs["message"]
        assert "Plan mode skill" in forwarded
        assert "Add OAuth login" in forwarded
        assert ".hermes/plans" in forwarded
        assert str(tmp_path / "plans") not in forwarded
        assert "active workspace/backend cwd" in forwarded
        assert "Runtime note:" in forwarded
    @pytest.mark.asyncio
    async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path):
        runner = _make_runner()
        event = _make_event("/help")
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            result = await runner._handle_help_command(event)
        assert "/plan" in result
--- a/tests/test_auxiliary_config_bridge.py
+++ b/tests/test_auxiliary_config_bridge.py
@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
    # Clear env vars
    for key in (
        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)
@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
    auxiliary_cfg = config_dict.get("auxiliary", {})
    if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
        aux_task_env = {
-            "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+            "vision": {
-            "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+                "provider": "AUXILIARY_VISION_PROVIDER",
                "model": "AUXILIARY_VISION_MODEL",
                "base_url": "AUXILIARY_VISION_BASE_URL",
                "api_key": "AUXILIARY_VISION_API_KEY",
            },
            "web_extract": {
                "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
                "model": "AUXILIARY_WEB_EXTRACT_MODEL",
                "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
            },
        }
-        for task_key, (prov_env, model_env) in aux_task_env.items():
+        for task_key, env_map in aux_task_env.items():
            task_cfg = auxiliary_cfg.get(task_key, {})
            if not isinstance(task_cfg, dict):
                continue
            prov = str(task_cfg.get("provider", "")).strip()
            model = str(task_cfg.get("model", "")).strip()
            base_url = str(task_cfg.get("base_url", "")).strip()
            api_key = str(task_cfg.get("api_key", "")).strip()
            if prov and prov != "auto":
-                os.environ[prov_env] = prov
+                os.environ[env_map["provider"]] = prov
            if model:
-                os.environ[model_env] = model
+                os.environ[env_map["model"]] = model
            if base_url:
                os.environ[env_map["base_url"]] = base_url
            if api_key:
                os.environ[env_map["api_key"]] = api_key
 # ── Config bridging tests ────────────────────────────────────────────────────
@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
    def test_direct_endpoint_bridged(self, monkeypatch):
        config = {
            "auxiliary": {
                "vision": {
                    "base_url": "http://localhost:1234/v1",
                    "api_key": "local-key",
                    "model": "qwen2.5-vl",
                }
            }
        }
        _run_auxiliary_bridge(config, monkeypatch)
        assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
        assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
        assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
    def test_compression_provider_bridged(self, monkeypatch):
        config = {
            "compression": {
@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
        # Check for key patterns that indicate the bridge is present
        assert "AUXILIARY_VISION_PROVIDER" in content
        assert "AUXILIARY_VISION_MODEL" in content
        assert "AUXILIARY_VISION_BASE_URL" in content
        assert "AUXILIARY_VISION_API_KEY" in content
        assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
        assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
        assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
        assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
    def test_gateway_has_compression_provider(self):
        """Gateway must bridge compression.summary_provider."""
--- a/tests/test_cli_plan_command.py
+++ b/tests/test_cli_plan_command.py
@ -0,0 +1,67 @@
 """Tests for the /plan CLI slash command."""
 from unittest.mock import MagicMock, patch
 from agent.skill_commands import scan_skill_commands
 from cli import HermesCLI
 def _make_cli():
    cli_obj = HermesCLI.__new__(HermesCLI)
    cli_obj.config = {}
    cli_obj.console = MagicMock()
    cli_obj.agent = None
    cli_obj.conversation_history = []
    cli_obj.session_id = "sess-123"
    cli_obj._pending_input = MagicMock()
    return cli_obj
 def _make_plan_skill(skills_dir):
    skill_dir = skills_dir / "plan"
    skill_dir.mkdir(parents=True, exist_ok=True)
    (skill_dir / "SKILL.md").write_text(
        """---
 name: plan
 description: Plan mode skill.
 ---
 # Plan
 Use the current conversation context when no explicit instruction is provided.
 Save plans under the active workspace's .hermes/plans directory.
 """
    )
 class TestCLIPlanCommand:
    def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch):
        cli_obj = _make_cli()
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            result = cli_obj.process_command("/plan Add OAuth login")
        assert result is True
        cli_obj._pending_input.put.assert_called_once()
        queued = cli_obj._pending_input.put.call_args[0][0]
        assert "Plan mode skill" in queued
        assert "Add OAuth login" in queued
        assert ".hermes/plans" in queued
        assert str(tmp_path / "plans") not in queued
        assert "active workspace/backend cwd" in queued
        assert "Runtime note:" in queued
    def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch):
        cli_obj = _make_cli()
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            cli_obj.process_command("/plan")
        queued = cli_obj._pending_input.put.call_args[0][0]
        assert "current conversation context" in queued
        assert ".hermes/plans/" in queued
        assert "conversation-plan.md" in queued
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@ -10,6 +10,7 @@ Run with:  python -m pytest tests/test_delegate.py -v
 """
 import json
 import os
 import sys
 import unittest
 from unittest.mock import MagicMock, patch
@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase):
        self.assertEqual(creds["api_mode"], "chat_completions")
        mock_resolve.assert_called_once_with(requested="openrouter")
    def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
        parent = _make_mock_parent(depth=0)
        cfg = {
            "model": "qwen2.5-coder",
            "provider": "openrouter",
            "base_url": "http://localhost:1234/v1",
            "api_key": "local-key",
        }
        creds = _resolve_delegation_credentials(cfg, parent)
        self.assertEqual(creds["model"], "qwen2.5-coder")
        self.assertEqual(creds["provider"], "custom")
        self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
        self.assertEqual(creds["api_key"], "local-key")
        self.assertEqual(creds["api_mode"], "chat_completions")
    def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
        parent = _make_mock_parent(depth=0)
        cfg = {
            "model": "qwen2.5-coder",
            "base_url": "http://localhost:1234/v1",
        }
        with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
            creds = _resolve_delegation_credentials(cfg, parent)
        self.assertEqual(creds["api_key"], "env-openai-key")
        self.assertEqual(creds["provider"], "custom")
    def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
        parent = _make_mock_parent(depth=0)
        cfg = {
            "model": "qwen2.5-coder",
            "base_url": "http://localhost:1234/v1",
        }
        with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
            with self.assertRaises(ValueError) as ctx:
                _resolve_delegation_credentials(cfg, parent)
        self.assertIn("OPENAI_API_KEY", str(ctx.exception))
    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
        """Nous provider resolves Nous Portal base_url and api_key."""
@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
            self.assertNotEqual(kwargs["base_url"], parent.base_url)
            self.assertNotEqual(kwargs["api_key"], parent.api_key)
    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "qwen2.5-coder",
            "base_url": "http://localhost:1234/v1",
            "api_key": "local-key",
        }
        mock_creds.return_value = {
            "model": "qwen2.5-coder",
            "provider": "custom",
            "base_url": "http://localhost:1234/v1",
            "api_key": "local-key",
            "api_mode": "chat_completions",
        }
        parent = _make_mock_parent(depth=0)
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = MagicMock()
            mock_child.run_conversation.return_value = {
                "final_response": "done", "completed": True, "api_calls": 1
            }
            MockAgent.return_value = mock_child
            delegate_task(goal="Direct endpoint test", parent_agent=parent)
            _, kwargs = MockAgent.call_args
            self.assertEqual(kwargs["model"], "qwen2.5-coder")
            self.assertEqual(kwargs["provider"], "custom")
            self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
            self.assertEqual(kwargs["api_key"], "local-key")
            self.assertEqual(kwargs["api_mode"], "chat_completions")
    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@ -540,18 +540,51 @@ def delegate_task(
 def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
    """Resolve credentials for subagent delegation.
-    If ``delegation.provider`` is configured, resolves the full credential
+    If ``delegation.base_url`` is configured, subagents use that direct
-    bundle (base_url, api_key, api_mode, provider) via the runtime provider
+    OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
-    system — the same path used by CLI/gateway startup.  This lets subagents
+    configured, the full credential bundle (base_url, api_key, api_mode,
-    run on a completely different provider:model pair.
+    provider) is resolved via the runtime provider system — the same path used
    by CLI/gateway startup. This lets subagents run on a completely different
    provider:model pair.
-    If no provider is configured, returns None values so the child inherits
+    If neither base_url nor provider is configured, returns None values so the
-    everything from the parent agent.
+    child inherits everything from the parent agent.
    Raises ValueError with a user-friendly message on credential failure.
    """
-    configured_model = cfg.get("model") or None
+    configured_model = str(cfg.get("model") or "").strip() or None
-    configured_provider = cfg.get("provider") or None
+    configured_provider = str(cfg.get("provider") or "").strip() or None
    configured_base_url = str(cfg.get("base_url") or "").strip() or None
    configured_api_key = str(cfg.get("api_key") or "").strip() or None
    if configured_base_url:
        api_key = (
            configured_api_key
            or os.getenv("OPENAI_API_KEY", "").strip()
        )
        if not api_key:
            raise ValueError(
                "Delegation base_url is configured but no API key was found. "
                "Set delegation.api_key or OPENAI_API_KEY."
            )
        base_lower = configured_base_url.lower()
        provider = "custom"
        api_mode = "chat_completions"
        if "chatgpt.com/backend-api/codex" in base_lower:
            provider = "openai-codex"
            api_mode = "codex_responses"
        elif "api.anthropic.com" in base_lower:
            provider = "anthropic"
            api_mode = "anthropic_messages"
        return {
            "model": configured_model,
            "provider": provider,
            "base_url": configured_base_url,
            "api_key": api_key,
            "api_mode": api_mode,
        }
    if not configured_provider:
        # No provider override — child inherits everything from parent
@ -570,7 +603,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
    except Exception as exc:
        raise ValueError(
            f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
-            f"Check that the provider is configured (API key set, valid provider name). "
+            f"Check that the provider is configured (API key set, valid provider name), "
            f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
            f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
        ) from exc
--- a/website/docs/guides/daily-briefing-bot.md
+++ b/website/docs/guides/daily-briefing-bot.md
@ -29,7 +29,8 @@ Before starting, make sure you have:
 - **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
 - **Gateway running** — the gateway daemon handles cron execution:
  ```bash
-  hermes gateway install   # Install as system service (recommended)
+  hermes gateway install   # Install as a user service
  sudo hermes gateway install --system   # Linux servers: boot-time system service
  # or
  hermes gateway           # Run in foreground
  ```
@ -242,10 +243,12 @@ Make sure the scheduler is actually running:
 hermes cron status
 ```
-If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability:
+If the gateway isn't running, your jobs won't execute. Install it as a background service for reliability:
 ```bash
 hermes gateway install
 # or on Linux servers
 sudo hermes gateway install --system
 ```
 ## Going Further
--- a/website/docs/guides/team-telegram-assistant.md
+++ b/website/docs/guides/team-telegram-assistant.md
@ -143,12 +143,13 @@ For a persistent deployment that survives reboots:
 ```bash
 hermes gateway install
 sudo hermes gateway install --system   # Linux only: boot-time system service
 ```
-This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically.
+This creates a background service: a user-level **systemd** service on Linux by default, a **launchd** service on macOS, or a boot-time Linux system service if you pass `--system`.
 ```bash
-# Linux — manage the service
+# Linux — manage the default user service
 hermes gateway start
 hermes gateway stop
 hermes gateway status
@ -158,6 +159,11 @@ journalctl --user -u hermes-gateway -f
 # Keep running after SSH logout
 sudo loginctl enable-linger $USER
 # Linux servers — explicit system-service commands
 sudo hermes gateway start --system
 sudo hermes gateway status --system
 journalctl -u hermes-gateway -f
 ```
 ```bash
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@ -180,6 +180,23 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 | `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
 | `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
 ## Auxiliary Task Overrides
 | Variable | Description |
 |----------|-------------|
 | `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
 | `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
 | `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
 | `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
 | `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
 | `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
 | `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
 | `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
 | `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
 | `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
 For task-specific direct endpoints, Hermes uses the task's configured API key or `OPENAI_API_KEY`. It does not reuse `OPENROUTER_API_KEY` for those custom endpoints.
 ## Provider Routing (config.yaml only)
 These go in `~/.hermes/config.yaml` under the `provider_routing` section:
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@ -236,6 +236,7 @@ Skills for controlling smart home devices — lights, switches, sensors, and hom
 | Skill | Description | Path |
 |-------|-------------|------|
 | `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
 | `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. | `software-development/plan` |
 | `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
 | `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
 | `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@ -11,7 +11,7 @@ Hermes has two slash-command surfaces:
 - **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py`
 - **Messaging slash commands** — handled by `gateway/run.py`
-Installed skills are also exposed as dynamic slash commands on both surfaces.
+Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `.hermes/plans/` relative to the active workspace/backend working directory.
 ## Interactive CLI slash commands
@ -32,6 +32,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/compress` | Manually compress conversation context (flush memories + summarize) |
 | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
 | `/background` | Run a prompt in the background (usage: /background &lt;prompt&gt;) |
 | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
 ### Configuration
@ -109,6 +110,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. |
 | `/rollback [number]` | List or restore filesystem checkpoints. |
 | `/background &lt;prompt&gt;` | Run a prompt in a separate background session. |
 | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
 | `/reload-mcp` | Reload MCP servers from config. |
 | `/update` | Update Hermes Agent to the latest version. |
 | `/help` | Show messaging help. |
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@ -571,11 +571,15 @@ auxiliary:
  vision:
    provider: "auto"           # "auto", "openrouter", "nous", "main"
    model: ""                  # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
    base_url: ""               # direct OpenAI-compatible endpoint (takes precedence over provider)
    api_key: ""                # API key for base_url (falls back to OPENAI_API_KEY)
  # Web page summarization + browser page text extraction
  web_extract:
    provider: "auto"
    model: ""                  # e.g. "google/gemini-2.5-flash"
    base_url: ""
    api_key: ""
 ```
 ### Changing the Vision Model
@ -606,6 +610,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
 ### Common Setups
 **Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
 ```yaml
 auxiliary:
  vision:
    base_url: "http://localhost:1234/v1"
    api_key: "local-key"
    model: "qwen2.5-vl"
 ```
 `base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint. For direct endpoint overrides, Hermes uses the configured `api_key` or falls back to `OPENAI_API_KEY`; it does not reuse `OPENROUTER_API_KEY` for that custom endpoint.
 **Using OpenAI API key for vision:**
 ```yaml
 # In ~/.hermes/.env:
@ -852,13 +867,17 @@ delegation:
    - web
  # model: "google/gemini-3-flash-preview"  # Override model (empty = inherit parent)
  # provider: "openrouter"                  # Override provider (empty = inherit parent)
  # base_url: "http://localhost:1234/v1"    # Direct OpenAI-compatible endpoint (takes precedence over provider)
  # api_key: "local-key"                    # API key for base_url (falls back to OPENAI_API_KEY)
 ```
 **Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
 **Direct endpoint override:** If you want the obvious custom-endpoint path, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. That sends subagents directly to that OpenAI-compatible endpoint and takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back to `OPENAI_API_KEY` only.
 The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
-**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
+**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
 ## Clarify
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@ -156,7 +156,8 @@ What they do:
 **Cron execution is handled by the gateway daemon.** The gateway ticks the scheduler every 60 seconds, running any due jobs in isolated agent sessions.
 ```bash
-hermes gateway install     # Install as system service (recommended)
+hermes gateway install     # Install as a user service
 sudo hermes gateway install --system   # Linux: boot-time system service for servers
 hermes gateway             # Or run in foreground
 hermes cron list
--- a/website/docs/user-guide/features/delegation.md
+++ b/website/docs/user-guide/features/delegation.md
@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
 delegation:
  max_iterations: 50                        # Max turns per child (default: 50)
  default_toolsets: ["terminal", "file", "web"]  # Default toolsets
  model: "google/gemini-3-flash-preview"             # Optional provider/model override
  provider: "openrouter"                             # Optional built-in provider
 # Or use a direct custom endpoint instead of provider:
 delegation:
  model: "qwen2.5-coder"
  base_url: "http://localhost:1234/v1"
  api_key: "local-key"
 ```
 :::tip
--- a/website/docs/user-guide/features/skills.md
+++ b/website/docs/user-guide/features/skills.md
@ -24,11 +24,14 @@ Every installed skill is automatically available as a slash command:
 /gif-search funny cats
 /axolotl help me fine-tune Llama 3 on my dataset
 /github-pr-workflow create a PR for the auth refactor
 /plan design a rollout for migrating our auth provider
 # Just the skill name loads it and lets the agent ask what you need:
 /excalidraw
 ```
 The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `.hermes/plans/` relative to the active workspace/backend working directory.
 You can also interact with skills through natural conversation:
 ```bash
--- a/website/docs/user-guide/messaging/email.md
+++ b/website/docs/user-guide/messaging/email.md
@ -80,7 +80,8 @@ EMAIL_HOME_ADDRESS=your@email.com      # Default delivery target for cron jobs
 ```bash
 hermes gateway              # Run in foreground
-hermes gateway install      # Install as a system service
+hermes gateway install      # Install as a user service
 sudo hermes gateway install --system   # Linux only: boot-time system service
 ```
 On startup, the adapter:
--- a/website/docs/user-guide/messaging/index.md
+++ b/website/docs/user-guide/messaging/index.md
@ -54,10 +54,12 @@ This walks you through configuring each platform with arrow-key selection, shows
 ```bash
 hermes gateway              # Run in foreground
 hermes gateway setup        # Configure messaging platforms interactively
-hermes gateway install      # Install as systemd service (Linux) / launchd (macOS)
+hermes gateway install      # Install as a user service (Linux) / launchd service (macOS)
-hermes gateway start        # Start the service
+sudo hermes gateway install --system   # Linux only: install a boot-time system service
-hermes gateway stop         # Stop the service
+hermes gateway start        # Start the default service
-hermes gateway status       # Check service status
+hermes gateway stop         # Stop the default service
 hermes gateway status       # Check default service status
 hermes gateway status --system         # Linux only: inspect the system service explicitly
 ```
 ## Chat Commands (Inside Messaging)
@ -188,8 +190,18 @@ journalctl --user -u hermes-gateway -f
 # Enable lingering (keeps running after logout)
 sudo loginctl enable-linger $USER
 # Or install a boot-time system service that still runs as your user
 sudo hermes gateway install --system
 sudo hermes gateway start --system
 sudo hermes gateway status --system
 journalctl -u hermes-gateway -f
 ```
 Use the user service on laptops and dev boxes. Use the system service on VPS or headless hosts that should come back at boot without relying on systemd linger.
 Avoid keeping both the user and system gateway units installed at once unless you really mean to. Hermes will warn if it detects both because start/stop/status behavior gets ambiguous.
 ### macOS (launchd)
 ```bash
--- a/website/docs/user-guide/messaging/signal.md
+++ b/website/docs/user-guide/messaging/signal.md
@ -127,7 +127,8 @@ Then start the gateway:
 ```bash
 hermes gateway              # Foreground
-hermes gateway install      # Install as a system service
+hermes gateway install      # Install as a user service
 sudo hermes gateway install --system   # Linux only: boot-time system service
 ```
 ---
--- a/website/docs/user-guide/messaging/slack.md
+++ b/website/docs/user-guide/messaging/slack.md
@ -168,7 +168,8 @@ Then start the gateway:
 ```bash
 hermes gateway              # Foreground
-hermes gateway install      # Install as a system service
+hermes gateway install      # Install as a user service
 sudo hermes gateway install --system   # Linux only: boot-time system service
 ```
 ---
--- a/website/docs/user-guide/messaging/whatsapp.md
+++ b/website/docs/user-guide/messaging/whatsapp.md
@ -101,7 +101,8 @@ Then start the gateway:
 ```bash
 hermes gateway              # Foreground
-hermes gateway install      # Install as a system service
+hermes gateway install      # Install as a user service
 sudo hermes gateway install --system   # Linux only: boot-time system service
 ```
 The gateway starts the WhatsApp bridge automatically using the saved session.