From 9f6bccd76a0a64d9251620e5c713e34f9df4649f Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 14 Mar 2026 20:48:29 -0700 Subject: [PATCH 1/4] feat: add direct endpoint overrides for auxiliary and delegation Add base_url/api_key overrides for auxiliary tasks and delegation so users can route those flows straight to a custom OpenAI-compatible endpoint without having to rely on provider=main or named custom providers. Also clear gateway session env vars in test isolation so the full suite stays deterministic when run from a messaging-backed agent session. --- agent/auxiliary_client.py | 244 +++++++++++++----- cli.py | 48 +++- gateway/run.py | 30 ++- hermes_cli/config.py | 16 ++ tests/agent/test_auxiliary_client.py | 64 ++++- tests/conftest.py | 6 + tests/test_auxiliary_config_bridge.py | 47 +++- tests/tools/test_delegate.py | 72 ++++++ tools/delegate_tool.py | 52 +++- .../docs/reference/environment-variables.md | 17 ++ website/docs/user-guide/configuration.md | 21 +- .../docs/user-guide/features/delegation.md | 8 + 12 files changed, 526 insertions(+), 99 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index dd8f22bb..957452fc 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -30,6 +30,10 @@ Default "auto" follows the chains above. Per-task model overrides (e.g. AUXILIARY_VISION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug than the provider's default. + +Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL, +AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a +custom OpenAI-compatible endpoint without touching the main model settings. 
""" import json @@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str: return "auto" +def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]: + """Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes.""" + if not task: + return None + for prefix in ("AUXILIARY_", "CONTEXT_"): + val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip() + if val: + return val + return None + + def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: or_key = os.getenv("OPENROUTER_API_KEY") if not or_key: @@ -564,6 +579,8 @@ def resolve_provider_client( model: str = None, async_mode: bool = False, raw_codex: bool = False, + explicit_base_url: str = None, + explicit_api_key: str = None, ) -> Tuple[Optional[Any], Optional[str]]: """Central router: given a provider name and optional model, return a configured client with the correct auth, base URL, and API format. @@ -585,6 +602,8 @@ def resolve_provider_client( instead of wrapping in CodexAuxiliaryClient. Use this when the caller needs direct access to responses.stream() (e.g., the main agent loop). + explicit_base_url: Optional direct OpenAI-compatible endpoint. + explicit_api_key: Optional API key paired with explicit_base_url. Returns: (client, resolved_model) or (None, None) if auth is unavailable. 
@@ -661,6 +680,22 @@ def resolve_provider_client( # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ─────────── if provider == "custom": + if explicit_base_url: + custom_base = explicit_base_url.strip() + custom_key = ( + (explicit_api_key or "").strip() + or os.getenv("OPENAI_API_KEY", "").strip() + ) + if not custom_base or not custom_key: + logger.warning( + "resolve_provider_client: explicit custom endpoint requested " + "but no API key was found (set explicit_api_key or OPENAI_API_KEY)" + ) + return None, None + final_model = model or _read_main_model() or "gpt-4o-mini" + client = OpenAI(api_key=custom_key, base_url=custom_base) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) # Try custom first, then codex, then API-key providers for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider): @@ -749,10 +784,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona Callers may override the returned model with a per-task env var (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL). """ - forced = _get_auxiliary_provider(task) - if forced != "auto": - return resolve_provider_client(forced) - return resolve_provider_client("auto") + provider, model, base_url, api_key = _resolve_task_provider_model(task or None) + return resolve_provider_client( + provider, + model=model, + explicit_base_url=base_url, + explicit_api_key=api_key, + ) def get_async_text_auxiliary_client(task: str = ""): @@ -762,10 +800,14 @@ def get_async_text_auxiliary_client(task: str = ""): (AsyncCodexAuxiliaryClient, model) which wraps the Responses API. Returns (None, None) when no provider is available. 
""" - forced = _get_auxiliary_provider(task) - if forced != "auto": - return resolve_provider_client(forced, async_mode=True) - return resolve_provider_client("auto", async_mode=True) + provider, model, base_url, api_key = _resolve_task_provider_model(task or None) + return resolve_provider_client( + provider, + model=model, + async_mode=True, + explicit_base_url=base_url, + explicit_api_key=api_key, + ) _VISION_AUTO_PROVIDER_ORDER = ( @@ -821,26 +863,43 @@ def resolve_vision_provider_client( provider: Optional[str] = None, model: Optional[str] = None, *, + base_url: Optional[str] = None, + api_key: Optional[str] = None, async_mode: bool = False, ) -> Tuple[Optional[str], Optional[Any], Optional[str]]: """Resolve the client actually used for vision tasks. - Explicit provider overrides still use the generic provider router for - non-standard backends, so users can intentionally force experimental - providers. Auto mode stays conservative and only tries vision backends - known to work today. + Direct endpoint overrides take precedence over provider selection. Explicit + provider overrides still use the generic provider router for non-standard + backends, so users can intentionally force experimental providers. Auto mode + stays conservative and only tries vision backends known to work today. 
""" - requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision")) + requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model( + "vision", provider, model, base_url, api_key + ) + requested = _normalize_vision_provider(requested) def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]): if sync_client is None: return resolved_provider, None, None - final_model = model or default_model + final_model = resolved_model or default_model if async_mode: async_client, async_model = _to_async_client(sync_client, final_model) return resolved_provider, async_client, async_model return resolved_provider, sync_client, final_model + if resolved_base_url: + client, final_model = resolve_provider_client( + "custom", + model=resolved_model, + async_mode=async_mode, + explicit_base_url=resolved_base_url, + explicit_api_key=resolved_api_key, + ) + if client is None: + return "custom", None, None + return "custom", client, final_model + if requested == "auto": for candidate in get_available_vision_backends(): sync_client, default_model = _resolve_strict_vision_backend(candidate) @@ -853,7 +912,7 @@ def resolve_vision_provider_client( sync_client, default_model = _resolve_strict_vision_backend(requested) return _finalize(requested, sync_client, default_model) - client, final_model = _get_cached_client(requested, model, async_mode) + client, final_model = _get_cached_client(requested, resolved_model, async_mode) if client is None: return requested, None, None return requested, client, final_model @@ -910,19 +969,29 @@ def auxiliary_max_tokens_param(value: int) -> dict: # Every auxiliary LLM consumer should use these instead of manually # constructing clients and calling .chat.completions.create(). 
-# Client cache: (provider, async_mode) -> (client, default_model) +# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model) _client_cache: Dict[tuple, tuple] = {} def _get_cached_client( - provider: str, model: str = None, async_mode: bool = False, + provider: str, + model: str = None, + async_mode: bool = False, + base_url: str = None, + api_key: str = None, ) -> Tuple[Optional[Any], Optional[str]]: """Get or create a cached client for the given provider.""" - cache_key = (provider, async_mode) + cache_key = (provider, async_mode, base_url or "", api_key or "") if cache_key in _client_cache: cached_client, cached_default = _client_cache[cache_key] return cached_client, model or cached_default - client, default_model = resolve_provider_client(provider, model, async_mode) + client, default_model = resolve_provider_client( + provider, + model, + async_mode, + explicit_base_url=base_url, + explicit_api_key=api_key, + ) if client is not None: _client_cache[cache_key] = (client, default_model) return client, model or default_model @@ -932,57 +1001,75 @@ def _resolve_task_provider_model( task: str = None, provider: str = None, model: str = None, -) -> Tuple[str, Optional[str]]: + base_url: str = None, + api_key: str = None, +) -> Tuple[str, Optional[str], Optional[str], Optional[str]]: """Determine provider + model for a call. Priority: - 1. Explicit provider/model args (always win) - 2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.) - 3. Config file (auxiliary.{task}.provider/model or compression.*) + 1. Explicit provider/model/base_url/api_key args (always win) + 2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*) + 3. Config file (auxiliary.{task}.* or compression.*) 4. "auto" (full auto-detection chain) - Returns (provider, model) where model may be None (use provider default). + Returns (provider, model, base_url, api_key) where model may be None + (use provider default). 
When base_url is set, provider is forced to + "custom" and the task uses that direct endpoint. """ - if provider: - return provider, model + config = {} + cfg_provider = None + cfg_model = None + cfg_base_url = None + cfg_api_key = None if task: - # Check env var overrides first - env_provider = _get_auxiliary_provider(task) - if env_provider != "auto": - # Check for env var model override too - env_model = None - for prefix in ("AUXILIARY_", "CONTEXT_"): - val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip() - if val: - env_model = val - break - return env_provider, model or env_model - - # Read from config file try: from hermes_cli.config import load_config config = load_config() except ImportError: - return "auto", model + config = {} - # Check auxiliary.{task} section - aux = config.get("auxiliary", {}) - task_config = aux.get(task, {}) - cfg_provider = task_config.get("provider", "").strip() or None - cfg_model = task_config.get("model", "").strip() or None + aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} + task_config = aux.get(task, {}) if isinstance(aux, dict) else {} + if not isinstance(task_config, dict): + task_config = {} + cfg_provider = str(task_config.get("provider", "")).strip() or None + cfg_model = str(task_config.get("model", "")).strip() or None + cfg_base_url = str(task_config.get("base_url", "")).strip() or None + cfg_api_key = str(task_config.get("api_key", "")).strip() or None # Backwards compat: compression section has its own keys if task == "compression" and not cfg_provider: - comp = config.get("compression", {}) - cfg_provider = comp.get("summary_provider", "").strip() or None - cfg_model = cfg_model or comp.get("summary_model", "").strip() or None + comp = config.get("compression", {}) if isinstance(config, dict) else {} + if isinstance(comp, dict): + cfg_provider = comp.get("summary_provider", "").strip() or None + cfg_model = cfg_model or comp.get("summary_model", "").strip() or None + env_model = 
_get_auxiliary_env_override(task, "MODEL") if task else None + resolved_model = model or env_model or cfg_model + + if base_url: + return "custom", resolved_model, base_url, api_key + if provider: + return provider, resolved_model, base_url, api_key + + if task: + env_base_url = _get_auxiliary_env_override(task, "BASE_URL") + env_api_key = _get_auxiliary_env_override(task, "API_KEY") + if env_base_url: + return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key + + env_provider = _get_auxiliary_provider(task) + if env_provider != "auto": + return env_provider, resolved_model, None, None + + if cfg_base_url: + return "custom", resolved_model, cfg_base_url, cfg_api_key if cfg_provider and cfg_provider != "auto": - return cfg_provider, model or cfg_model - return "auto", model or cfg_model + return cfg_provider, resolved_model, None, None + return "auto", resolved_model, None, None - return "auto", model + return "auto", resolved_model, None, None def _build_call_kwargs( @@ -994,6 +1081,7 @@ def _build_call_kwargs( tools: Optional[list] = None, timeout: float = 30.0, extra_body: Optional[dict] = None, + base_url: Optional[str] = None, ) -> dict: """Build kwargs for .chat.completions.create() with model/provider adjustments.""" kwargs: Dict[str, Any] = { @@ -1009,7 +1097,7 @@ def _build_call_kwargs( # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens. # Direct OpenAI api.openai.com with newer models needs max_completion_tokens. 
if provider == "custom": - custom_base = os.getenv("OPENAI_BASE_URL", "") + custom_base = base_url or os.getenv("OPENAI_BASE_URL", "") if "api.openai.com" in custom_base.lower(): kwargs["max_completion_tokens"] = max_tokens else: @@ -1035,6 +1123,8 @@ def call_llm( *, provider: str = None, model: str = None, + base_url: str = None, + api_key: str = None, messages: list, temperature: float = None, max_tokens: int = None, @@ -1066,16 +1156,18 @@ def call_llm( Raises: RuntimeError: If no provider is configured. """ - resolved_provider, resolved_model = _resolve_task_provider_model( - task, provider, model) + resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model( + task, provider, model, base_url, api_key) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( - provider=resolved_provider, - model=resolved_model, + provider=provider, + model=model, + base_url=base_url, + api_key=api_key, async_mode=False, ) - if client is None and resolved_provider != "auto": + if client is None and resolved_provider != "auto" and not resolved_base_url: logger.warning( "Vision provider %s unavailable, falling back to auto vision backends", resolved_provider, @@ -1092,10 +1184,15 @@ def call_llm( ) resolved_provider = effective_provider or resolved_provider else: - client, final_model = _get_cached_client(resolved_provider, resolved_model) + client, final_model = _get_cached_client( + resolved_provider, + resolved_model, + base_url=resolved_base_url, + api_key=resolved_api_key, + ) if client is None: # Fallback: try openrouter - if resolved_provider != "openrouter": + if resolved_provider != "openrouter" and not resolved_base_url: logger.warning("Provider %s unavailable, falling back to openrouter", resolved_provider) client, final_model = _get_cached_client( @@ -1108,7 +1205,8 @@ def call_llm( kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, 
max_tokens=max_tokens, - tools=tools, timeout=timeout, extra_body=extra_body) + tools=tools, timeout=timeout, extra_body=extra_body, + base_url=resolved_base_url) # Handle max_tokens vs max_completion_tokens retry try: @@ -1127,6 +1225,8 @@ async def async_call_llm( *, provider: str = None, model: str = None, + base_url: str = None, + api_key: str = None, messages: list, temperature: float = None, max_tokens: int = None, @@ -1138,16 +1238,18 @@ async def async_call_llm( Same as call_llm() but async. See call_llm() for full documentation. """ - resolved_provider, resolved_model = _resolve_task_provider_model( - task, provider, model) + resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model( + task, provider, model, base_url, api_key) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( - provider=resolved_provider, - model=resolved_model, + provider=provider, + model=model, + base_url=base_url, + api_key=api_key, async_mode=True, ) - if client is None and resolved_provider != "auto": + if client is None and resolved_provider != "auto" and not resolved_base_url: logger.warning( "Vision provider %s unavailable, falling back to auto vision backends", resolved_provider, @@ -1165,9 +1267,14 @@ async def async_call_llm( resolved_provider = effective_provider or resolved_provider else: client, final_model = _get_cached_client( - resolved_provider, resolved_model, async_mode=True) + resolved_provider, + resolved_model, + async_mode=True, + base_url=resolved_base_url, + api_key=resolved_api_key, + ) if client is None: - if resolved_provider != "openrouter": + if resolved_provider != "openrouter" and not resolved_base_url: logger.warning("Provider %s unavailable, falling back to openrouter", resolved_provider) client, final_model = _get_cached_client( @@ -1181,7 +1288,8 @@ async def async_call_llm( kwargs = _build_call_kwargs( resolved_provider, final_model, messages, 
temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=timeout, extra_body=extra_body) + tools=tools, timeout=timeout, extra_body=extra_body, + base_url=resolved_base_url) try: return await client.chat.completions.create(**kwargs) diff --git a/cli.py b/cli.py index 44c7889c..1bebbf4f 100755 --- a/cli.py +++ b/cli.py @@ -218,11 +218,27 @@ def load_cli_config() -> Dict[str, Any]: "timeout": 300, # Max seconds a sandbox script can run before being killed (5 min) "max_tool_calls": 50, # Max RPC tool calls per execution }, + "auxiliary": { + "vision": { + "provider": "auto", + "model": "", + "base_url": "", + "api_key": "", + }, + "web_extract": { + "provider": "auto", + "model": "", + "base_url": "", + "api_key": "", + }, + }, "delegation": { "max_iterations": 45, # Max tool-calling turns per child agent "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents "model": "", # Subagent model override (empty = inherit parent model) "provider": "", # Subagent provider override (empty = inherit parent provider) + "base_url": "", # Direct OpenAI-compatible endpoint for subagents + "api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY) }, } @@ -363,28 +379,44 @@ def load_cli_config() -> Dict[str, Any]: if config_key in compression_config: os.environ[env_var] = str(compression_config[config_key]) - # Apply auxiliary model overrides to environment variables. - # Vision and web_extract each have their own provider + model pair. + # Apply auxiliary model/direct-endpoint overrides to environment variables. + # Vision and web_extract each have their own provider/model/base_url/api_key tuple. # (Compression is handled in the compression section above.) # Only set env vars for non-empty / non-default values so auto-detection # still works. 
auxiliary_config = defaults.get("auxiliary", {}) auxiliary_task_env = { - # config key → (provider env var, model env var) - "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), - "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + # config key → env var mapping + "vision": { + "provider": "AUXILIARY_VISION_PROVIDER", + "model": "AUXILIARY_VISION_MODEL", + "base_url": "AUXILIARY_VISION_BASE_URL", + "api_key": "AUXILIARY_VISION_API_KEY", + }, + "web_extract": { + "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER", + "model": "AUXILIARY_WEB_EXTRACT_MODEL", + "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", + "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + }, } - for task_key, (prov_env, model_env) in auxiliary_task_env.items(): + for task_key, env_map in auxiliary_task_env.items(): task_cfg = auxiliary_config.get(task_key, {}) if not isinstance(task_cfg, dict): continue prov = str(task_cfg.get("provider", "")).strip() model = str(task_cfg.get("model", "")).strip() + base_url = str(task_cfg.get("base_url", "")).strip() + api_key = str(task_cfg.get("api_key", "")).strip() if prov and prov != "auto": - os.environ[prov_env] = prov + os.environ[env_map["provider"]] = prov if model: - os.environ[model_env] = model + os.environ[env_map["model"]] = model + if base_url: + os.environ[env_map["base_url"]] = base_url + if api_key: + os.environ[env_map["api_key"]] = api_key # Security settings security_config = defaults.get("security", {}) diff --git a/gateway/run.py b/gateway/run.py index e973852b..8941fcec 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -100,24 +100,40 @@ if _config_path.exists(): for _cfg_key, _env_var in _compression_env_map.items(): if _cfg_key in _compression_cfg: os.environ[_env_var] = str(_compression_cfg[_cfg_key]) - # Auxiliary model overrides (vision, web_extract). - # Each task has provider + model; bridge non-default values to env vars. + # Auxiliary model/direct-endpoint overrides (vision, web_extract). 
+ # Each task has provider/model/base_url/api_key; bridge non-default values to env vars. _auxiliary_cfg = _cfg.get("auxiliary", {}) if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict): _aux_task_env = { - "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), - "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + "vision": { + "provider": "AUXILIARY_VISION_PROVIDER", + "model": "AUXILIARY_VISION_MODEL", + "base_url": "AUXILIARY_VISION_BASE_URL", + "api_key": "AUXILIARY_VISION_API_KEY", + }, + "web_extract": { + "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER", + "model": "AUXILIARY_WEB_EXTRACT_MODEL", + "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", + "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + }, } - for _task_key, (_prov_env, _model_env) in _aux_task_env.items(): + for _task_key, _env_map in _aux_task_env.items(): _task_cfg = _auxiliary_cfg.get(_task_key, {}) if not isinstance(_task_cfg, dict): continue _prov = str(_task_cfg.get("provider", "")).strip() _model = str(_task_cfg.get("model", "")).strip() + _base_url = str(_task_cfg.get("base_url", "")).strip() + _api_key = str(_task_cfg.get("api_key", "")).strip() if _prov and _prov != "auto": - os.environ[_prov_env] = _prov + os.environ[_env_map["provider"]] = _prov if _model: - os.environ[_model_env] = _model + os.environ[_env_map["model"]] = _model + if _base_url: + os.environ[_env_map["base_url"]] = _base_url + if _api_key: + os.environ[_env_map["api_key"]] = _api_key _agent_cfg = _cfg.get("agent", {}) if _agent_cfg and isinstance(_agent_cfg, dict): if "max_turns" in _agent_cfg: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index bdde858d..b67405a0 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -150,30 +150,44 @@ DEFAULT_CONFIG = { "vision": { "provider": "auto", # auto | openrouter | nous | codex | custom "model": "", # e.g. 
"google/gemini-2.5-flash", "gpt-4o" + "base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider) + "api_key": "", # API key for base_url (falls back to OPENAI_API_KEY) }, "web_extract": { "provider": "auto", "model": "", + "base_url": "", + "api_key": "", }, "compression": { "provider": "auto", "model": "", + "base_url": "", + "api_key": "", }, "session_search": { "provider": "auto", "model": "", + "base_url": "", + "api_key": "", }, "skills_hub": { "provider": "auto", "model": "", + "base_url": "", + "api_key": "", }, "mcp": { "provider": "auto", "model": "", + "base_url": "", + "api_key": "", }, "flush_memories": { "provider": "auto", "model": "", + "base_url": "", + "api_key": "", }, }, @@ -243,6 +257,8 @@ DEFAULT_CONFIG = { "delegation": { "model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model) "provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials) + "base_url": "", # direct OpenAI-compatible endpoint for subagents + "api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY) }, # Ephemeral prefill messages file — JSON list of {role, content} dicts diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 57c73eb8..d60e3c81 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -24,9 +24,11 @@ def _clean_env(monkeypatch): for key in ( "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", - # Per-task provider/model overrides + # Per-task provider/model/direct-endpoint overrides "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", + "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", + "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY", "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", ): monkeypatch.delenv(key, raising=False) @@ 
-142,6 +144,27 @@ class TestGetTextAuxiliaryClient: call_kwargs = mock_openai.call_args assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" + def test_task_direct_endpoint_override(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1") + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key") + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client("web_extract") + assert model == "task-model" + assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1" + assert mock_openai.call_args.kwargs["api_key"] == "task-key" + + def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1") + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client("web_extract") + assert client is None + assert model is None + mock_openai.assert_not_called() + def test_codex_fallback_when_nothing_else(self, codex_auth_dir): with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client.OpenAI") as mock_openai: @@ -194,6 +217,27 @@ class TestVisionClientFallback: client, model = get_vision_auxiliary_client() assert client is not None # Custom endpoint picked up as fallback + def test_vision_direct_endpoint_override(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1") + monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model") + with 
patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_vision_auxiliary_client() + assert model == "vision-model" + assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1" + assert mock_openai.call_args.kwargs["api_key"] == "vision-key" + + def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_vision_auxiliary_client() + assert client is None + assert model is None + mock_openai.assert_not_called() + def test_vision_uses_openrouter_when_available(self, monkeypatch): monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") with patch("agent.auxiliary_client.OpenAI") as mock_openai: @@ -390,6 +434,24 @@ class TestTaskSpecificOverrides: client, model = get_text_auxiliary_client("web_extract") assert model == "google/gemini-3-flash-preview" + def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "config.yaml").write_text( + """auxiliary: + web_extract: + base_url: http://localhost:3456/v1 + api_key: config-key + model: config-model +""" + ) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client("web_extract") + assert model == "config-model" + assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1" + assert mock_openai.call_args.kwargs["api_key"] == "config-key" + def test_task_without_override_uses_auto(self, monkeypatch): """A task with no provider env var falls through to auto chain.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") diff --git a/tests/conftest.py b/tests/conftest.py index 
9c9f9a44..67fad819 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch): (fake_home / "memories").mkdir() (fake_home / "skills").mkdir() monkeypatch.setenv("HERMES_HOME", str(fake_home)) + # Tests should not inherit the agent's current gateway/messaging surface. + # Individual tests that need gateway behavior set these explicitly. + monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False) + monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False) + monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False) + monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False) @pytest.fixture() diff --git a/tests/test_auxiliary_config_bridge.py b/tests/test_auxiliary_config_bridge.py index a4d65c2a..22e88bdf 100644 --- a/tests/test_auxiliary_config_bridge.py +++ b/tests/test_auxiliary_config_bridge.py @@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch): # Clear env vars for key in ( "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", + "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", + "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY", "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", ): monkeypatch.delenv(key, raising=False) @@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch): auxiliary_cfg = config_dict.get("auxiliary", {}) if auxiliary_cfg and isinstance(auxiliary_cfg, dict): aux_task_env = { - "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), - "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + "vision": { + "provider": "AUXILIARY_VISION_PROVIDER", + "model": "AUXILIARY_VISION_MODEL", + "base_url": "AUXILIARY_VISION_BASE_URL", + "api_key": "AUXILIARY_VISION_API_KEY", + }, + "web_extract": { + "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER", + "model": "AUXILIARY_WEB_EXTRACT_MODEL", + "base_url": 
"AUXILIARY_WEB_EXTRACT_BASE_URL", + "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + }, } - for task_key, (prov_env, model_env) in aux_task_env.items(): + for task_key, env_map in aux_task_env.items(): task_cfg = auxiliary_cfg.get(task_key, {}) if not isinstance(task_cfg, dict): continue prov = str(task_cfg.get("provider", "")).strip() model = str(task_cfg.get("model", "")).strip() + base_url = str(task_cfg.get("base_url", "")).strip() + api_key = str(task_cfg.get("api_key", "")).strip() if prov and prov != "auto": - os.environ[prov_env] = prov + os.environ[env_map["provider"]] = prov if model: - os.environ[model_env] = model + os.environ[env_map["model"]] = model + if base_url: + os.environ[env_map["base_url"]] = base_url + if api_key: + os.environ[env_map["api_key"]] = api_key # ── Config bridging tests ──────────────────────────────────────────────────── @@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge: assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous" assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash" + def test_direct_endpoint_bridged(self, monkeypatch): + config = { + "auxiliary": { + "vision": { + "base_url": "http://localhost:1234/v1", + "api_key": "local-key", + "model": "qwen2.5-vl", + } + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1" + assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key" + assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl" + def test_compression_provider_bridged(self, monkeypatch): config = { "compression": { @@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity: # Check for key patterns that indicate the bridge is present assert "AUXILIARY_VISION_PROVIDER" in content assert "AUXILIARY_VISION_MODEL" in content + assert "AUXILIARY_VISION_BASE_URL" in content + assert "AUXILIARY_VISION_API_KEY" in content assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content assert 
"AUXILIARY_WEB_EXTRACT_MODEL" in content + assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content + assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content def test_gateway_has_compression_provider(self): """Gateway must bridge compression.summary_provider.""" diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 680233b0..a29560b2 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v """ import json +import os import sys import unittest from unittest.mock import MagicMock, patch @@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase): self.assertEqual(creds["api_mode"], "chat_completions") mock_resolve.assert_called_once_with(requested="openrouter") + def test_direct_endpoint_uses_configured_base_url_and_api_key(self): + parent = _make_mock_parent(depth=0) + cfg = { + "model": "qwen2.5-coder", + "provider": "openrouter", + "base_url": "http://localhost:1234/v1", + "api_key": "local-key", + } + creds = _resolve_delegation_credentials(cfg, parent) + self.assertEqual(creds["model"], "qwen2.5-coder") + self.assertEqual(creds["provider"], "custom") + self.assertEqual(creds["base_url"], "http://localhost:1234/v1") + self.assertEqual(creds["api_key"], "local-key") + self.assertEqual(creds["api_mode"], "chat_completions") + + def test_direct_endpoint_falls_back_to_openai_api_key_env(self): + parent = _make_mock_parent(depth=0) + cfg = { + "model": "qwen2.5-coder", + "base_url": "http://localhost:1234/v1", + } + with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False): + creds = _resolve_delegation_credentials(cfg, parent) + self.assertEqual(creds["api_key"], "env-openai-key") + self.assertEqual(creds["provider"], "custom") + + def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self): + parent = _make_mock_parent(depth=0) + cfg = { + "model": "qwen2.5-coder", + "base_url": "http://localhost:1234/v1", 
+ } + with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False): + with self.assertRaises(ValueError) as ctx: + _resolve_delegation_credentials(cfg, parent) + self.assertIn("OPENAI_API_KEY", str(ctx.exception)) + @patch("hermes_cli.runtime_provider.resolve_runtime_provider") def test_nous_provider_resolves_nous_credentials(self, mock_resolve): """Nous provider resolves Nous Portal base_url and api_key.""" @@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase): self.assertNotEqual(kwargs["base_url"], parent.base_url) self.assertNotEqual(kwargs["api_key"], parent.api_key) + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg): + mock_cfg.return_value = { + "max_iterations": 45, + "model": "qwen2.5-coder", + "base_url": "http://localhost:1234/v1", + "api_key": "local-key", + } + mock_creds.return_value = { + "model": "qwen2.5-coder", + "provider": "custom", + "base_url": "http://localhost:1234/v1", + "api_key": "local-key", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", "completed": True, "api_calls": 1 + } + MockAgent.return_value = mock_child + + delegate_task(goal="Direct endpoint test", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["model"], "qwen2.5-coder") + self.assertEqual(kwargs["provider"], "custom") + self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1") + self.assertEqual(kwargs["api_key"], "local-key") + self.assertEqual(kwargs["api_mode"], "chat_completions") + @patch("tools.delegate_tool._load_config") @patch("tools.delegate_tool._resolve_delegation_credentials") def test_empty_config_inherits_parent(self, mock_creds, mock_cfg): diff --git 
a/tools/delegate_tool.py b/tools/delegate_tool.py index 76026be5..0d5908ab 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -540,18 +540,51 @@ def delegate_task( def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: """Resolve credentials for subagent delegation. - If ``delegation.provider`` is configured, resolves the full credential - bundle (base_url, api_key, api_mode, provider) via the runtime provider - system — the same path used by CLI/gateway startup. This lets subagents - run on a completely different provider:model pair. + If ``delegation.base_url`` is configured, subagents use that direct + OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is + configured, the full credential bundle (base_url, api_key, api_mode, + provider) is resolved via the runtime provider system — the same path used + by CLI/gateway startup. This lets subagents run on a completely different + provider:model pair. - If no provider is configured, returns None values so the child inherits - everything from the parent agent. + If neither base_url nor provider is configured, returns None values so the + child inherits everything from the parent agent. Raises ValueError with a user-friendly message on credential failure. """ - configured_model = cfg.get("model") or None - configured_provider = cfg.get("provider") or None + configured_model = str(cfg.get("model") or "").strip() or None + configured_provider = str(cfg.get("provider") or "").strip() or None + configured_base_url = str(cfg.get("base_url") or "").strip() or None + configured_api_key = str(cfg.get("api_key") or "").strip() or None + + if configured_base_url: + api_key = ( + configured_api_key + or os.getenv("OPENAI_API_KEY", "").strip() + ) + if not api_key: + raise ValueError( + "Delegation base_url is configured but no API key was found. " + "Set delegation.api_key or OPENAI_API_KEY." 
+ ) + + base_lower = configured_base_url.lower() + provider = "custom" + api_mode = "chat_completions" + if "chatgpt.com/backend-api/codex" in base_lower: + provider = "openai-codex" + api_mode = "codex_responses" + elif "api.anthropic.com" in base_lower: + provider = "anthropic" + api_mode = "anthropic_messages" + + return { + "model": configured_model, + "provider": provider, + "base_url": configured_base_url, + "api_key": api_key, + "api_mode": api_mode, + } if not configured_provider: # No provider override — child inherits everything from parent @@ -570,7 +603,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: except Exception as exc: raise ValueError( f"Cannot resolve delegation provider '{configured_provider}': {exc}. " - f"Check that the provider is configured (API key set, valid provider name). " + f"Check that the provider is configured (API key set, valid provider name), " + f"or set delegation.base_url/delegation.api_key for a direct endpoint. " f"Available providers: openrouter, nous, zai, kimi-coding, minimax." 
) from exc diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index d4f633ee..36a54d26 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -180,6 +180,23 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) | | `CONTEXT_COMPRESSION_MODEL` | Model for summaries | +## Auxiliary Task Overrides + +| Variable | Description | +|----------|-------------| +| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks | +| `AUXILIARY_VISION_MODEL` | Override model for vision tasks | +| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks | +| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` | +| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization | +| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization | +| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization | +| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` | +| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries | +| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries | + +For task-specific direct endpoints, Hermes uses the task's configured API key or `OPENAI_API_KEY`. It does not reuse `OPENROUTER_API_KEY` for those custom endpoints. 
+ ## Provider Routing (config.yaml only) These go in `~/.hermes/config.yaml` under the `provider_routing` section: diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 71525764..0a1c50cb 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -569,11 +569,15 @@ auxiliary: vision: provider: "auto" # "auto", "openrouter", "nous", "main" model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash" + base_url: "" # direct OpenAI-compatible endpoint (takes precedence over provider) + api_key: "" # API key for base_url (falls back to OPENAI_API_KEY) # Web page summarization + browser page text extraction web_extract: provider: "auto" model: "" # e.g. "google/gemini-2.5-flash" + base_url: "" + api_key: "" ``` ### Changing the Vision Model @@ -604,6 +608,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o ### Common Setups +**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs): +```yaml +auxiliary: + vision: + base_url: "http://localhost:1234/v1" + api_key: "local-key" + model: "qwen2.5-vl" +``` + +`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint. For direct endpoint overrides, Hermes uses the configured `api_key` or falls back to `OPENAI_API_KEY`; it does not reuse `OPENROUTER_API_KEY` for that custom endpoint. + **Using OpenAI API key for vision:** ```yaml # In ~/.hermes/.env: @@ -848,13 +863,17 @@ delegation: - web # model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent) # provider: "openrouter" # Override provider (empty = inherit parent) + # base_url: "http://localhost:1234/v1" # Direct OpenAI-compatible endpoint (takes precedence over provider) + # api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY) ``` **Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. 
Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model. +**Direct endpoint override:** If you want the obvious custom-endpoint path, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. That sends subagents directly to that OpenAI-compatible endpoint and takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back to `OPENAI_API_KEY` only. + The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed. -**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter). +**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter). 
## Clarify diff --git a/website/docs/user-guide/features/delegation.md b/website/docs/user-guide/features/delegation.md index 78237167..f3193d9a 100644 --- a/website/docs/user-guide/features/delegation.md +++ b/website/docs/user-guide/features/delegation.md @@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children delegation: max_iterations: 50 # Max turns per child (default: 50) default_toolsets: ["terminal", "file", "web"] # Default toolsets + model: "google/gemini-3-flash-preview" # Optional provider/model override + provider: "openrouter" # Optional built-in provider + +# Or use a direct custom endpoint instead of provider: +delegation: + model: "qwen2.5-coder" + base_url: "http://localhost:1234/v1" + api_key: "local-key" ``` :::tip From 95939a1b5130c4a04bf67eaacbbb7ea7af5bd3f3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:17:41 -0700 Subject: [PATCH 2/4] docs: clarify gateway service scopes (#1378) --- cron/__init__.py | 3 ++- hermes_cli/cron.py | 4 +++- hermes_cli/main.py | 2 +- website/docs/guides/daily-briefing-bot.md | 7 +++++-- .../docs/guides/team-telegram-assistant.md | 10 ++++++++-- website/docs/user-guide/features/cron.md | 3 ++- website/docs/user-guide/messaging/email.md | 3 ++- website/docs/user-guide/messaging/index.md | 20 +++++++++++++++---- website/docs/user-guide/messaging/signal.md | 3 ++- website/docs/user-guide/messaging/slack.md | 3 ++- website/docs/user-guide/messaging/whatsapp.md | 3 ++- 11 files changed, 45 insertions(+), 16 deletions(-) diff --git a/cron/__init__.py b/cron/__init__.py index 31d7bf8e..2c44cabf 100644 --- a/cron/__init__.py +++ b/cron/__init__.py @@ -7,7 +7,8 @@ This module provides scheduled task execution, allowing the agent to: - Execute tasks in isolated sessions (no prior context) Cron jobs are executed automatically by the gateway daemon: - hermes gateway install # Install as system service (recommended) + 
hermes gateway install # Install as a user service + sudo hermes gateway install --system # Linux servers: boot-time system service hermes gateway # Or run in foreground The gateway ticks the scheduler every 60 seconds. A file lock prevents diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index a068d637..97a22579 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -96,6 +96,7 @@ def cron_list(show_all: bool = False): if not find_gateway_pids(): print(color(" ⚠ Gateway is not running — jobs won't fire automatically.", Colors.YELLOW)) print(color(" Start it with: hermes gateway install", Colors.DIM)) + print(color(" sudo hermes gateway install --system # Linux servers", Colors.DIM)) print() @@ -120,7 +121,8 @@ def cron_status(): print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED)) print() print(" To enable automatic execution:") - print(" hermes gateway install # Install as system service (recommended)") + print(" hermes gateway install # Install as a user service") + print(" sudo hermes gateway install --system # Linux servers: boot-time system service") print(" hermes gateway # Or run in foreground") print() diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 1238d9b6..5f9356b8 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2313,7 +2313,7 @@ Examples: hermes gateway Run messaging gateway hermes -s hermes-agent-dev,github-auth hermes -w Start in isolated git worktree - hermes gateway install Install as system service + hermes gateway install Install gateway background service hermes sessions list List past sessions hermes sessions browse Interactive session picker hermes sessions rename ID T Rename/title a session diff --git a/website/docs/guides/daily-briefing-bot.md b/website/docs/guides/daily-briefing-bot.md index 85f11c40..78bfd690 100644 --- a/website/docs/guides/daily-briefing-bot.md +++ b/website/docs/guides/daily-briefing-bot.md @@ -29,7 +29,8 @@ Before starting, make sure you have: - **Hermes Agent 
installed** — see the [Installation guide](/docs/getting-started/installation) - **Gateway running** — the gateway daemon handles cron execution: ```bash - hermes gateway install # Install as system service (recommended) + hermes gateway install # Install as a user service + sudo hermes gateway install --system # Linux servers: boot-time system service # or hermes gateway # Run in foreground ``` @@ -242,10 +243,12 @@ Make sure the scheduler is actually running: hermes cron status ``` -If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability: +If the gateway isn't running, your jobs won't execute. Install it as a background service for reliability: ```bash hermes gateway install +# or on Linux servers +sudo hermes gateway install --system ``` ## Going Further diff --git a/website/docs/guides/team-telegram-assistant.md b/website/docs/guides/team-telegram-assistant.md index 124f4bf3..88de9c70 100644 --- a/website/docs/guides/team-telegram-assistant.md +++ b/website/docs/guides/team-telegram-assistant.md @@ -143,12 +143,13 @@ For a persistent deployment that survives reboots: ```bash hermes gateway install +sudo hermes gateway install --system # Linux only: boot-time system service ``` -This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically. +This creates a background service: a user-level **systemd** service on Linux by default, a **launchd** service on macOS, or a boot-time Linux system service if you pass `--system`. 
```bash -# Linux — manage the service +# Linux — manage the default user service hermes gateway start hermes gateway stop hermes gateway status @@ -158,6 +159,11 @@ journalctl --user -u hermes-gateway -f # Keep running after SSH logout sudo loginctl enable-linger $USER + +# Linux servers — explicit system-service commands +sudo hermes gateway start --system +sudo hermes gateway status --system +journalctl -u hermes-gateway -f ``` ```bash diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index dd7d5606..2d0a4c83 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -156,7 +156,8 @@ What they do: **Cron execution is handled by the gateway daemon.** The gateway ticks the scheduler every 60 seconds, running any due jobs in isolated agent sessions. ```bash -hermes gateway install # Install as system service (recommended) +hermes gateway install # Install as a user service +sudo hermes gateway install --system # Linux: boot-time system service for servers hermes gateway # Or run in foreground hermes cron list diff --git a/website/docs/user-guide/messaging/email.md b/website/docs/user-guide/messaging/email.md index f6746290..8f515e85 100644 --- a/website/docs/user-guide/messaging/email.md +++ b/website/docs/user-guide/messaging/email.md @@ -80,7 +80,8 @@ EMAIL_HOME_ADDRESS=your@email.com # Default delivery target for cron jobs ```bash hermes gateway # Run in foreground -hermes gateway install # Install as a system service +hermes gateway install # Install as a user service +sudo hermes gateway install --system # Linux only: boot-time system service ``` On startup, the adapter: diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index 39c03b74..d71edafe 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -54,10 +54,12 @@ This walks you through configuring each platform with 
arrow-key selection, shows ```bash hermes gateway # Run in foreground hermes gateway setup # Configure messaging platforms interactively -hermes gateway install # Install as systemd service (Linux) / launchd (macOS) -hermes gateway start # Start the service -hermes gateway stop # Stop the service -hermes gateway status # Check service status +hermes gateway install # Install as a user service (Linux) / launchd service (macOS) +sudo hermes gateway install --system # Linux only: install a boot-time system service +hermes gateway start # Start the default service +hermes gateway stop # Stop the default service +hermes gateway status # Check default service status +hermes gateway status --system # Linux only: inspect the system service explicitly ``` ## Chat Commands (Inside Messaging) @@ -188,8 +190,18 @@ journalctl --user -u hermes-gateway -f # Enable lingering (keeps running after logout) sudo loginctl enable-linger $USER + +# Or install a boot-time system service that still runs as your user +sudo hermes gateway install --system +sudo hermes gateway start --system +sudo hermes gateway status --system +journalctl -u hermes-gateway -f ``` +Use the user service on laptops and dev boxes. Use the system service on VPS or headless hosts that should come back at boot without relying on systemd linger. + +Avoid keeping both the user and system gateway units installed at once unless you really mean to. Hermes will warn if it detects both because start/stop/status behavior gets ambiguous. 
+ ### macOS (launchd) ```bash diff --git a/website/docs/user-guide/messaging/signal.md b/website/docs/user-guide/messaging/signal.md index 53bb862a..e1fd5463 100644 --- a/website/docs/user-guide/messaging/signal.md +++ b/website/docs/user-guide/messaging/signal.md @@ -127,7 +127,8 @@ Then start the gateway: ```bash hermes gateway # Foreground -hermes gateway install # Install as a system service +hermes gateway install # Install as a user service +sudo hermes gateway install --system # Linux only: boot-time system service ``` --- diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index 2ff79f35..29a14353 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -168,7 +168,8 @@ Then start the gateway: ```bash hermes gateway # Foreground -hermes gateway install # Install as a system service +hermes gateway install # Install as a user service +sudo hermes gateway install --system # Linux only: boot-time system service ``` --- diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index af432fb8..eb741467 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -101,7 +101,8 @@ Then start the gateway: ```bash hermes gateway # Foreground -hermes gateway install # Install as a system service +hermes gateway install # Install as a user service +sudo hermes gateway install --system # Linux only: boot-time system service ``` The gateway starts the WhatsApp bridge automatically using the saved session. 
From ff3473a37c704b86a4809c349f1627bd83f1c4da Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:18:17 -0700 Subject: [PATCH 3/4] feat: add /plan command (#1372) * feat: add /plan command * refactor: back /plan with bundled skill * docs: document /plan skill --- agent/skill_commands.py | 32 +++++- cli.py | 28 +++++ gateway/run.py | 23 +++- skills/software-development/plan/SKILL.md | 55 +++++++++ tests/agent/test_skill_commands.py | 38 +++++- tests/gateway/test_plan_command.py | 128 +++++++++++++++++++++ tests/test_cli_plan_command.py | 66 +++++++++++ website/docs/reference/skills-catalog.md | 1 + website/docs/reference/slash-commands.md | 4 +- website/docs/user-guide/features/skills.md | 3 + 10 files changed, 372 insertions(+), 6 deletions(-) create mode 100644 skills/software-development/plan/SKILL.md create mode 100644 tests/gateway/test_plan_command.py create mode 100644 tests/test_cli_plan_command.py diff --git a/agent/skill_commands.py b/agent/skill_commands.py index b9d5135f..8afdfa93 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -1,17 +1,38 @@ -"""Skill slash commands — scan installed skills and build invocation messages. +"""Shared slash command helpers for skills and built-in prompt-style modes. Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces -can invoke skills via /skill-name commands. +can invoke skills via /skill-name commands and prompt-only built-ins like +/plan. 
""" import json import logging +import os +import re +from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional logger = logging.getLogger(__name__) _skill_commands: Dict[str, Dict[str, Any]] = {} +_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") + + +def build_plan_path( + user_instruction: str = "", + *, + now: datetime | None = None, +) -> Path: + """Return the default markdown path for a /plan invocation.""" + hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) + slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else "" + slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-") + if slug: + slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-") + slug = slug or "conversation-plan" + timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S") + return hermes_home / "plans" / f"{timestamp}-{slug}.md" def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None: @@ -56,6 +77,7 @@ def _build_skill_message( skill_dir: Path | None, activation_note: str, user_instruction: str = "", + runtime_note: str = "", ) -> str: """Format a loaded skill into a user/system message payload.""" from tools.skills_tool import SKILLS_DIR @@ -115,6 +137,10 @@ def _build_skill_message( parts.append("") parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}") + if runtime_note: + parts.append("") + parts.append(f"[Runtime note: {runtime_note}]") + return "\n".join(parts) @@ -172,6 +198,7 @@ def build_skill_invocation_message( cmd_key: str, user_instruction: str = "", task_id: str | None = None, + runtime_note: str = "", ) -> Optional[str]: """Build the user message content for a skill slash command invocation. 
@@ -201,6 +228,7 @@ def build_skill_invocation_message( skill_dir, activation_note, user_instruction=user_instruction, + runtime_note=runtime_note, ) diff --git a/cli.py b/cli.py index 1bebbf4f..654dfb25 100755 --- a/cli.py +++ b/cli.py @@ -1080,6 +1080,7 @@ from agent.skill_commands import ( scan_skill_commands, get_skill_commands, build_skill_invocation_message, + build_plan_path, build_preloaded_skills_prompt, ) @@ -3193,6 +3194,8 @@ class HermesCLI: elif cmd_lower.startswith("/personality"): # Use original case (handler lowercases the personality name itself) self._handle_personality_command(cmd_original) + elif cmd_lower == "/plan" or cmd_lower.startswith("/plan "): + self._handle_plan_command(cmd_original) elif cmd_lower == "/retry": retry_msg = self.retry_last() if retry_msg and hasattr(self, '_pending_input'): @@ -3304,6 +3307,31 @@ class HermesCLI: return True + def _handle_plan_command(self, cmd: str): + """Handle /plan [request] — load the bundled plan skill.""" + parts = cmd.strip().split(maxsplit=1) + user_instruction = parts[1].strip() if len(parts) > 1 else "" + + plan_path = build_plan_path(user_instruction) + msg = build_skill_invocation_message( + "/plan", + user_instruction, + task_id=self.session_id, + runtime_note=( + f"Save the markdown plan with write_file to this exact path: {plan_path}" + ), + ) + + if not msg: + self.console.print("[bold red]Failed to load the bundled /plan skill[/]") + return + + _cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}") + if hasattr(self, '_pending_input'): + self._pending_input.put(msg) + else: + self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]") + def _handle_background_command(self, cmd: str): """Handle /background — run a prompt in a separate background session. 
diff --git a/gateway/run.py b/gateway/run.py index 8941fcec..c8c5831e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1114,7 +1114,7 @@ class GatewayRunner: # Emit command:* hook for any recognized slash command _known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning", - "personality", "retry", "undo", "sethome", "set-home", + "personality", "plan", "retry", "undo", "sethome", "set-home", "compress", "usage", "insights", "reload-mcp", "reload_mcp", "update", "title", "resume", "provider", "rollback", "background", "reasoning", "voice"} @@ -1149,6 +1149,27 @@ class GatewayRunner: if command == "personality": return await self._handle_personality_command(event) + + if command == "plan": + try: + from agent.skill_commands import build_plan_path, build_skill_invocation_message + + user_instruction = event.get_command_args().strip() + plan_path = build_plan_path(user_instruction) + event.text = build_skill_invocation_message( + "/plan", + user_instruction, + task_id=_quick_key, + runtime_note=( + f"Save the markdown plan with write_file to this exact path: {plan_path}" + ), + ) + if not event.text: + return "Failed to load the bundled /plan skill." + command = None + except Exception as e: + logger.exception("Failed to prepare /plan command") + return f"Failed to enter plan mode: {e}" if command == "retry": return await self._handle_retry_command(event) diff --git a/skills/software-development/plan/SKILL.md b/skills/software-development/plan/SKILL.md new file mode 100644 index 00000000..92f39e8c --- /dev/null +++ b/skills/software-development/plan/SKILL.md @@ -0,0 +1,55 @@ +--- +name: plan +description: Plan mode for Hermes — inspect context, write a markdown plan, save it under $HERMES_HOME/plans, and do not execute the work. 
+version: 1.0.0 +author: Hermes Agent +license: MIT +metadata: + hermes: + tags: [planning, plan-mode, implementation, workflow] + related_skills: [writing-plans, subagent-driven-development] +--- + +# Plan Mode + +Use this skill when the user wants a plan instead of execution. + +## Core behavior + +For this turn, you are planning only. + +- Do not implement code. +- Do not edit project files except the plan markdown file. +- Do not run mutating terminal commands, commit, push, or perform external actions. +- You may inspect the repo or other context with read-only commands/tools when needed. +- Your deliverable is a markdown plan saved to `$HERMES_HOME/plans`. + +## Output requirements + +Write a markdown plan that is concrete and actionable. + +Include, when relevant: +- Goal +- Current context / assumptions +- Proposed approach +- Step-by-step plan +- Files likely to change +- Tests / validation +- Risks, tradeoffs, and open questions + +If the task is code-related, include exact file paths, likely test targets, and verification steps. + +## Save location + +Save the plan with `write_file` under: +- `$HERMES_HOME/plans/YYYY-MM-DD_HHMMSS-<slug>.md` + +If the runtime provides a specific target path, use that exact path. +If not, create a sensible timestamped filename yourself. + +## Interaction style + +- If the request is clear enough, write the plan directly. +- If no explicit instruction accompanies `/plan`, infer the task from the current conversation context. +- If it is genuinely underspecified, ask a brief clarifying question instead of guessing. +- After saving the plan, reply briefly with what you planned and the saved path. 
diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index 42a6fb4d..8daa7b36 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -1,13 +1,15 @@ """Tests for agent/skill_commands.py — skill slash command scanning and platform filtering.""" import os +from datetime import datetime from unittest.mock import patch import tools.skills_tool as skills_tool_module from agent.skill_commands import ( - scan_skill_commands, - build_skill_invocation_message, + build_plan_path, build_preloaded_skills_prompt, + build_skill_invocation_message, + scan_skill_commands, ) @@ -272,3 +274,35 @@ Generate some audio. assert msg is not None assert 'file_path=""' in msg + + +class TestPlanSkillHelpers: + def test_build_plan_path_uses_hermes_home_and_slugifies_request(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + path = build_plan_path( + "Implement OAuth login + refresh tokens!", + now=datetime(2026, 3, 15, 9, 30, 45), + ) + + assert path == tmp_path / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md" + + def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "plan", + body="Save plans under $HERMES_HOME/plans and do not execute the work.", + ) + scan_skill_commands() + msg = build_skill_invocation_message( + "/plan", + "Add a /plan command", + runtime_note="Save the markdown plan with write_file to /tmp/plans/plan.md", + ) + + assert msg is not None + assert "Save plans under $HERMES_HOME/plans" in msg + assert "Add a /plan command" in msg + assert "/tmp/plans/plan.md" in msg + assert "Runtime note:" in msg diff --git a/tests/gateway/test_plan_command.py b/tests/gateway/test_plan_command.py new file mode 100644 index 00000000..2cfea42e --- /dev/null +++ b/tests/gateway/test_plan_command.py @@ -0,0 +1,128 @@ +"""Tests for the /plan gateway slash 
command.""" + +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from agent.skill_commands import scan_skill_commands +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionEntry, SessionSource + + +def _make_runner(): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + runner.adapters = {} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = SessionEntry( + session_key="agent:main:telegram:dm:c1:u1", + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store.load_transcript.return_value = [] + runner.session_store.has_any_sessions.return_value = True + runner.session_store.append_to_transcript = MagicMock() + runner.session_store.rewrite_transcript = MagicMock() + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = None + runner._reasoning_config = None + runner._provider_routing = {} + runner._fallback_model = None + runner._show_reasoning = False + runner._is_user_authorized = lambda _source: True + runner._set_session_env = lambda _context: None + runner._run_agent = AsyncMock( + return_value={ + "final_response": "planned", + "messages": [], + "tools": [], + "history_offset": 0, + "last_prompt_tokens": 0, + } + ) + return runner + + +def _make_event(text="/plan"): + return MessageEvent( + text=text, + source=SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", + ), + 
message_id="m1", + ) + + +def _make_plan_skill(skills_dir): + skill_dir = skills_dir / "plan" + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text( + """--- +name: plan +description: Plan mode skill. +--- + +# Plan + +Use the current conversation context when no explicit instruction is provided. +Save plans under $HERMES_HOME/plans. +""" + ) + + +class TestGatewayPlanCommand: + @pytest.mark.asyncio + async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path): + import gateway.run as gateway_run + + runner = _make_runner() + event = _make_event("/plan Add OAuth login") + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + monkeypatch.setattr( + "agent.model_metadata.get_model_context_length", + lambda *_args, **_kwargs: 100_000, + ) + + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_plan_skill(tmp_path) + scan_skill_commands() + result = await runner._handle_message(event) + + assert result == "planned" + forwarded = runner._run_agent.call_args.kwargs["message"] + assert "Plan mode skill" in forwarded + assert "Add OAuth login" in forwarded + assert str(tmp_path / "plans") in forwarded + assert "Runtime note:" in forwarded + + @pytest.mark.asyncio + async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path): + runner = _make_runner() + event = _make_event("/help") + + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_plan_skill(tmp_path) + scan_skill_commands() + result = await runner._handle_help_command(event) + + assert "/plan" in result diff --git a/tests/test_cli_plan_command.py b/tests/test_cli_plan_command.py new file mode 100644 index 00000000..50fa1c5e --- /dev/null +++ b/tests/test_cli_plan_command.py @@ -0,0 +1,66 @@ +"""Tests for the /plan CLI slash command.""" + +from unittest.mock import MagicMock, patch + +from agent.skill_commands import 
scan_skill_commands +from cli import HermesCLI + + +def _make_cli(): + cli_obj = HermesCLI.__new__(HermesCLI) + cli_obj.config = {} + cli_obj.console = MagicMock() + cli_obj.agent = None + cli_obj.conversation_history = [] + cli_obj.session_id = "sess-123" + cli_obj._pending_input = MagicMock() + return cli_obj + + +def _make_plan_skill(skills_dir): + skill_dir = skills_dir / "plan" + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text( + """--- +name: plan +description: Plan mode skill. +--- + +# Plan + +Use the current conversation context when no explicit instruction is provided. +Save plans under $HERMES_HOME/plans. +""" + ) + + +class TestCLIPlanCommand: + def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + cli_obj = _make_cli() + + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_plan_skill(tmp_path) + scan_skill_commands() + result = cli_obj.process_command("/plan Add OAuth login") + + assert result is True + cli_obj._pending_input.put.assert_called_once() + queued = cli_obj._pending_input.put.call_args[0][0] + assert "Plan mode skill" in queued + assert "Add OAuth login" in queued + assert str(tmp_path / "plans") in queued + assert "Runtime note:" in queued + + def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + cli_obj = _make_cli() + + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_plan_skill(tmp_path) + scan_skill_commands() + cli_obj.process_command("/plan") + + queued = cli_obj._pending_input.put.call_args[0][0] + assert "current conversation context" in queued + assert "conversation-plan" in queued diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 1be8a5f3..a6eb510e 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -236,6 
+236,7 @@ Skills for controlling smart home devices — lights, switches, sensors, and hom | Skill | Description | Path | |-------|-------------|------| | `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` | +| `plan` | Plan mode for Hermes — inspect context, write a markdown plan, save it under `$HERMES_HOME/plans`, and do not execute the work. | `software-development/plan` | | `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` | | `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` | | `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` | diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 9ef45460..a9e9f420 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -11,7 +11,7 @@ Hermes has two slash-command surfaces: - **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py` - **Messaging slash commands** — handled by `gateway/run.py` -Installed skills are also exposed as dynamic slash commands on both surfaces. +Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `~/.hermes/plans/`. ## Interactive CLI slash commands @@ -32,6 +32,7 @@ Type `/` in the CLI to open the autocomplete menu. 
Built-in commands are case-in | `/compress` | Manually compress conversation context (flush memories + summarize) | | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) | | `/background` | Run a prompt in the background (usage: /background <prompt>) | +| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `~/.hermes/plans/`. | ### Configuration @@ -109,6 +110,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. | | `/rollback [number]` | List or restore filesystem checkpoints. | | `/background <prompt>` | Run a prompt in a separate background session. | +| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `~/.hermes/plans/`. | | `/reload-mcp` | Reload MCP servers from config. | | `/update` | Update Hermes Agent to the latest version. | | `/help` | Show messaging help. | diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md index ae2d6f05..bf40f5e0 100644 --- a/website/docs/user-guide/features/skills.md +++ b/website/docs/user-guide/features/skills.md @@ -24,11 +24,14 @@ Every installed skill is automatically available as a slash command: /gif-search funny cats /axolotl help me fine-tune Llama 3 on my dataset /github-pr-workflow create a PR for the auth refactor +/plan design a rollout for migrating our auth provider # Just the skill name loads it and lets the agent ask what you need: /excalidraw ``` +The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. 
Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `~/.hermes/plans/`. + You can also interact with skills through natural conversation: ```bash From b14a07315b5f9420f4396085501d743a01352c8e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:28:51 -0700 Subject: [PATCH 4/4] fix: save /plan output in workspace (#1381) --- agent/skill_commands.py | 12 ++++++++---- cli.py | 3 ++- gateway/run.py | 3 ++- skills/software-development/plan/SKILL.md | 10 ++++++---- tests/agent/test_skill_commands.py | 19 +++++++++++-------- tests/gateway/test_plan_command.py | 7 ++++--- tests/test_cli_plan_command.py | 11 ++++++----- website/docs/reference/skills-catalog.md | 2 +- website/docs/reference/slash-commands.md | 6 +++--- website/docs/user-guide/features/skills.md | 2 +- 10 files changed, 44 insertions(+), 31 deletions(-) diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 8afdfa93..67315ee8 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -7,7 +7,6 @@ can invoke skills via /skill-name commands and prompt-only built-ins like import json import logging -import os import re from datetime import datetime from pathlib import Path @@ -24,15 +23,20 @@ def build_plan_path( *, now: datetime | None = None, ) -> Path: - """Return the default markdown path for a /plan invocation.""" - hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) + """Return the default workspace-relative markdown path for a /plan invocation. + + Relative paths are intentional: file tools are task/backend-aware and resolve + them against the active working directory for local, docker, ssh, modal, + daytona, and similar terminal backends. That keeps the plan with the active + workspace instead of the Hermes host's global home directory. 
+ """ slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else "" slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-") if slug: slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-") slug = slug or "conversation-plan" timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S") - return hermes_home / "plans" / f"{timestamp}-{slug}.md" + return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md" def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None: diff --git a/cli.py b/cli.py index 654dfb25..70a202d3 100755 --- a/cli.py +++ b/cli.py @@ -3318,7 +3318,8 @@ class HermesCLI: user_instruction, task_id=self.session_id, runtime_note=( - f"Save the markdown plan with write_file to this exact path: {plan_path}" + "Save the markdown plan with write_file to this exact relative path " + f"inside the active workspace/backend cwd: {plan_path}" ), ) diff --git a/gateway/run.py b/gateway/run.py index c8c5831e..67e93d2c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1161,7 +1161,8 @@ class GatewayRunner: user_instruction, task_id=_quick_key, runtime_note=( - f"Save the markdown plan with write_file to this exact path: {plan_path}" + "Save the markdown plan with write_file to this exact relative path " + f"inside the active workspace/backend cwd: {plan_path}" ), ) if not event.text: diff --git a/skills/software-development/plan/SKILL.md b/skills/software-development/plan/SKILL.md index 92f39e8c..daf6bf79 100644 --- a/skills/software-development/plan/SKILL.md +++ b/skills/software-development/plan/SKILL.md @@ -1,6 +1,6 @@ --- name: plan -description: Plan mode for Hermes — inspect context, write a markdown plan, save it under $HERMES_HOME/plans, and do not execute the work. +description: Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work. 
version: 1.0.0 author: Hermes Agent license: MIT @@ -22,7 +22,7 @@ For this turn, you are planning only. - Do not edit project files except the plan markdown file. - Do not run mutating terminal commands, commit, push, or perform external actions. - You may inspect the repo or other context with read-only commands/tools when needed. -- Your deliverable is a markdown plan saved to `$HERMES_HOME/plans`. +- Your deliverable is a markdown plan saved inside the active workspace under `.hermes/plans/`. ## Output requirements @@ -42,10 +42,12 @@ If the task is code-related, include exact file paths, likely test targets, and ## Save location Save the plan with `write_file` under: -- `$HERMES_HOME/plans/YYYY-MM-DD_HHMMSS-.md` +- `.hermes/plans/YYYY-MM-DD_HHMMSS-<slug>.md` + +Treat that as relative to the active working directory / backend workspace. Hermes file tools are backend-aware, so using this relative path keeps the plan with the workspace on local, docker, ssh, modal, and daytona backends. If the runtime provides a specific target path, use that exact path. -If not, create a sensible timestamped filename yourself. +If not, create a sensible timestamped filename yourself under `.hermes/plans/`. ## Interaction style diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index 8daa7b36..c0244613 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -2,6 +2,7 @@ import os from datetime import datetime +from pathlib import Path from unittest.mock import patch import tools.skills_tool as skills_tool_module @@ -277,32 +278,34 @@ Generate some audio. 
class TestPlanSkillHelpers: - def test_build_plan_path_uses_hermes_home_and_slugifies_request(self, tmp_path, monkeypatch): - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - + def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self): path = build_plan_path( "Implement OAuth login + refresh tokens!", now=datetime(2026, 3, 15, 9, 30, 45), ) - assert path == tmp_path / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md" + assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md" def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path): with patch("tools.skills_tool.SKILLS_DIR", tmp_path): _make_skill( tmp_path, "plan", - body="Save plans under $HERMES_HOME/plans and do not execute the work.", + body="Save plans under .hermes/plans in the active workspace and do not execute the work.", ) scan_skill_commands() msg = build_skill_invocation_message( "/plan", "Add a /plan command", - runtime_note="Save the markdown plan with write_file to /tmp/plans/plan.md", + runtime_note=( + "Save the markdown plan with write_file to this exact relative path inside " + "the active workspace/backend cwd: .hermes/plans/plan.md" + ), ) assert msg is not None - assert "Save plans under $HERMES_HOME/plans" in msg + assert "Save plans under $HERMES_HOME/plans" not in msg + assert ".hermes/plans" in msg assert "Add a /plan command" in msg - assert "/tmp/plans/plan.md" in msg + assert ".hermes/plans/plan.md" in msg assert "Runtime note:" in msg diff --git a/tests/gateway/test_plan_command.py b/tests/gateway/test_plan_command.py index 2cfea42e..d43f46cd 100644 --- a/tests/gateway/test_plan_command.py +++ b/tests/gateway/test_plan_command.py @@ -83,7 +83,7 @@ description: Plan mode skill. # Plan Use the current conversation context when no explicit instruction is provided. -Save plans under $HERMES_HOME/plans. +Save plans under the active workspace's .hermes/plans directory. 
""" ) @@ -96,7 +96,6 @@ class TestGatewayPlanCommand: runner = _make_runner() event = _make_event("/plan Add OAuth login") - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) monkeypatch.setattr( "agent.model_metadata.get_model_context_length", @@ -112,7 +111,9 @@ class TestGatewayPlanCommand: forwarded = runner._run_agent.call_args.kwargs["message"] assert "Plan mode skill" in forwarded assert "Add OAuth login" in forwarded - assert str(tmp_path / "plans") in forwarded + assert ".hermes/plans" in forwarded + assert str(tmp_path / "plans") not in forwarded + assert "active workspace/backend cwd" in forwarded assert "Runtime note:" in forwarded @pytest.mark.asyncio diff --git a/tests/test_cli_plan_command.py b/tests/test_cli_plan_command.py index 50fa1c5e..8f8205d7 100644 --- a/tests/test_cli_plan_command.py +++ b/tests/test_cli_plan_command.py @@ -29,14 +29,13 @@ description: Plan mode skill. # Plan Use the current conversation context when no explicit instruction is provided. -Save plans under $HERMES_HOME/plans. +Save plans under the active workspace's .hermes/plans directory. 
""" ) class TestCLIPlanCommand: def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch): - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) cli_obj = _make_cli() with patch("tools.skills_tool.SKILLS_DIR", tmp_path): @@ -49,11 +48,12 @@ class TestCLIPlanCommand: queued = cli_obj._pending_input.put.call_args[0][0] assert "Plan mode skill" in queued assert "Add OAuth login" in queued - assert str(tmp_path / "plans") in queued + assert ".hermes/plans" in queued + assert str(tmp_path / "plans") not in queued + assert "active workspace/backend cwd" in queued assert "Runtime note:" in queued def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch): - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) cli_obj = _make_cli() with patch("tools.skills_tool.SKILLS_DIR", tmp_path): @@ -63,4 +63,5 @@ class TestCLIPlanCommand: queued = cli_obj._pending_input.put.call_args[0][0] assert "current conversation context" in queued - assert "conversation-plan" in queued + assert ".hermes/plans/" in queued + assert "conversation-plan.md" in queued diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index a6eb510e..7e128f11 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -236,7 +236,7 @@ Skills for controlling smart home devices — lights, switches, sensors, and hom | Skill | Description | Path | |-------|-------------|------| | `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` | -| `plan` | Plan mode for Hermes — inspect context, write a markdown plan, save it under `$HERMES_HOME/plans`, and do not execute the work. | `software-development/plan` | +| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. 
| `software-development/plan` | | `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` | | `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` | | `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` | diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index a9e9f420..d69d1c75 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -11,7 +11,7 @@ Hermes has two slash-command surfaces: - **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py` - **Messaging slash commands** — handled by `gateway/run.py` -Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `~/.hermes/plans/`. +Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `.hermes/plans/` relative to the active workspace/backend working directory. ## Interactive CLI slash commands @@ -32,7 +32,7 @@ Type `/` in the CLI to open the autocomplete menu. 
Built-in commands are case-in | `/compress` | Manually compress conversation context (flush memories + summarize) | | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) | | `/background` | Run a prompt in the background (usage: /background <prompt>) | -| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `~/.hermes/plans/`. | +| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. | ### Configuration @@ -110,7 +110,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. | | `/rollback [number]` | List or restore filesystem checkpoints. | | `/background <prompt>` | Run a prompt in a separate background session. | -| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `~/.hermes/plans/`. | +| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. | | `/reload-mcp` | Reload MCP servers from config. | | `/update` | Update Hermes Agent to the latest version. | | `/help` | Show messaging help. | diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md index bf40f5e0..f9073ce7 100644 --- a/website/docs/user-guide/features/skills.md +++ b/website/docs/user-guide/features/skills.md @@ -30,7 +30,7 @@ Every installed skill is automatically available as a slash command: /excalidraw ``` -The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. 
Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `~/.hermes/plans/`. +The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `.hermes/plans/` relative to the active workspace/backend working directory. You can also interact with skills through natural conversation: