Merge origin/main into hermes/hermes-7ef7cb6a
This commit is contained in:
commit
9c322f7f59
31 changed files with 956 additions and 121 deletions
|
|
@ -30,6 +30,10 @@ Default "auto" follows the chains above.
|
|||
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
||||
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
||||
than the provider's default.
|
||||
|
||||
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
|
||||
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
|
||||
custom OpenAI-compatible endpoint without touching the main model settings.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
|
@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
|
|||
return "auto"
|
||||
|
||||
|
||||
def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
|
||||
"""Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
|
||||
if not task:
|
||||
return None
|
||||
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
||||
val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
|
||||
if val:
|
||||
return val
|
||||
return None
|
||||
|
||||
|
||||
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
|
|
@ -599,6 +614,8 @@ def resolve_provider_client(
|
|||
model: str = None,
|
||||
async_mode: bool = False,
|
||||
raw_codex: bool = False,
|
||||
explicit_base_url: str = None,
|
||||
explicit_api_key: str = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Central router: given a provider name and optional model, return a
|
||||
configured client with the correct auth, base URL, and API format.
|
||||
|
|
@ -620,6 +637,8 @@ def resolve_provider_client(
|
|||
instead of wrapping in CodexAuxiliaryClient. Use this when
|
||||
the caller needs direct access to responses.stream() (e.g.,
|
||||
the main agent loop).
|
||||
explicit_base_url: Optional direct OpenAI-compatible endpoint.
|
||||
explicit_api_key: Optional API key paired with explicit_base_url.
|
||||
|
||||
Returns:
|
||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||
|
|
@ -696,6 +715,22 @@ def resolve_provider_client(
|
|||
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
if explicit_base_url:
|
||||
custom_base = explicit_base_url.strip()
|
||||
custom_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||
)
|
||||
if not custom_base or not custom_key:
|
||||
logger.warning(
|
||||
"resolve_provider_client: explicit custom endpoint requested "
|
||||
"but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
|
||||
)
|
||||
return None, None
|
||||
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
for try_fn in (_try_custom_endpoint, _try_codex,
|
||||
_resolve_api_key_provider):
|
||||
|
|
@ -784,10 +819,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
|
|||
Callers may override the returned model with a per-task env var
|
||||
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
||||
"""
|
||||
forced = _get_auxiliary_provider(task)
|
||||
if forced != "auto":
|
||||
return resolve_provider_client(forced)
|
||||
return resolve_provider_client("auto")
|
||||
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||
return resolve_provider_client(
|
||||
provider,
|
||||
model=model,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
|
||||
|
||||
def get_async_text_auxiliary_client(task: str = ""):
|
||||
|
|
@ -797,10 +835,14 @@ def get_async_text_auxiliary_client(task: str = ""):
|
|||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||
Returns (None, None) when no provider is available.
|
||||
"""
|
||||
forced = _get_auxiliary_provider(task)
|
||||
if forced != "auto":
|
||||
return resolve_provider_client(forced, async_mode=True)
|
||||
return resolve_provider_client("auto", async_mode=True)
|
||||
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||
return resolve_provider_client(
|
||||
provider,
|
||||
model=model,
|
||||
async_mode=True,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
|
||||
|
||||
_VISION_AUTO_PROVIDER_ORDER = (
|
||||
|
|
@ -856,26 +898,43 @@ def resolve_vision_provider_client(
|
|||
provider: Optional[str] = None,
|
||||
model: Optional[str] = None,
|
||||
*,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
async_mode: bool = False,
|
||||
) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
|
||||
"""Resolve the client actually used for vision tasks.
|
||||
|
||||
Explicit provider overrides still use the generic provider router for
|
||||
non-standard backends, so users can intentionally force experimental
|
||||
providers. Auto mode stays conservative and only tries vision backends
|
||||
known to work today.
|
||||
Direct endpoint overrides take precedence over provider selection. Explicit
|
||||
provider overrides still use the generic provider router for non-standard
|
||||
backends, so users can intentionally force experimental providers. Auto mode
|
||||
stays conservative and only tries vision backends known to work today.
|
||||
"""
|
||||
requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision"))
|
||||
requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||
"vision", provider, model, base_url, api_key
|
||||
)
|
||||
requested = _normalize_vision_provider(requested)
|
||||
|
||||
def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
|
||||
if sync_client is None:
|
||||
return resolved_provider, None, None
|
||||
final_model = model or default_model
|
||||
final_model = resolved_model or default_model
|
||||
if async_mode:
|
||||
async_client, async_model = _to_async_client(sync_client, final_model)
|
||||
return resolved_provider, async_client, async_model
|
||||
return resolved_provider, sync_client, final_model
|
||||
|
||||
if resolved_base_url:
|
||||
client, final_model = resolve_provider_client(
|
||||
"custom",
|
||||
model=resolved_model,
|
||||
async_mode=async_mode,
|
||||
explicit_base_url=resolved_base_url,
|
||||
explicit_api_key=resolved_api_key,
|
||||
)
|
||||
if client is None:
|
||||
return "custom", None, None
|
||||
return "custom", client, final_model
|
||||
|
||||
if requested == "auto":
|
||||
for candidate in get_available_vision_backends():
|
||||
sync_client, default_model = _resolve_strict_vision_backend(candidate)
|
||||
|
|
@ -888,7 +947,7 @@ def resolve_vision_provider_client(
|
|||
sync_client, default_model = _resolve_strict_vision_backend(requested)
|
||||
return _finalize(requested, sync_client, default_model)
|
||||
|
||||
client, final_model = _get_cached_client(requested, model, async_mode)
|
||||
client, final_model = _get_cached_client(requested, resolved_model, async_mode)
|
||||
if client is None:
|
||||
return requested, None, None
|
||||
return requested, client, final_model
|
||||
|
|
@ -945,19 +1004,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
|||
# Every auxiliary LLM consumer should use these instead of manually
|
||||
# constructing clients and calling .chat.completions.create().
|
||||
|
||||
# Client cache: (provider, async_mode) -> (client, default_model)
|
||||
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
|
||||
_client_cache: Dict[tuple, tuple] = {}
|
||||
|
||||
|
||||
def _get_cached_client(
|
||||
provider: str, model: str = None, async_mode: bool = False,
|
||||
provider: str,
|
||||
model: str = None,
|
||||
async_mode: bool = False,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Get or create a cached client for the given provider."""
|
||||
cache_key = (provider, async_mode)
|
||||
cache_key = (provider, async_mode, base_url or "", api_key or "")
|
||||
if cache_key in _client_cache:
|
||||
cached_client, cached_default = _client_cache[cache_key]
|
||||
return cached_client, model or cached_default
|
||||
client, default_model = resolve_provider_client(provider, model, async_mode)
|
||||
client, default_model = resolve_provider_client(
|
||||
provider,
|
||||
model,
|
||||
async_mode,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
if client is not None:
|
||||
_client_cache[cache_key] = (client, default_model)
|
||||
return client, model or default_model
|
||||
|
|
@ -967,57 +1036,75 @@ def _resolve_task_provider_model(
|
|||
task: str = None,
|
||||
provider: str = None,
|
||||
model: str = None,
|
||||
) -> Tuple[str, Optional[str]]:
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
|
||||
"""Determine provider + model for a call.
|
||||
|
||||
Priority:
|
||||
1. Explicit provider/model args (always win)
|
||||
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
|
||||
3. Config file (auxiliary.{task}.provider/model or compression.*)
|
||||
1. Explicit provider/model/base_url/api_key args (always win)
|
||||
2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
|
||||
3. Config file (auxiliary.{task}.* or compression.*)
|
||||
4. "auto" (full auto-detection chain)
|
||||
|
||||
Returns (provider, model) where model may be None (use provider default).
|
||||
Returns (provider, model, base_url, api_key) where model may be None
|
||||
(use provider default). When base_url is set, provider is forced to
|
||||
"custom" and the task uses that direct endpoint.
|
||||
"""
|
||||
if provider:
|
||||
return provider, model
|
||||
config = {}
|
||||
cfg_provider = None
|
||||
cfg_model = None
|
||||
cfg_base_url = None
|
||||
cfg_api_key = None
|
||||
|
||||
if task:
|
||||
# Check env var overrides first
|
||||
env_provider = _get_auxiliary_provider(task)
|
||||
if env_provider != "auto":
|
||||
# Check for env var model override too
|
||||
env_model = None
|
||||
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
||||
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
|
||||
if val:
|
||||
env_model = val
|
||||
break
|
||||
return env_provider, model or env_model
|
||||
|
||||
# Read from config file
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
except ImportError:
|
||||
return "auto", model
|
||||
config = {}
|
||||
|
||||
# Check auxiliary.{task} section
|
||||
aux = config.get("auxiliary", {})
|
||||
task_config = aux.get(task, {})
|
||||
cfg_provider = task_config.get("provider", "").strip() or None
|
||||
cfg_model = task_config.get("model", "").strip() or None
|
||||
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||
if not isinstance(task_config, dict):
|
||||
task_config = {}
|
||||
cfg_provider = str(task_config.get("provider", "")).strip() or None
|
||||
cfg_model = str(task_config.get("model", "")).strip() or None
|
||||
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
|
||||
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
|
||||
|
||||
# Backwards compat: compression section has its own keys
|
||||
if task == "compression" and not cfg_provider:
|
||||
comp = config.get("compression", {})
|
||||
cfg_provider = comp.get("summary_provider", "").strip() or None
|
||||
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
||||
comp = config.get("compression", {}) if isinstance(config, dict) else {}
|
||||
if isinstance(comp, dict):
|
||||
cfg_provider = comp.get("summary_provider", "").strip() or None
|
||||
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
||||
|
||||
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
|
||||
resolved_model = model or env_model or cfg_model
|
||||
|
||||
if base_url:
|
||||
return "custom", resolved_model, base_url, api_key
|
||||
if provider:
|
||||
return provider, resolved_model, base_url, api_key
|
||||
|
||||
if task:
|
||||
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
|
||||
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
|
||||
if env_base_url:
|
||||
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
|
||||
|
||||
env_provider = _get_auxiliary_provider(task)
|
||||
if env_provider != "auto":
|
||||
return env_provider, resolved_model, None, None
|
||||
|
||||
if cfg_base_url:
|
||||
return "custom", resolved_model, cfg_base_url, cfg_api_key
|
||||
if cfg_provider and cfg_provider != "auto":
|
||||
return cfg_provider, model or cfg_model
|
||||
return "auto", model or cfg_model
|
||||
return cfg_provider, resolved_model, None, None
|
||||
return "auto", resolved_model, None, None
|
||||
|
||||
return "auto", model
|
||||
return "auto", resolved_model, None, None
|
||||
|
||||
|
||||
def _build_call_kwargs(
|
||||
|
|
@ -1029,6 +1116,7 @@ def _build_call_kwargs(
|
|||
tools: Optional[list] = None,
|
||||
timeout: float = 30.0,
|
||||
extra_body: Optional[dict] = None,
|
||||
base_url: Optional[str] = None,
|
||||
) -> dict:
|
||||
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
|
||||
kwargs: Dict[str, Any] = {
|
||||
|
|
@ -1044,7 +1132,7 @@ def _build_call_kwargs(
|
|||
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
|
||||
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
||||
if provider == "custom":
|
||||
custom_base = _current_custom_base_url()
|
||||
custom_base = base_url or _current_custom_base_url()
|
||||
if "api.openai.com" in custom_base.lower():
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
else:
|
||||
|
|
@ -1070,6 +1158,8 @@ def call_llm(
|
|||
*,
|
||||
provider: str = None,
|
||||
model: str = None,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
messages: list,
|
||||
temperature: float = None,
|
||||
max_tokens: int = None,
|
||||
|
|
@ -1101,16 +1191,18 @@ def call_llm(
|
|||
Raises:
|
||||
RuntimeError: If no provider is configured.
|
||||
"""
|
||||
resolved_provider, resolved_model = _resolve_task_provider_model(
|
||||
task, provider, model)
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
|
||||
if task == "vision":
|
||||
effective_provider, client, final_model = resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=resolved_model,
|
||||
provider=provider,
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
async_mode=False,
|
||||
)
|
||||
if client is None and resolved_provider != "auto":
|
||||
if client is None and resolved_provider != "auto" and not resolved_base_url:
|
||||
logger.warning(
|
||||
"Vision provider %s unavailable, falling back to auto vision backends",
|
||||
resolved_provider,
|
||||
|
|
@ -1127,10 +1219,15 @@ def call_llm(
|
|||
)
|
||||
resolved_provider = effective_provider or resolved_provider
|
||||
else:
|
||||
client, final_model = _get_cached_client(resolved_provider, resolved_model)
|
||||
client, final_model = _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
)
|
||||
if client is None:
|
||||
# Fallback: try openrouter
|
||||
if resolved_provider != "openrouter":
|
||||
if resolved_provider != "openrouter" and not resolved_base_url:
|
||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||
resolved_provider)
|
||||
client, final_model = _get_cached_client(
|
||||
|
|
@ -1143,7 +1240,8 @@ def call_llm(
|
|||
kwargs = _build_call_kwargs(
|
||||
resolved_provider, final_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=timeout, extra_body=extra_body)
|
||||
tools=tools, timeout=timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry
|
||||
try:
|
||||
|
|
@ -1162,6 +1260,8 @@ async def async_call_llm(
|
|||
*,
|
||||
provider: str = None,
|
||||
model: str = None,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
messages: list,
|
||||
temperature: float = None,
|
||||
max_tokens: int = None,
|
||||
|
|
@ -1173,16 +1273,18 @@ async def async_call_llm(
|
|||
|
||||
Same as call_llm() but async. See call_llm() for full documentation.
|
||||
"""
|
||||
resolved_provider, resolved_model = _resolve_task_provider_model(
|
||||
task, provider, model)
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
|
||||
if task == "vision":
|
||||
effective_provider, client, final_model = resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=resolved_model,
|
||||
provider=provider,
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
async_mode=True,
|
||||
)
|
||||
if client is None and resolved_provider != "auto":
|
||||
if client is None and resolved_provider != "auto" and not resolved_base_url:
|
||||
logger.warning(
|
||||
"Vision provider %s unavailable, falling back to auto vision backends",
|
||||
resolved_provider,
|
||||
|
|
@ -1200,9 +1302,14 @@ async def async_call_llm(
|
|||
resolved_provider = effective_provider or resolved_provider
|
||||
else:
|
||||
client, final_model = _get_cached_client(
|
||||
resolved_provider, resolved_model, async_mode=True)
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
)
|
||||
if client is None:
|
||||
if resolved_provider != "openrouter":
|
||||
if resolved_provider != "openrouter" and not resolved_base_url:
|
||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||
resolved_provider)
|
||||
client, final_model = _get_cached_client(
|
||||
|
|
@ -1216,7 +1323,8 @@ async def async_call_llm(
|
|||
kwargs = _build_call_kwargs(
|
||||
resolved_provider, final_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=timeout, extra_body=extra_body)
|
||||
tools=tools, timeout=timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
try:
|
||||
return await client.chat.completions.create(**kwargs)
|
||||
|
|
|
|||
|
|
@ -1,17 +1,42 @@
|
|||
"""Skill slash commands — scan installed skills and build invocation messages.
|
||||
"""Shared slash command helpers for skills and built-in prompt-style modes.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands.
|
||||
can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||
/plan.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
user_instruction: str = "",
|
||||
*,
|
||||
now: datetime | None = None,
|
||||
) -> Path:
|
||||
"""Return the default workspace-relative markdown path for a /plan invocation.
|
||||
|
||||
Relative paths are intentional: file tools are task/backend-aware and resolve
|
||||
them against the active working directory for local, docker, ssh, modal,
|
||||
daytona, and similar terminal backends. That keeps the plan with the active
|
||||
workspace instead of the Hermes host's global home directory.
|
||||
"""
|
||||
slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
|
||||
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
|
||||
if slug:
|
||||
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
|
||||
slug = slug or "conversation-plan"
|
||||
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
|
||||
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
|
|
@ -56,6 +81,7 @@ def _build_skill_message(
|
|||
skill_dir: Path | None,
|
||||
activation_note: str,
|
||||
user_instruction: str = "",
|
||||
runtime_note: str = "",
|
||||
) -> str:
|
||||
"""Format a loaded skill into a user/system message payload."""
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
|
|
@ -115,6 +141,10 @@ def _build_skill_message(
|
|||
parts.append("")
|
||||
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
|
||||
|
||||
if runtime_note:
|
||||
parts.append("")
|
||||
parts.append(f"[Runtime note: {runtime_note}]")
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
|
|
@ -172,6 +202,7 @@ def build_skill_invocation_message(
|
|||
cmd_key: str,
|
||||
user_instruction: str = "",
|
||||
task_id: str | None = None,
|
||||
runtime_note: str = "",
|
||||
) -> Optional[str]:
|
||||
"""Build the user message content for a skill slash command invocation.
|
||||
|
||||
|
|
@ -201,6 +232,7 @@ def build_skill_invocation_message(
|
|||
skill_dir,
|
||||
activation_note,
|
||||
user_instruction=user_instruction,
|
||||
runtime_note=runtime_note,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
77
cli.py
77
cli.py
|
|
@ -218,11 +218,27 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
|
||||
"max_tool_calls": 50, # Max RPC tool calls per execution
|
||||
},
|
||||
"auxiliary": {
|
||||
"vision": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
"web_extract": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
},
|
||||
"delegation": {
|
||||
"max_iterations": 45, # Max tool-calling turns per child agent
|
||||
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
|
||||
"model": "", # Subagent model override (empty = inherit parent model)
|
||||
"provider": "", # Subagent provider override (empty = inherit parent provider)
|
||||
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -363,28 +379,44 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
if config_key in compression_config:
|
||||
os.environ[env_var] = str(compression_config[config_key])
|
||||
|
||||
# Apply auxiliary model overrides to environment variables.
|
||||
# Vision and web_extract each have their own provider + model pair.
|
||||
# Apply auxiliary model/direct-endpoint overrides to environment variables.
|
||||
# Vision and web_extract each have their own provider/model/base_url/api_key tuple.
|
||||
# (Compression is handled in the compression section above.)
|
||||
# Only set env vars for non-empty / non-default values so auto-detection
|
||||
# still works.
|
||||
auxiliary_config = defaults.get("auxiliary", {})
|
||||
auxiliary_task_env = {
|
||||
# config key → (provider env var, model env var)
|
||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
||||
# config key → env var mapping
|
||||
"vision": {
|
||||
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||
"model": "AUXILIARY_VISION_MODEL",
|
||||
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||
},
|
||||
"web_extract": {
|
||||
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||
},
|
||||
}
|
||||
|
||||
for task_key, (prov_env, model_env) in auxiliary_task_env.items():
|
||||
for task_key, env_map in auxiliary_task_env.items():
|
||||
task_cfg = auxiliary_config.get(task_key, {})
|
||||
if not isinstance(task_cfg, dict):
|
||||
continue
|
||||
prov = str(task_cfg.get("provider", "")).strip()
|
||||
model = str(task_cfg.get("model", "")).strip()
|
||||
base_url = str(task_cfg.get("base_url", "")).strip()
|
||||
api_key = str(task_cfg.get("api_key", "")).strip()
|
||||
if prov and prov != "auto":
|
||||
os.environ[prov_env] = prov
|
||||
os.environ[env_map["provider"]] = prov
|
||||
if model:
|
||||
os.environ[model_env] = model
|
||||
os.environ[env_map["model"]] = model
|
||||
if base_url:
|
||||
os.environ[env_map["base_url"]] = base_url
|
||||
if api_key:
|
||||
os.environ[env_map["api_key"]] = api_key
|
||||
|
||||
# Security settings
|
||||
security_config = defaults.get("security", {})
|
||||
|
|
@ -1048,6 +1080,7 @@ from agent.skill_commands import (
|
|||
scan_skill_commands,
|
||||
get_skill_commands,
|
||||
build_skill_invocation_message,
|
||||
build_plan_path,
|
||||
build_preloaded_skills_prompt,
|
||||
)
|
||||
|
||||
|
|
@ -3161,6 +3194,8 @@ class HermesCLI:
|
|||
elif cmd_lower.startswith("/personality"):
|
||||
# Use original case (handler lowercases the personality name itself)
|
||||
self._handle_personality_command(cmd_original)
|
||||
elif cmd_lower == "/plan" or cmd_lower.startswith("/plan "):
|
||||
self._handle_plan_command(cmd_original)
|
||||
elif cmd_lower == "/retry":
|
||||
retry_msg = self.retry_last()
|
||||
if retry_msg and hasattr(self, '_pending_input'):
|
||||
|
|
@ -3272,6 +3307,32 @@ class HermesCLI:
|
|||
|
||||
return True
|
||||
|
||||
def _handle_plan_command(self, cmd: str):
|
||||
"""Handle /plan [request] — load the bundled plan skill."""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
user_instruction = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
plan_path = build_plan_path(user_instruction)
|
||||
msg = build_skill_invocation_message(
|
||||
"/plan",
|
||||
user_instruction,
|
||||
task_id=self.session_id,
|
||||
runtime_note=(
|
||||
"Save the markdown plan with write_file to this exact relative path "
|
||||
f"inside the active workspace/backend cwd: {plan_path}"
|
||||
),
|
||||
)
|
||||
|
||||
if not msg:
|
||||
self.console.print("[bold red]Failed to load the bundled /plan skill[/]")
|
||||
return
|
||||
|
||||
_cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
|
||||
if hasattr(self, '_pending_input'):
|
||||
self._pending_input.put(msg)
|
||||
else:
|
||||
self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]")
|
||||
|
||||
def _handle_background_command(self, cmd: str):
|
||||
"""Handle /background <prompt> — run a prompt in a separate background session.
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ This module provides scheduled task execution, allowing the agent to:
|
|||
- Execute tasks in isolated sessions (no prior context)
|
||||
|
||||
Cron jobs are executed automatically by the gateway daemon:
|
||||
hermes gateway install # Install as system service (recommended)
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux servers: boot-time system service
|
||||
hermes gateway # Or run in foreground
|
||||
|
||||
The gateway ticks the scheduler every 60 seconds. A file lock prevents
|
||||
|
|
|
|||
|
|
@ -100,24 +100,40 @@ if _config_path.exists():
|
|||
for _cfg_key, _env_var in _compression_env_map.items():
|
||||
if _cfg_key in _compression_cfg:
|
||||
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
||||
# Auxiliary model overrides (vision, web_extract).
|
||||
# Each task has provider + model; bridge non-default values to env vars.
|
||||
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
|
||||
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
|
||||
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
||||
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
||||
_aux_task_env = {
|
||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
||||
"vision": {
|
||||
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||
"model": "AUXILIARY_VISION_MODEL",
|
||||
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||
},
|
||||
"web_extract": {
|
||||
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||
},
|
||||
}
|
||||
for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
|
||||
for _task_key, _env_map in _aux_task_env.items():
|
||||
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
||||
if not isinstance(_task_cfg, dict):
|
||||
continue
|
||||
_prov = str(_task_cfg.get("provider", "")).strip()
|
||||
_model = str(_task_cfg.get("model", "")).strip()
|
||||
_base_url = str(_task_cfg.get("base_url", "")).strip()
|
||||
_api_key = str(_task_cfg.get("api_key", "")).strip()
|
||||
if _prov and _prov != "auto":
|
||||
os.environ[_prov_env] = _prov
|
||||
os.environ[_env_map["provider"]] = _prov
|
||||
if _model:
|
||||
os.environ[_model_env] = _model
|
||||
os.environ[_env_map["model"]] = _model
|
||||
if _base_url:
|
||||
os.environ[_env_map["base_url"]] = _base_url
|
||||
if _api_key:
|
||||
os.environ[_env_map["api_key"]] = _api_key
|
||||
_agent_cfg = _cfg.get("agent", {})
|
||||
if _agent_cfg and isinstance(_agent_cfg, dict):
|
||||
if "max_turns" in _agent_cfg:
|
||||
|
|
@ -1098,7 +1114,7 @@ class GatewayRunner:
|
|||
|
||||
# Emit command:* hook for any recognized slash command
|
||||
_known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
|
||||
"personality", "retry", "undo", "sethome", "set-home",
|
||||
"personality", "plan", "retry", "undo", "sethome", "set-home",
|
||||
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
||||
"update", "title", "resume", "provider", "rollback",
|
||||
"background", "reasoning", "voice"}
|
||||
|
|
@ -1134,6 +1150,28 @@ class GatewayRunner:
|
|||
if command == "personality":
|
||||
return await self._handle_personality_command(event)
|
||||
|
||||
if command == "plan":
|
||||
try:
|
||||
from agent.skill_commands import build_plan_path, build_skill_invocation_message
|
||||
|
||||
user_instruction = event.get_command_args().strip()
|
||||
plan_path = build_plan_path(user_instruction)
|
||||
event.text = build_skill_invocation_message(
|
||||
"/plan",
|
||||
user_instruction,
|
||||
task_id=_quick_key,
|
||||
runtime_note=(
|
||||
"Save the markdown plan with write_file to this exact relative path "
|
||||
f"inside the active workspace/backend cwd: {plan_path}"
|
||||
),
|
||||
)
|
||||
if not event.text:
|
||||
return "Failed to load the bundled /plan skill."
|
||||
command = None
|
||||
except Exception as e:
|
||||
logger.exception("Failed to prepare /plan command")
|
||||
return f"Failed to enter plan mode: {e}"
|
||||
|
||||
if command == "retry":
|
||||
return await self._handle_retry_command(event)
|
||||
|
||||
|
|
|
|||
|
|
@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
|
|||
"vision": {
|
||||
"provider": "auto", # auto | openrouter | nous | codex | custom
|
||||
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
||||
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
"web_extract": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
"compression": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
"session_search": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
"skills_hub": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
"mcp": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
},
|
||||
},
|
||||
|
||||
|
|
@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
|
|||
"delegation": {
|
||||
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
|
||||
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
||||
"base_url": "", # direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ def cron_list(show_all: bool = False):
|
|||
if not find_gateway_pids():
|
||||
print(color(" ⚠ Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
|
||||
print(color(" Start it with: hermes gateway install", Colors.DIM))
|
||||
print(color(" sudo hermes gateway install --system # Linux servers", Colors.DIM))
|
||||
print()
|
||||
|
||||
|
||||
|
|
@ -120,7 +121,8 @@ def cron_status():
|
|||
print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
|
||||
print()
|
||||
print(" To enable automatic execution:")
|
||||
print(" hermes gateway install # Install as system service (recommended)")
|
||||
print(" hermes gateway install # Install as a user service")
|
||||
print(" sudo hermes gateway install --system # Linux servers: boot-time system service")
|
||||
print(" hermes gateway # Or run in foreground")
|
||||
|
||||
print()
|
||||
|
|
|
|||
|
|
@ -2313,7 +2313,7 @@ Examples:
|
|||
hermes gateway Run messaging gateway
|
||||
hermes -s hermes-agent-dev,github-auth
|
||||
hermes -w Start in isolated git worktree
|
||||
hermes gateway install Install as system service
|
||||
hermes gateway install Install gateway background service
|
||||
hermes sessions list List past sessions
|
||||
hermes sessions browse Interactive session picker
|
||||
hermes sessions rename ID T Rename/title a session
|
||||
|
|
|
|||
57
skills/software-development/plan/SKILL.md
Normal file
57
skills/software-development/plan/SKILL.md
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
---
|
||||
name: plan
|
||||
description: Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work.
|
||||
version: 1.0.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [planning, plan-mode, implementation, workflow]
|
||||
related_skills: [writing-plans, subagent-driven-development]
|
||||
---
|
||||
|
||||
# Plan Mode
|
||||
|
||||
Use this skill when the user wants a plan instead of execution.
|
||||
|
||||
## Core behavior
|
||||
|
||||
For this turn, you are planning only.
|
||||
|
||||
- Do not implement code.
|
||||
- Do not edit project files except the plan markdown file.
|
||||
- Do not run mutating terminal commands, commit, push, or perform external actions.
|
||||
- You may inspect the repo or other context with read-only commands/tools when needed.
|
||||
- Your deliverable is a markdown plan saved inside the active workspace under `.hermes/plans/`.
|
||||
|
||||
## Output requirements
|
||||
|
||||
Write a markdown plan that is concrete and actionable.
|
||||
|
||||
Include, when relevant:
|
||||
- Goal
|
||||
- Current context / assumptions
|
||||
- Proposed approach
|
||||
- Step-by-step plan
|
||||
- Files likely to change
|
||||
- Tests / validation
|
||||
- Risks, tradeoffs, and open questions
|
||||
|
||||
If the task is code-related, include exact file paths, likely test targets, and verification steps.
|
||||
|
||||
## Save location
|
||||
|
||||
Save the plan with `write_file` under:
|
||||
- `.hermes/plans/YYYY-MM-DD_HHMMSS-<slug>.md`
|
||||
|
||||
Treat that as relative to the active working directory / backend workspace. Hermes file tools are backend-aware, so using this relative path keeps the plan with the workspace on local, docker, ssh, modal, and daytona backends.
|
||||
|
||||
If the runtime provides a specific target path, use that exact path.
|
||||
If not, create a sensible timestamped filename yourself under `.hermes/plans/`.
|
||||
|
||||
## Interaction style
|
||||
|
||||
- If the request is clear enough, write the plan directly.
|
||||
- If no explicit instruction accompanies `/plan`, infer the task from the current conversation context.
|
||||
- If it is genuinely underspecified, ask a brief clarifying question instead of guessing.
|
||||
- After saving the plan, reply briefly with what you planned and the saved path.
|
||||
|
|
@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
|
|||
for key in (
|
||||
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
|
||||
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
|
||||
# Per-task provider/model overrides
|
||||
# Per-task provider/model/direct-endpoint overrides
|
||||
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
||||
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
|
||||
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
|
@ -142,6 +144,27 @@ class TestGetTextAuxiliaryClient:
|
|||
call_kwargs = mock_openai.call_args
|
||||
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
|
||||
|
||||
def test_task_direct_endpoint_override(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
|
||||
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
|
||||
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client("web_extract")
|
||||
assert model == "task-model"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "task-key"
|
||||
|
||||
def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
|
||||
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client("web_extract")
|
||||
assert client is None
|
||||
assert model is None
|
||||
mock_openai.assert_not_called()
|
||||
|
||||
def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
|
||||
config = {
|
||||
"model": {
|
||||
|
|
@ -217,6 +240,27 @@ class TestVisionClientFallback:
|
|||
client, model = get_vision_auxiliary_client()
|
||||
assert client is not None # Custom endpoint picked up as fallback
|
||||
|
||||
def test_vision_direct_endpoint_override(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert model == "vision-model"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
|
||||
|
||||
def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is None
|
||||
assert model is None
|
||||
mock_openai.assert_not_called()
|
||||
|
||||
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
|
|
@ -434,6 +478,24 @@ class TestTaskSpecificOverrides:
|
|||
client, model = get_text_auxiliary_client("web_extract")
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
|
||||
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
(hermes_home / "config.yaml").write_text(
|
||||
"""auxiliary:
|
||||
web_extract:
|
||||
base_url: http://localhost:3456/v1
|
||||
api_key: config-key
|
||||
model: config-model
|
||||
"""
|
||||
)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client("web_extract")
|
||||
assert model == "config-model"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "config-key"
|
||||
|
||||
def test_task_without_override_uses_auto(self, monkeypatch):
|
||||
"""A task with no provider env var falls through to auto chain."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
|
|
|||
|
|
@ -1,13 +1,16 @@
|
|||
"""Tests for agent/skill_commands.py — skill slash command scanning and platform filtering."""
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import tools.skills_tool as skills_tool_module
|
||||
from agent.skill_commands import (
|
||||
scan_skill_commands,
|
||||
build_skill_invocation_message,
|
||||
build_plan_path,
|
||||
build_preloaded_skills_prompt,
|
||||
build_skill_invocation_message,
|
||||
scan_skill_commands,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -272,3 +275,37 @@ Generate some audio.
|
|||
|
||||
assert msg is not None
|
||||
assert 'file_path="<path>"' in msg
|
||||
|
||||
|
||||
class TestPlanSkillHelpers:
|
||||
def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self):
|
||||
path = build_plan_path(
|
||||
"Implement OAuth login + refresh tokens!",
|
||||
now=datetime(2026, 3, 15, 9, 30, 45),
|
||||
)
|
||||
|
||||
assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md"
|
||||
|
||||
def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_skill(
|
||||
tmp_path,
|
||||
"plan",
|
||||
body="Save plans under .hermes/plans in the active workspace and do not execute the work.",
|
||||
)
|
||||
scan_skill_commands()
|
||||
msg = build_skill_invocation_message(
|
||||
"/plan",
|
||||
"Add a /plan command",
|
||||
runtime_note=(
|
||||
"Save the markdown plan with write_file to this exact relative path inside "
|
||||
"the active workspace/backend cwd: .hermes/plans/plan.md"
|
||||
),
|
||||
)
|
||||
|
||||
assert msg is not None
|
||||
assert "Save plans under $HERMES_HOME/plans" not in msg
|
||||
assert ".hermes/plans" in msg
|
||||
assert "Add a /plan command" in msg
|
||||
assert ".hermes/plans/plan.md" in msg
|
||||
assert "Runtime note:" in msg
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
|
|||
(fake_home / "memories").mkdir()
|
||||
(fake_home / "skills").mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(fake_home))
|
||||
# Tests should not inherit the agent's current gateway/messaging surface.
|
||||
# Individual tests that need gateway behavior set these explicitly.
|
||||
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
|
||||
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
|
||||
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
|
||||
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
|
|
|||
129
tests/gateway/test_plan_command.py
Normal file
129
tests/gateway/test_plan_command.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""Tests for the /plan gateway slash command."""
|
||||
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.skill_commands import scan_skill_commands
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionEntry, SessionSource
|
||||
|
||||
|
||||
def _make_runner():
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||
)
|
||||
runner.adapters = {}
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store.get_or_create_session.return_value = SessionEntry(
|
||||
session_key="agent:main:telegram:dm:c1:u1",
|
||||
session_id="sess-1",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
runner.session_store.load_transcript.return_value = []
|
||||
runner.session_store.has_any_sessions.return_value = True
|
||||
runner.session_store.append_to_transcript = MagicMock()
|
||||
runner.session_store.rewrite_transcript = MagicMock()
|
||||
runner._running_agents = {}
|
||||
runner._pending_messages = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._session_db = None
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._show_reasoning = False
|
||||
runner._is_user_authorized = lambda _source: True
|
||||
runner._set_session_env = lambda _context: None
|
||||
runner._run_agent = AsyncMock(
|
||||
return_value={
|
||||
"final_response": "planned",
|
||||
"messages": [],
|
||||
"tools": [],
|
||||
"history_offset": 0,
|
||||
"last_prompt_tokens": 0,
|
||||
}
|
||||
)
|
||||
return runner
|
||||
|
||||
|
||||
def _make_event(text="/plan"):
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
source=SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="u1",
|
||||
chat_id="c1",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
),
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
|
||||
def _make_plan_skill(skills_dir):
|
||||
skill_dir = skills_dir / "plan"
|
||||
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
(skill_dir / "SKILL.md").write_text(
|
||||
"""---
|
||||
name: plan
|
||||
description: Plan mode skill.
|
||||
---
|
||||
|
||||
# Plan
|
||||
|
||||
Use the current conversation context when no explicit instruction is provided.
|
||||
Save plans under the active workspace's .hermes/plans directory.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
class TestGatewayPlanCommand:
|
||||
@pytest.mark.asyncio
|
||||
async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path):
|
||||
import gateway.run as gateway_run
|
||||
|
||||
runner = _make_runner()
|
||||
event = _make_event("/plan Add OAuth login")
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
|
||||
monkeypatch.setattr(
|
||||
"agent.model_metadata.get_model_context_length",
|
||||
lambda *_args, **_kwargs: 100_000,
|
||||
)
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_plan_skill(tmp_path)
|
||||
scan_skill_commands()
|
||||
result = await runner._handle_message(event)
|
||||
|
||||
assert result == "planned"
|
||||
forwarded = runner._run_agent.call_args.kwargs["message"]
|
||||
assert "Plan mode skill" in forwarded
|
||||
assert "Add OAuth login" in forwarded
|
||||
assert ".hermes/plans" in forwarded
|
||||
assert str(tmp_path / "plans") not in forwarded
|
||||
assert "active workspace/backend cwd" in forwarded
|
||||
assert "Runtime note:" in forwarded
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path):
|
||||
runner = _make_runner()
|
||||
event = _make_event("/help")
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_plan_skill(tmp_path)
|
||||
scan_skill_commands()
|
||||
result = await runner._handle_help_command(event)
|
||||
|
||||
assert "/plan" in result
|
||||
|
|
@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
|
|||
# Clear env vars
|
||||
for key in (
|
||||
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
||||
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
|
||||
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
|
@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
|
|||
auxiliary_cfg = config_dict.get("auxiliary", {})
|
||||
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
|
||||
aux_task_env = {
|
||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
||||
"vision": {
|
||||
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||
"model": "AUXILIARY_VISION_MODEL",
|
||||
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||
},
|
||||
"web_extract": {
|
||||
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||
},
|
||||
}
|
||||
for task_key, (prov_env, model_env) in aux_task_env.items():
|
||||
for task_key, env_map in aux_task_env.items():
|
||||
task_cfg = auxiliary_cfg.get(task_key, {})
|
||||
if not isinstance(task_cfg, dict):
|
||||
continue
|
||||
prov = str(task_cfg.get("provider", "")).strip()
|
||||
model = str(task_cfg.get("model", "")).strip()
|
||||
base_url = str(task_cfg.get("base_url", "")).strip()
|
||||
api_key = str(task_cfg.get("api_key", "")).strip()
|
||||
if prov and prov != "auto":
|
||||
os.environ[prov_env] = prov
|
||||
os.environ[env_map["provider"]] = prov
|
||||
if model:
|
||||
os.environ[model_env] = model
|
||||
os.environ[env_map["model"]] = model
|
||||
if base_url:
|
||||
os.environ[env_map["base_url"]] = base_url
|
||||
if api_key:
|
||||
os.environ[env_map["api_key"]] = api_key
|
||||
|
||||
|
||||
# ── Config bridging tests ────────────────────────────────────────────────────
|
||||
|
|
@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
|
|||
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
|
||||
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
|
||||
|
||||
def test_direct_endpoint_bridged(self, monkeypatch):
|
||||
config = {
|
||||
"auxiliary": {
|
||||
"vision": {
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
"api_key": "local-key",
|
||||
"model": "qwen2.5-vl",
|
||||
}
|
||||
}
|
||||
}
|
||||
_run_auxiliary_bridge(config, monkeypatch)
|
||||
assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
|
||||
assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
|
||||
assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
|
||||
|
||||
def test_compression_provider_bridged(self, monkeypatch):
|
||||
config = {
|
||||
"compression": {
|
||||
|
|
@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
|
|||
# Check for key patterns that indicate the bridge is present
|
||||
assert "AUXILIARY_VISION_PROVIDER" in content
|
||||
assert "AUXILIARY_VISION_MODEL" in content
|
||||
assert "AUXILIARY_VISION_BASE_URL" in content
|
||||
assert "AUXILIARY_VISION_API_KEY" in content
|
||||
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
|
||||
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
|
||||
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
|
||||
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
|
||||
|
||||
def test_gateway_has_compression_provider(self):
|
||||
"""Gateway must bridge compression.summary_provider."""
|
||||
|
|
|
|||
67
tests/test_cli_plan_command.py
Normal file
67
tests/test_cli_plan_command.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
"""Tests for the /plan CLI slash command."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from agent.skill_commands import scan_skill_commands
|
||||
from cli import HermesCLI
|
||||
|
||||
|
||||
def _make_cli():
|
||||
cli_obj = HermesCLI.__new__(HermesCLI)
|
||||
cli_obj.config = {}
|
||||
cli_obj.console = MagicMock()
|
||||
cli_obj.agent = None
|
||||
cli_obj.conversation_history = []
|
||||
cli_obj.session_id = "sess-123"
|
||||
cli_obj._pending_input = MagicMock()
|
||||
return cli_obj
|
||||
|
||||
|
||||
def _make_plan_skill(skills_dir):
|
||||
skill_dir = skills_dir / "plan"
|
||||
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
(skill_dir / "SKILL.md").write_text(
|
||||
"""---
|
||||
name: plan
|
||||
description: Plan mode skill.
|
||||
---
|
||||
|
||||
# Plan
|
||||
|
||||
Use the current conversation context when no explicit instruction is provided.
|
||||
Save plans under the active workspace's .hermes/plans directory.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
class TestCLIPlanCommand:
|
||||
def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch):
|
||||
cli_obj = _make_cli()
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_plan_skill(tmp_path)
|
||||
scan_skill_commands()
|
||||
result = cli_obj.process_command("/plan Add OAuth login")
|
||||
|
||||
assert result is True
|
||||
cli_obj._pending_input.put.assert_called_once()
|
||||
queued = cli_obj._pending_input.put.call_args[0][0]
|
||||
assert "Plan mode skill" in queued
|
||||
assert "Add OAuth login" in queued
|
||||
assert ".hermes/plans" in queued
|
||||
assert str(tmp_path / "plans") not in queued
|
||||
assert "active workspace/backend cwd" in queued
|
||||
assert "Runtime note:" in queued
|
||||
|
||||
def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch):
|
||||
cli_obj = _make_cli()
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_plan_skill(tmp_path)
|
||||
scan_skill_commands()
|
||||
cli_obj.process_command("/plan")
|
||||
|
||||
queued = cli_obj._pending_input.put.call_args[0][0]
|
||||
assert "current conversation context" in queued
|
||||
assert ".hermes/plans/" in queued
|
||||
assert "conversation-plan.md" in queued
|
||||
|
|
@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
|
|||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
|
@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase):
|
|||
self.assertEqual(creds["api_mode"], "chat_completions")
|
||||
mock_resolve.assert_called_once_with(requested="openrouter")
|
||||
|
||||
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
|
||||
parent = _make_mock_parent(depth=0)
|
||||
cfg = {
|
||||
"model": "qwen2.5-coder",
|
||||
"provider": "openrouter",
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
"api_key": "local-key",
|
||||
}
|
||||
creds = _resolve_delegation_credentials(cfg, parent)
|
||||
self.assertEqual(creds["model"], "qwen2.5-coder")
|
||||
self.assertEqual(creds["provider"], "custom")
|
||||
self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
|
||||
self.assertEqual(creds["api_key"], "local-key")
|
||||
self.assertEqual(creds["api_mode"], "chat_completions")
|
||||
|
||||
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
|
||||
parent = _make_mock_parent(depth=0)
|
||||
cfg = {
|
||||
"model": "qwen2.5-coder",
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
}
|
||||
with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
|
||||
creds = _resolve_delegation_credentials(cfg, parent)
|
||||
self.assertEqual(creds["api_key"], "env-openai-key")
|
||||
self.assertEqual(creds["provider"], "custom")
|
||||
|
||||
def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
|
||||
parent = _make_mock_parent(depth=0)
|
||||
cfg = {
|
||||
"model": "qwen2.5-coder",
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
}
|
||||
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
|
||||
with self.assertRaises(ValueError) as ctx:
|
||||
_resolve_delegation_credentials(cfg, parent)
|
||||
self.assertIn("OPENAI_API_KEY", str(ctx.exception))
|
||||
|
||||
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
|
||||
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
|
||||
"""Nous provider resolves Nous Portal base_url and api_key."""
|
||||
|
|
@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
|
|||
self.assertNotEqual(kwargs["base_url"], parent.base_url)
|
||||
self.assertNotEqual(kwargs["api_key"], parent.api_key)
|
||||
|
||||
@patch("tools.delegate_tool._load_config")
|
||||
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
||||
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
|
||||
mock_cfg.return_value = {
|
||||
"max_iterations": 45,
|
||||
"model": "qwen2.5-coder",
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
"api_key": "local-key",
|
||||
}
|
||||
mock_creds.return_value = {
|
||||
"model": "qwen2.5-coder",
|
||||
"provider": "custom",
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
"api_key": "local-key",
|
||||
"api_mode": "chat_completions",
|
||||
}
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "done", "completed": True, "api_calls": 1
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
delegate_task(goal="Direct endpoint test", parent_agent=parent)
|
||||
|
||||
_, kwargs = MockAgent.call_args
|
||||
self.assertEqual(kwargs["model"], "qwen2.5-coder")
|
||||
self.assertEqual(kwargs["provider"], "custom")
|
||||
self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
|
||||
self.assertEqual(kwargs["api_key"], "local-key")
|
||||
self.assertEqual(kwargs["api_mode"], "chat_completions")
|
||||
|
||||
@patch("tools.delegate_tool._load_config")
|
||||
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
||||
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
|
||||
|
|
|
|||
|
|
@ -540,18 +540,51 @@ def delegate_task(
|
|||
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
||||
"""Resolve credentials for subagent delegation.
|
||||
|
||||
If ``delegation.provider`` is configured, resolves the full credential
|
||||
bundle (base_url, api_key, api_mode, provider) via the runtime provider
|
||||
system — the same path used by CLI/gateway startup. This lets subagents
|
||||
run on a completely different provider:model pair.
|
||||
If ``delegation.base_url`` is configured, subagents use that direct
|
||||
OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
|
||||
configured, the full credential bundle (base_url, api_key, api_mode,
|
||||
provider) is resolved via the runtime provider system — the same path used
|
||||
by CLI/gateway startup. This lets subagents run on a completely different
|
||||
provider:model pair.
|
||||
|
||||
If no provider is configured, returns None values so the child inherits
|
||||
everything from the parent agent.
|
||||
If neither base_url nor provider is configured, returns None values so the
|
||||
child inherits everything from the parent agent.
|
||||
|
||||
Raises ValueError with a user-friendly message on credential failure.
|
||||
"""
|
||||
configured_model = cfg.get("model") or None
|
||||
configured_provider = cfg.get("provider") or None
|
||||
configured_model = str(cfg.get("model") or "").strip() or None
|
||||
configured_provider = str(cfg.get("provider") or "").strip() or None
|
||||
configured_base_url = str(cfg.get("base_url") or "").strip() or None
|
||||
configured_api_key = str(cfg.get("api_key") or "").strip() or None
|
||||
|
||||
if configured_base_url:
|
||||
api_key = (
|
||||
configured_api_key
|
||||
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||
)
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"Delegation base_url is configured but no API key was found. "
|
||||
"Set delegation.api_key or OPENAI_API_KEY."
|
||||
)
|
||||
|
||||
base_lower = configured_base_url.lower()
|
||||
provider = "custom"
|
||||
api_mode = "chat_completions"
|
||||
if "chatgpt.com/backend-api/codex" in base_lower:
|
||||
provider = "openai-codex"
|
||||
api_mode = "codex_responses"
|
||||
elif "api.anthropic.com" in base_lower:
|
||||
provider = "anthropic"
|
||||
api_mode = "anthropic_messages"
|
||||
|
||||
return {
|
||||
"model": configured_model,
|
||||
"provider": provider,
|
||||
"base_url": configured_base_url,
|
||||
"api_key": api_key,
|
||||
"api_mode": api_mode,
|
||||
}
|
||||
|
||||
if not configured_provider:
|
||||
# No provider override — child inherits everything from parent
|
||||
|
|
@ -570,7 +603,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
|||
except Exception as exc:
|
||||
raise ValueError(
|
||||
f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
|
||||
f"Check that the provider is configured (API key set, valid provider name). "
|
||||
f"Check that the provider is configured (API key set, valid provider name), "
|
||||
f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
|
||||
f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
|
||||
) from exc
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,8 @@ Before starting, make sure you have:
|
|||
- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
|
||||
- **Gateway running** — the gateway daemon handles cron execution:
|
||||
```bash
|
||||
hermes gateway install # Install as system service (recommended)
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux servers: boot-time system service
|
||||
# or
|
||||
hermes gateway # Run in foreground
|
||||
```
|
||||
|
|
@ -242,10 +243,12 @@ Make sure the scheduler is actually running:
|
|||
hermes cron status
|
||||
```
|
||||
|
||||
If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability:
|
||||
If the gateway isn't running, your jobs won't execute. Install it as a background service for reliability:
|
||||
|
||||
```bash
|
||||
hermes gateway install
|
||||
# or on Linux servers
|
||||
sudo hermes gateway install --system
|
||||
```
|
||||
|
||||
## Going Further
|
||||
|
|
|
|||
|
|
@ -143,12 +143,13 @@ For a persistent deployment that survives reboots:
|
|||
|
||||
```bash
|
||||
hermes gateway install
|
||||
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||
```
|
||||
|
||||
This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically.
|
||||
This creates a background service: a user-level **systemd** service on Linux by default, a **launchd** service on macOS, or a boot-time Linux system service if you pass `--system`.
|
||||
|
||||
```bash
|
||||
# Linux — manage the service
|
||||
# Linux — manage the default user service
|
||||
hermes gateway start
|
||||
hermes gateway stop
|
||||
hermes gateway status
|
||||
|
|
@ -158,6 +159,11 @@ journalctl --user -u hermes-gateway -f
|
|||
|
||||
# Keep running after SSH logout
|
||||
sudo loginctl enable-linger $USER
|
||||
|
||||
# Linux servers — explicit system-service commands
|
||||
sudo hermes gateway start --system
|
||||
sudo hermes gateway status --system
|
||||
journalctl -u hermes-gateway -f
|
||||
```
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -180,6 +180,23 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
|||
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
|
||||
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
|
||||
|
||||
## Auxiliary Task Overrides
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
|
||||
| `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
|
||||
| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
|
||||
| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
|
||||
| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
|
||||
| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
|
||||
| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
|
||||
| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
|
||||
| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
|
||||
| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
|
||||
|
||||
For task-specific direct endpoints, Hermes uses the task's configured API key or `OPENAI_API_KEY`. It does not reuse `OPENROUTER_API_KEY` for those custom endpoints.
|
||||
|
||||
## Provider Routing (config.yaml only)
|
||||
|
||||
These go in `~/.hermes/config.yaml` under the `provider_routing` section:
|
||||
|
|
|
|||
|
|
@ -236,6 +236,7 @@ Skills for controlling smart home devices — lights, switches, sensors, and hom
|
|||
| Skill | Description | Path |
|
||||
|-------|-------------|------|
|
||||
| `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
|
||||
| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. | `software-development/plan` |
|
||||
| `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
|
||||
| `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
|
||||
| `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ Hermes has two slash-command surfaces:
|
|||
- **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py`
|
||||
- **Messaging slash commands** — handled by `gateway/run.py`
|
||||
|
||||
Installed skills are also exposed as dynamic slash commands on both surfaces.
|
||||
Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `.hermes/plans/` relative to the active workspace/backend working directory.
|
||||
|
||||
## Interactive CLI slash commands
|
||||
|
||||
|
|
@ -32,6 +32,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
|||
| `/compress` | Manually compress conversation context (flush memories + summarize) |
|
||||
| `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
|
||||
| `/background` | Run a prompt in the background (usage: /background <prompt>) |
|
||||
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
|
||||
|
||||
### Configuration
|
||||
|
||||
|
|
@ -109,6 +110,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
|||
| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. |
|
||||
| `/rollback [number]` | List or restore filesystem checkpoints. |
|
||||
| `/background <prompt>` | Run a prompt in a separate background session. |
|
||||
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
|
||||
| `/reload-mcp` | Reload MCP servers from config. |
|
||||
| `/update` | Update Hermes Agent to the latest version. |
|
||||
| `/help` | Show messaging help. |
|
||||
|
|
|
|||
|
|
@ -571,11 +571,15 @@ auxiliary:
|
|||
vision:
|
||||
provider: "auto" # "auto", "openrouter", "nous", "main"
|
||||
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
|
||||
base_url: "" # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||
api_key: "" # API key for base_url (falls back to OPENAI_API_KEY)
|
||||
|
||||
# Web page summarization + browser page text extraction
|
||||
web_extract:
|
||||
provider: "auto"
|
||||
model: "" # e.g. "google/gemini-2.5-flash"
|
||||
base_url: ""
|
||||
api_key: ""
|
||||
```
|
||||
|
||||
### Changing the Vision Model
|
||||
|
|
@ -606,6 +610,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
|
|||
|
||||
### Common Setups
|
||||
|
||||
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
|
||||
```yaml
|
||||
auxiliary:
|
||||
vision:
|
||||
base_url: "http://localhost:1234/v1"
|
||||
api_key: "local-key"
|
||||
model: "qwen2.5-vl"
|
||||
```
|
||||
|
||||
`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint. For direct endpoint overrides, Hermes uses the configured `api_key` or falls back to `OPENAI_API_KEY`; it does not reuse `OPENROUTER_API_KEY` for that custom endpoint.
|
||||
|
||||
**Using OpenAI API key for vision:**
|
||||
```yaml
|
||||
# In ~/.hermes/.env:
|
||||
|
|
@ -852,13 +867,17 @@ delegation:
|
|||
- web
|
||||
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider (empty = inherit parent)
|
||||
# base_url: "http://localhost:1234/v1" # Direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||
# api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY)
|
||||
```
|
||||
|
||||
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
|
||||
|
||||
**Direct endpoint override:** If you want the obvious custom-endpoint path, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. That sends subagents directly to that OpenAI-compatible endpoint and takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back to `OPENAI_API_KEY` only.
|
||||
|
||||
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
|
||||
|
||||
**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
|
||||
**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
|
||||
|
||||
## Clarify
|
||||
|
||||
|
|
|
|||
|
|
@ -156,7 +156,8 @@ What they do:
|
|||
**Cron execution is handled by the gateway daemon.** The gateway ticks the scheduler every 60 seconds, running any due jobs in isolated agent sessions.
|
||||
|
||||
```bash
|
||||
hermes gateway install # Install as system service (recommended)
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux: boot-time system service for servers
|
||||
hermes gateway # Or run in foreground
|
||||
|
||||
hermes cron list
|
||||
|
|
|
|||
|
|
@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
|
|||
delegation:
|
||||
max_iterations: 50 # Max turns per child (default: 50)
|
||||
default_toolsets: ["terminal", "file", "web"] # Default toolsets
|
||||
model: "google/gemini-3-flash-preview" # Optional provider/model override
|
||||
provider: "openrouter" # Optional built-in provider
|
||||
|
||||
# Or use a direct custom endpoint instead of provider:
|
||||
delegation:
|
||||
model: "qwen2.5-coder"
|
||||
base_url: "http://localhost:1234/v1"
|
||||
api_key: "local-key"
|
||||
```
|
||||
|
||||
:::tip
|
||||
|
|
|
|||
|
|
@ -24,11 +24,14 @@ Every installed skill is automatically available as a slash command:
|
|||
/gif-search funny cats
|
||||
/axolotl help me fine-tune Llama 3 on my dataset
|
||||
/github-pr-workflow create a PR for the auth refactor
|
||||
/plan design a rollout for migrating our auth provider
|
||||
|
||||
# Just the skill name loads it and lets the agent ask what you need:
|
||||
/excalidraw
|
||||
```
|
||||
|
||||
The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `.hermes/plans/` relative to the active workspace/backend working directory.
|
||||
|
||||
You can also interact with skills through natural conversation:
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -80,7 +80,8 @@ EMAIL_HOME_ADDRESS=your@email.com # Default delivery target for cron jobs
|
|||
|
||||
```bash
|
||||
hermes gateway # Run in foreground
|
||||
hermes gateway install # Install as a system service
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||
```
|
||||
|
||||
On startup, the adapter:
|
||||
|
|
|
|||
|
|
@ -54,10 +54,12 @@ This walks you through configuring each platform with arrow-key selection, shows
|
|||
```bash
|
||||
hermes gateway # Run in foreground
|
||||
hermes gateway setup # Configure messaging platforms interactively
|
||||
hermes gateway install # Install as systemd service (Linux) / launchd (macOS)
|
||||
hermes gateway start # Start the service
|
||||
hermes gateway stop # Stop the service
|
||||
hermes gateway status # Check service status
|
||||
hermes gateway install # Install as a user service (Linux) / launchd service (macOS)
|
||||
sudo hermes gateway install --system # Linux only: install a boot-time system service
|
||||
hermes gateway start # Start the default service
|
||||
hermes gateway stop # Stop the default service
|
||||
hermes gateway status # Check default service status
|
||||
hermes gateway status --system # Linux only: inspect the system service explicitly
|
||||
```
|
||||
|
||||
## Chat Commands (Inside Messaging)
|
||||
|
|
@ -188,8 +190,18 @@ journalctl --user -u hermes-gateway -f
|
|||
|
||||
# Enable lingering (keeps running after logout)
|
||||
sudo loginctl enable-linger $USER
|
||||
|
||||
# Or install a boot-time system service that still runs as your user
|
||||
sudo hermes gateway install --system
|
||||
sudo hermes gateway start --system
|
||||
sudo hermes gateway status --system
|
||||
journalctl -u hermes-gateway -f
|
||||
```
|
||||
|
||||
Use the user service on laptops and dev boxes. Use the system service on VPS or headless hosts that should come back at boot without relying on systemd linger.
|
||||
|
||||
Avoid keeping both the user and system gateway units installed at once unless you really mean to. Hermes will warn if it detects both because start/stop/status behavior gets ambiguous.
|
||||
|
||||
### macOS (launchd)
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -127,7 +127,8 @@ Then start the gateway:
|
|||
|
||||
```bash
|
||||
hermes gateway # Foreground
|
||||
hermes gateway install # Install as a system service
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||
```
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -168,7 +168,8 @@ Then start the gateway:
|
|||
|
||||
```bash
|
||||
hermes gateway # Foreground
|
||||
hermes gateway install # Install as a system service
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||
```
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -101,7 +101,8 @@ Then start the gateway:
|
|||
|
||||
```bash
|
||||
hermes gateway # Foreground
|
||||
hermes gateway install # Install as a system service
|
||||
hermes gateway install # Install as a user service
|
||||
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||
```
|
||||
|
||||
The gateway starts the WhatsApp bridge automatically using the saved session.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue