Merge origin/main into hermes/hermes-7ef7cb6a
This commit is contained in:
commit
9c322f7f59
31 changed files with 956 additions and 121 deletions
|
|
@ -30,6 +30,10 @@ Default "auto" follows the chains above.
|
||||||
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
||||||
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
||||||
than the provider's default.
|
than the provider's default.
|
||||||
|
|
||||||
|
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
|
||||||
|
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
|
||||||
|
custom OpenAI-compatible endpoint without touching the main model settings.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
@ -418,6 +422,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
|
||||||
return "auto"
|
return "auto"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
|
||||||
|
"""Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
|
||||||
|
if not task:
|
||||||
|
return None
|
||||||
|
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
||||||
|
val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
|
||||||
|
if val:
|
||||||
|
return val
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||||
if not or_key:
|
if not or_key:
|
||||||
|
|
@ -599,6 +614,8 @@ def resolve_provider_client(
|
||||||
model: str = None,
|
model: str = None,
|
||||||
async_mode: bool = False,
|
async_mode: bool = False,
|
||||||
raw_codex: bool = False,
|
raw_codex: bool = False,
|
||||||
|
explicit_base_url: str = None,
|
||||||
|
explicit_api_key: str = None,
|
||||||
) -> Tuple[Optional[Any], Optional[str]]:
|
) -> Tuple[Optional[Any], Optional[str]]:
|
||||||
"""Central router: given a provider name and optional model, return a
|
"""Central router: given a provider name and optional model, return a
|
||||||
configured client with the correct auth, base URL, and API format.
|
configured client with the correct auth, base URL, and API format.
|
||||||
|
|
@ -620,6 +637,8 @@ def resolve_provider_client(
|
||||||
instead of wrapping in CodexAuxiliaryClient. Use this when
|
instead of wrapping in CodexAuxiliaryClient. Use this when
|
||||||
the caller needs direct access to responses.stream() (e.g.,
|
the caller needs direct access to responses.stream() (e.g.,
|
||||||
the main agent loop).
|
the main agent loop).
|
||||||
|
explicit_base_url: Optional direct OpenAI-compatible endpoint.
|
||||||
|
explicit_api_key: Optional API key paired with explicit_base_url.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||||
|
|
@ -696,6 +715,22 @@ def resolve_provider_client(
|
||||||
|
|
||||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||||
if provider == "custom":
|
if provider == "custom":
|
||||||
|
if explicit_base_url:
|
||||||
|
custom_base = explicit_base_url.strip()
|
||||||
|
custom_key = (
|
||||||
|
(explicit_api_key or "").strip()
|
||||||
|
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||||
|
)
|
||||||
|
if not custom_base or not custom_key:
|
||||||
|
logger.warning(
|
||||||
|
"resolve_provider_client: explicit custom endpoint requested "
|
||||||
|
"but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
|
||||||
|
)
|
||||||
|
return None, None
|
||||||
|
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||||
|
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||||
|
return (_to_async_client(client, final_model) if async_mode
|
||||||
|
else (client, final_model))
|
||||||
# Try custom first, then codex, then API-key providers
|
# Try custom first, then codex, then API-key providers
|
||||||
for try_fn in (_try_custom_endpoint, _try_codex,
|
for try_fn in (_try_custom_endpoint, _try_codex,
|
||||||
_resolve_api_key_provider):
|
_resolve_api_key_provider):
|
||||||
|
|
@ -784,10 +819,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
|
||||||
Callers may override the returned model with a per-task env var
|
Callers may override the returned model with a per-task env var
|
||||||
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider(task)
|
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||||
if forced != "auto":
|
return resolve_provider_client(
|
||||||
return resolve_provider_client(forced)
|
provider,
|
||||||
return resolve_provider_client("auto")
|
model=model,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_async_text_auxiliary_client(task: str = ""):
|
def get_async_text_auxiliary_client(task: str = ""):
|
||||||
|
|
@ -797,10 +835,14 @@ def get_async_text_auxiliary_client(task: str = ""):
|
||||||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||||
Returns (None, None) when no provider is available.
|
Returns (None, None) when no provider is available.
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider(task)
|
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||||
if forced != "auto":
|
return resolve_provider_client(
|
||||||
return resolve_provider_client(forced, async_mode=True)
|
provider,
|
||||||
return resolve_provider_client("auto", async_mode=True)
|
model=model,
|
||||||
|
async_mode=True,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
_VISION_AUTO_PROVIDER_ORDER = (
|
_VISION_AUTO_PROVIDER_ORDER = (
|
||||||
|
|
@ -856,26 +898,43 @@ def resolve_vision_provider_client(
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
model: Optional[str] = None,
|
model: Optional[str] = None,
|
||||||
*,
|
*,
|
||||||
|
base_url: Optional[str] = None,
|
||||||
|
api_key: Optional[str] = None,
|
||||||
async_mode: bool = False,
|
async_mode: bool = False,
|
||||||
) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
|
) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
|
||||||
"""Resolve the client actually used for vision tasks.
|
"""Resolve the client actually used for vision tasks.
|
||||||
|
|
||||||
Explicit provider overrides still use the generic provider router for
|
Direct endpoint overrides take precedence over provider selection. Explicit
|
||||||
non-standard backends, so users can intentionally force experimental
|
provider overrides still use the generic provider router for non-standard
|
||||||
providers. Auto mode stays conservative and only tries vision backends
|
backends, so users can intentionally force experimental providers. Auto mode
|
||||||
known to work today.
|
stays conservative and only tries vision backends known to work today.
|
||||||
"""
|
"""
|
||||||
requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision"))
|
requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||||
|
"vision", provider, model, base_url, api_key
|
||||||
|
)
|
||||||
|
requested = _normalize_vision_provider(requested)
|
||||||
|
|
||||||
def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
|
def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
|
||||||
if sync_client is None:
|
if sync_client is None:
|
||||||
return resolved_provider, None, None
|
return resolved_provider, None, None
|
||||||
final_model = model or default_model
|
final_model = resolved_model or default_model
|
||||||
if async_mode:
|
if async_mode:
|
||||||
async_client, async_model = _to_async_client(sync_client, final_model)
|
async_client, async_model = _to_async_client(sync_client, final_model)
|
||||||
return resolved_provider, async_client, async_model
|
return resolved_provider, async_client, async_model
|
||||||
return resolved_provider, sync_client, final_model
|
return resolved_provider, sync_client, final_model
|
||||||
|
|
||||||
|
if resolved_base_url:
|
||||||
|
client, final_model = resolve_provider_client(
|
||||||
|
"custom",
|
||||||
|
model=resolved_model,
|
||||||
|
async_mode=async_mode,
|
||||||
|
explicit_base_url=resolved_base_url,
|
||||||
|
explicit_api_key=resolved_api_key,
|
||||||
|
)
|
||||||
|
if client is None:
|
||||||
|
return "custom", None, None
|
||||||
|
return "custom", client, final_model
|
||||||
|
|
||||||
if requested == "auto":
|
if requested == "auto":
|
||||||
for candidate in get_available_vision_backends():
|
for candidate in get_available_vision_backends():
|
||||||
sync_client, default_model = _resolve_strict_vision_backend(candidate)
|
sync_client, default_model = _resolve_strict_vision_backend(candidate)
|
||||||
|
|
@ -888,7 +947,7 @@ def resolve_vision_provider_client(
|
||||||
sync_client, default_model = _resolve_strict_vision_backend(requested)
|
sync_client, default_model = _resolve_strict_vision_backend(requested)
|
||||||
return _finalize(requested, sync_client, default_model)
|
return _finalize(requested, sync_client, default_model)
|
||||||
|
|
||||||
client, final_model = _get_cached_client(requested, model, async_mode)
|
client, final_model = _get_cached_client(requested, resolved_model, async_mode)
|
||||||
if client is None:
|
if client is None:
|
||||||
return requested, None, None
|
return requested, None, None
|
||||||
return requested, client, final_model
|
return requested, client, final_model
|
||||||
|
|
@ -945,19 +1004,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
||||||
# Every auxiliary LLM consumer should use these instead of manually
|
# Every auxiliary LLM consumer should use these instead of manually
|
||||||
# constructing clients and calling .chat.completions.create().
|
# constructing clients and calling .chat.completions.create().
|
||||||
|
|
||||||
# Client cache: (provider, async_mode) -> (client, default_model)
|
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
|
||||||
_client_cache: Dict[tuple, tuple] = {}
|
_client_cache: Dict[tuple, tuple] = {}
|
||||||
|
|
||||||
|
|
||||||
def _get_cached_client(
|
def _get_cached_client(
|
||||||
provider: str, model: str = None, async_mode: bool = False,
|
provider: str,
|
||||||
|
model: str = None,
|
||||||
|
async_mode: bool = False,
|
||||||
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
) -> Tuple[Optional[Any], Optional[str]]:
|
) -> Tuple[Optional[Any], Optional[str]]:
|
||||||
"""Get or create a cached client for the given provider."""
|
"""Get or create a cached client for the given provider."""
|
||||||
cache_key = (provider, async_mode)
|
cache_key = (provider, async_mode, base_url or "", api_key or "")
|
||||||
if cache_key in _client_cache:
|
if cache_key in _client_cache:
|
||||||
cached_client, cached_default = _client_cache[cache_key]
|
cached_client, cached_default = _client_cache[cache_key]
|
||||||
return cached_client, model or cached_default
|
return cached_client, model or cached_default
|
||||||
client, default_model = resolve_provider_client(provider, model, async_mode)
|
client, default_model = resolve_provider_client(
|
||||||
|
provider,
|
||||||
|
model,
|
||||||
|
async_mode,
|
||||||
|
explicit_base_url=base_url,
|
||||||
|
explicit_api_key=api_key,
|
||||||
|
)
|
||||||
if client is not None:
|
if client is not None:
|
||||||
_client_cache[cache_key] = (client, default_model)
|
_client_cache[cache_key] = (client, default_model)
|
||||||
return client, model or default_model
|
return client, model or default_model
|
||||||
|
|
@ -967,57 +1036,75 @@ def _resolve_task_provider_model(
|
||||||
task: str = None,
|
task: str = None,
|
||||||
provider: str = None,
|
provider: str = None,
|
||||||
model: str = None,
|
model: str = None,
|
||||||
) -> Tuple[str, Optional[str]]:
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
|
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
|
||||||
"""Determine provider + model for a call.
|
"""Determine provider + model for a call.
|
||||||
|
|
||||||
Priority:
|
Priority:
|
||||||
1. Explicit provider/model args (always win)
|
1. Explicit provider/model/base_url/api_key args (always win)
|
||||||
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
|
2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
|
||||||
3. Config file (auxiliary.{task}.provider/model or compression.*)
|
3. Config file (auxiliary.{task}.* or compression.*)
|
||||||
4. "auto" (full auto-detection chain)
|
4. "auto" (full auto-detection chain)
|
||||||
|
|
||||||
Returns (provider, model) where model may be None (use provider default).
|
Returns (provider, model, base_url, api_key) where model may be None
|
||||||
|
(use provider default). When base_url is set, provider is forced to
|
||||||
|
"custom" and the task uses that direct endpoint.
|
||||||
"""
|
"""
|
||||||
if provider:
|
config = {}
|
||||||
return provider, model
|
cfg_provider = None
|
||||||
|
cfg_model = None
|
||||||
|
cfg_base_url = None
|
||||||
|
cfg_api_key = None
|
||||||
|
|
||||||
if task:
|
if task:
|
||||||
# Check env var overrides first
|
|
||||||
env_provider = _get_auxiliary_provider(task)
|
|
||||||
if env_provider != "auto":
|
|
||||||
# Check for env var model override too
|
|
||||||
env_model = None
|
|
||||||
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
|
||||||
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
|
|
||||||
if val:
|
|
||||||
env_model = val
|
|
||||||
break
|
|
||||||
return env_provider, model or env_model
|
|
||||||
|
|
||||||
# Read from config file
|
|
||||||
try:
|
try:
|
||||||
from hermes_cli.config import load_config
|
from hermes_cli.config import load_config
|
||||||
config = load_config()
|
config = load_config()
|
||||||
except ImportError:
|
except ImportError:
|
||||||
return "auto", model
|
config = {}
|
||||||
|
|
||||||
# Check auxiliary.{task} section
|
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||||
aux = config.get("auxiliary", {})
|
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||||
task_config = aux.get(task, {})
|
if not isinstance(task_config, dict):
|
||||||
cfg_provider = task_config.get("provider", "").strip() or None
|
task_config = {}
|
||||||
cfg_model = task_config.get("model", "").strip() or None
|
cfg_provider = str(task_config.get("provider", "")).strip() or None
|
||||||
|
cfg_model = str(task_config.get("model", "")).strip() or None
|
||||||
|
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
|
||||||
|
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
|
||||||
|
|
||||||
# Backwards compat: compression section has its own keys
|
# Backwards compat: compression section has its own keys
|
||||||
if task == "compression" and not cfg_provider:
|
if task == "compression" and not cfg_provider:
|
||||||
comp = config.get("compression", {})
|
comp = config.get("compression", {}) if isinstance(config, dict) else {}
|
||||||
cfg_provider = comp.get("summary_provider", "").strip() or None
|
if isinstance(comp, dict):
|
||||||
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
cfg_provider = comp.get("summary_provider", "").strip() or None
|
||||||
|
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
||||||
|
|
||||||
|
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
|
||||||
|
resolved_model = model or env_model or cfg_model
|
||||||
|
|
||||||
|
if base_url:
|
||||||
|
return "custom", resolved_model, base_url, api_key
|
||||||
|
if provider:
|
||||||
|
return provider, resolved_model, base_url, api_key
|
||||||
|
|
||||||
|
if task:
|
||||||
|
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
|
||||||
|
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
|
||||||
|
if env_base_url:
|
||||||
|
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
|
||||||
|
|
||||||
|
env_provider = _get_auxiliary_provider(task)
|
||||||
|
if env_provider != "auto":
|
||||||
|
return env_provider, resolved_model, None, None
|
||||||
|
|
||||||
|
if cfg_base_url:
|
||||||
|
return "custom", resolved_model, cfg_base_url, cfg_api_key
|
||||||
if cfg_provider and cfg_provider != "auto":
|
if cfg_provider and cfg_provider != "auto":
|
||||||
return cfg_provider, model or cfg_model
|
return cfg_provider, resolved_model, None, None
|
||||||
return "auto", model or cfg_model
|
return "auto", resolved_model, None, None
|
||||||
|
|
||||||
return "auto", model
|
return "auto", resolved_model, None, None
|
||||||
|
|
||||||
|
|
||||||
def _build_call_kwargs(
|
def _build_call_kwargs(
|
||||||
|
|
@ -1029,6 +1116,7 @@ def _build_call_kwargs(
|
||||||
tools: Optional[list] = None,
|
tools: Optional[list] = None,
|
||||||
timeout: float = 30.0,
|
timeout: float = 30.0,
|
||||||
extra_body: Optional[dict] = None,
|
extra_body: Optional[dict] = None,
|
||||||
|
base_url: Optional[str] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
|
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
|
||||||
kwargs: Dict[str, Any] = {
|
kwargs: Dict[str, Any] = {
|
||||||
|
|
@ -1044,7 +1132,7 @@ def _build_call_kwargs(
|
||||||
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
|
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
|
||||||
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
||||||
if provider == "custom":
|
if provider == "custom":
|
||||||
custom_base = _current_custom_base_url()
|
custom_base = base_url or _current_custom_base_url()
|
||||||
if "api.openai.com" in custom_base.lower():
|
if "api.openai.com" in custom_base.lower():
|
||||||
kwargs["max_completion_tokens"] = max_tokens
|
kwargs["max_completion_tokens"] = max_tokens
|
||||||
else:
|
else:
|
||||||
|
|
@ -1070,6 +1158,8 @@ def call_llm(
|
||||||
*,
|
*,
|
||||||
provider: str = None,
|
provider: str = None,
|
||||||
model: str = None,
|
model: str = None,
|
||||||
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
messages: list,
|
messages: list,
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
|
|
@ -1101,16 +1191,18 @@ def call_llm(
|
||||||
Raises:
|
Raises:
|
||||||
RuntimeError: If no provider is configured.
|
RuntimeError: If no provider is configured.
|
||||||
"""
|
"""
|
||||||
resolved_provider, resolved_model = _resolve_task_provider_model(
|
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||||
task, provider, model)
|
task, provider, model, base_url, api_key)
|
||||||
|
|
||||||
if task == "vision":
|
if task == "vision":
|
||||||
effective_provider, client, final_model = resolve_vision_provider_client(
|
effective_provider, client, final_model = resolve_vision_provider_client(
|
||||||
provider=resolved_provider,
|
provider=provider,
|
||||||
model=resolved_model,
|
model=model,
|
||||||
|
base_url=base_url,
|
||||||
|
api_key=api_key,
|
||||||
async_mode=False,
|
async_mode=False,
|
||||||
)
|
)
|
||||||
if client is None and resolved_provider != "auto":
|
if client is None and resolved_provider != "auto" and not resolved_base_url:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Vision provider %s unavailable, falling back to auto vision backends",
|
"Vision provider %s unavailable, falling back to auto vision backends",
|
||||||
resolved_provider,
|
resolved_provider,
|
||||||
|
|
@ -1127,10 +1219,15 @@ def call_llm(
|
||||||
)
|
)
|
||||||
resolved_provider = effective_provider or resolved_provider
|
resolved_provider = effective_provider or resolved_provider
|
||||||
else:
|
else:
|
||||||
client, final_model = _get_cached_client(resolved_provider, resolved_model)
|
client, final_model = _get_cached_client(
|
||||||
|
resolved_provider,
|
||||||
|
resolved_model,
|
||||||
|
base_url=resolved_base_url,
|
||||||
|
api_key=resolved_api_key,
|
||||||
|
)
|
||||||
if client is None:
|
if client is None:
|
||||||
# Fallback: try openrouter
|
# Fallback: try openrouter
|
||||||
if resolved_provider != "openrouter":
|
if resolved_provider != "openrouter" and not resolved_base_url:
|
||||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||||
resolved_provider)
|
resolved_provider)
|
||||||
client, final_model = _get_cached_client(
|
client, final_model = _get_cached_client(
|
||||||
|
|
@ -1143,7 +1240,8 @@ def call_llm(
|
||||||
kwargs = _build_call_kwargs(
|
kwargs = _build_call_kwargs(
|
||||||
resolved_provider, final_model, messages,
|
resolved_provider, final_model, messages,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=temperature, max_tokens=max_tokens,
|
||||||
tools=tools, timeout=timeout, extra_body=extra_body)
|
tools=tools, timeout=timeout, extra_body=extra_body,
|
||||||
|
base_url=resolved_base_url)
|
||||||
|
|
||||||
# Handle max_tokens vs max_completion_tokens retry
|
# Handle max_tokens vs max_completion_tokens retry
|
||||||
try:
|
try:
|
||||||
|
|
@ -1162,6 +1260,8 @@ async def async_call_llm(
|
||||||
*,
|
*,
|
||||||
provider: str = None,
|
provider: str = None,
|
||||||
model: str = None,
|
model: str = None,
|
||||||
|
base_url: str = None,
|
||||||
|
api_key: str = None,
|
||||||
messages: list,
|
messages: list,
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
|
|
@ -1173,16 +1273,18 @@ async def async_call_llm(
|
||||||
|
|
||||||
Same as call_llm() but async. See call_llm() for full documentation.
|
Same as call_llm() but async. See call_llm() for full documentation.
|
||||||
"""
|
"""
|
||||||
resolved_provider, resolved_model = _resolve_task_provider_model(
|
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||||
task, provider, model)
|
task, provider, model, base_url, api_key)
|
||||||
|
|
||||||
if task == "vision":
|
if task == "vision":
|
||||||
effective_provider, client, final_model = resolve_vision_provider_client(
|
effective_provider, client, final_model = resolve_vision_provider_client(
|
||||||
provider=resolved_provider,
|
provider=provider,
|
||||||
model=resolved_model,
|
model=model,
|
||||||
|
base_url=base_url,
|
||||||
|
api_key=api_key,
|
||||||
async_mode=True,
|
async_mode=True,
|
||||||
)
|
)
|
||||||
if client is None and resolved_provider != "auto":
|
if client is None and resolved_provider != "auto" and not resolved_base_url:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Vision provider %s unavailable, falling back to auto vision backends",
|
"Vision provider %s unavailable, falling back to auto vision backends",
|
||||||
resolved_provider,
|
resolved_provider,
|
||||||
|
|
@ -1200,9 +1302,14 @@ async def async_call_llm(
|
||||||
resolved_provider = effective_provider or resolved_provider
|
resolved_provider = effective_provider or resolved_provider
|
||||||
else:
|
else:
|
||||||
client, final_model = _get_cached_client(
|
client, final_model = _get_cached_client(
|
||||||
resolved_provider, resolved_model, async_mode=True)
|
resolved_provider,
|
||||||
|
resolved_model,
|
||||||
|
async_mode=True,
|
||||||
|
base_url=resolved_base_url,
|
||||||
|
api_key=resolved_api_key,
|
||||||
|
)
|
||||||
if client is None:
|
if client is None:
|
||||||
if resolved_provider != "openrouter":
|
if resolved_provider != "openrouter" and not resolved_base_url:
|
||||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||||
resolved_provider)
|
resolved_provider)
|
||||||
client, final_model = _get_cached_client(
|
client, final_model = _get_cached_client(
|
||||||
|
|
@ -1216,7 +1323,8 @@ async def async_call_llm(
|
||||||
kwargs = _build_call_kwargs(
|
kwargs = _build_call_kwargs(
|
||||||
resolved_provider, final_model, messages,
|
resolved_provider, final_model, messages,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=temperature, max_tokens=max_tokens,
|
||||||
tools=tools, timeout=timeout, extra_body=extra_body)
|
tools=tools, timeout=timeout, extra_body=extra_body,
|
||||||
|
base_url=resolved_base_url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return await client.chat.completions.create(**kwargs)
|
return await client.chat.completions.create(**kwargs)
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,42 @@
|
||||||
"""Skill slash commands — scan installed skills and build invocation messages.
|
"""Shared slash command helpers for skills and built-in prompt-style modes.
|
||||||
|
|
||||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||||
can invoke skills via /skill-name commands.
|
can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||||
|
/plan.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||||
|
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||||
|
|
||||||
|
|
||||||
|
def build_plan_path(
|
||||||
|
user_instruction: str = "",
|
||||||
|
*,
|
||||||
|
now: datetime | None = None,
|
||||||
|
) -> Path:
|
||||||
|
"""Return the default workspace-relative markdown path for a /plan invocation.
|
||||||
|
|
||||||
|
Relative paths are intentional: file tools are task/backend-aware and resolve
|
||||||
|
them against the active working directory for local, docker, ssh, modal,
|
||||||
|
daytona, and similar terminal backends. That keeps the plan with the active
|
||||||
|
workspace instead of the Hermes host's global home directory.
|
||||||
|
"""
|
||||||
|
slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
|
||||||
|
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
|
||||||
|
if slug:
|
||||||
|
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
|
||||||
|
slug = slug or "conversation-plan"
|
||||||
|
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
|
||||||
|
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
|
||||||
|
|
||||||
|
|
||||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||||
|
|
@ -56,6 +81,7 @@ def _build_skill_message(
|
||||||
skill_dir: Path | None,
|
skill_dir: Path | None,
|
||||||
activation_note: str,
|
activation_note: str,
|
||||||
user_instruction: str = "",
|
user_instruction: str = "",
|
||||||
|
runtime_note: str = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Format a loaded skill into a user/system message payload."""
|
"""Format a loaded skill into a user/system message payload."""
|
||||||
from tools.skills_tool import SKILLS_DIR
|
from tools.skills_tool import SKILLS_DIR
|
||||||
|
|
@ -115,6 +141,10 @@ def _build_skill_message(
|
||||||
parts.append("")
|
parts.append("")
|
||||||
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
|
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
|
||||||
|
|
||||||
|
if runtime_note:
|
||||||
|
parts.append("")
|
||||||
|
parts.append(f"[Runtime note: {runtime_note}]")
|
||||||
|
|
||||||
return "\n".join(parts)
|
return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -172,6 +202,7 @@ def build_skill_invocation_message(
|
||||||
cmd_key: str,
|
cmd_key: str,
|
||||||
user_instruction: str = "",
|
user_instruction: str = "",
|
||||||
task_id: str | None = None,
|
task_id: str | None = None,
|
||||||
|
runtime_note: str = "",
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Build the user message content for a skill slash command invocation.
|
"""Build the user message content for a skill slash command invocation.
|
||||||
|
|
||||||
|
|
@ -201,6 +232,7 @@ def build_skill_invocation_message(
|
||||||
skill_dir,
|
skill_dir,
|
||||||
activation_note,
|
activation_note,
|
||||||
user_instruction=user_instruction,
|
user_instruction=user_instruction,
|
||||||
|
runtime_note=runtime_note,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
77
cli.py
77
cli.py
|
|
@ -218,11 +218,27 @@ def load_cli_config() -> Dict[str, Any]:
|
||||||
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
|
"timeout": 300, # Max seconds a sandbox script can run before being killed (5 min)
|
||||||
"max_tool_calls": 50, # Max RPC tool calls per execution
|
"max_tool_calls": 50, # Max RPC tool calls per execution
|
||||||
},
|
},
|
||||||
|
"auxiliary": {
|
||||||
|
"vision": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
|
},
|
||||||
|
},
|
||||||
"delegation": {
|
"delegation": {
|
||||||
"max_iterations": 45, # Max tool-calling turns per child agent
|
"max_iterations": 45, # Max tool-calling turns per child agent
|
||||||
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
|
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
|
||||||
"model": "", # Subagent model override (empty = inherit parent model)
|
"model": "", # Subagent model override (empty = inherit parent model)
|
||||||
"provider": "", # Subagent provider override (empty = inherit parent provider)
|
"provider": "", # Subagent provider override (empty = inherit parent provider)
|
||||||
|
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||||
|
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -363,28 +379,44 @@ def load_cli_config() -> Dict[str, Any]:
|
||||||
if config_key in compression_config:
|
if config_key in compression_config:
|
||||||
os.environ[env_var] = str(compression_config[config_key])
|
os.environ[env_var] = str(compression_config[config_key])
|
||||||
|
|
||||||
# Apply auxiliary model overrides to environment variables.
|
# Apply auxiliary model/direct-endpoint overrides to environment variables.
|
||||||
# Vision and web_extract each have their own provider + model pair.
|
# Vision and web_extract each have their own provider/model/base_url/api_key tuple.
|
||||||
# (Compression is handled in the compression section above.)
|
# (Compression is handled in the compression section above.)
|
||||||
# Only set env vars for non-empty / non-default values so auto-detection
|
# Only set env vars for non-empty / non-default values so auto-detection
|
||||||
# still works.
|
# still works.
|
||||||
auxiliary_config = defaults.get("auxiliary", {})
|
auxiliary_config = defaults.get("auxiliary", {})
|
||||||
auxiliary_task_env = {
|
auxiliary_task_env = {
|
||||||
# config key → (provider env var, model env var)
|
# config key → env var mapping
|
||||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
"vision": {
|
||||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||||
|
"model": "AUXILIARY_VISION_MODEL",
|
||||||
|
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||||
|
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for task_key, (prov_env, model_env) in auxiliary_task_env.items():
|
for task_key, env_map in auxiliary_task_env.items():
|
||||||
task_cfg = auxiliary_config.get(task_key, {})
|
task_cfg = auxiliary_config.get(task_key, {})
|
||||||
if not isinstance(task_cfg, dict):
|
if not isinstance(task_cfg, dict):
|
||||||
continue
|
continue
|
||||||
prov = str(task_cfg.get("provider", "")).strip()
|
prov = str(task_cfg.get("provider", "")).strip()
|
||||||
model = str(task_cfg.get("model", "")).strip()
|
model = str(task_cfg.get("model", "")).strip()
|
||||||
|
base_url = str(task_cfg.get("base_url", "")).strip()
|
||||||
|
api_key = str(task_cfg.get("api_key", "")).strip()
|
||||||
if prov and prov != "auto":
|
if prov and prov != "auto":
|
||||||
os.environ[prov_env] = prov
|
os.environ[env_map["provider"]] = prov
|
||||||
if model:
|
if model:
|
||||||
os.environ[model_env] = model
|
os.environ[env_map["model"]] = model
|
||||||
|
if base_url:
|
||||||
|
os.environ[env_map["base_url"]] = base_url
|
||||||
|
if api_key:
|
||||||
|
os.environ[env_map["api_key"]] = api_key
|
||||||
|
|
||||||
# Security settings
|
# Security settings
|
||||||
security_config = defaults.get("security", {})
|
security_config = defaults.get("security", {})
|
||||||
|
|
@ -1048,6 +1080,7 @@ from agent.skill_commands import (
|
||||||
scan_skill_commands,
|
scan_skill_commands,
|
||||||
get_skill_commands,
|
get_skill_commands,
|
||||||
build_skill_invocation_message,
|
build_skill_invocation_message,
|
||||||
|
build_plan_path,
|
||||||
build_preloaded_skills_prompt,
|
build_preloaded_skills_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -3161,6 +3194,8 @@ class HermesCLI:
|
||||||
elif cmd_lower.startswith("/personality"):
|
elif cmd_lower.startswith("/personality"):
|
||||||
# Use original case (handler lowercases the personality name itself)
|
# Use original case (handler lowercases the personality name itself)
|
||||||
self._handle_personality_command(cmd_original)
|
self._handle_personality_command(cmd_original)
|
||||||
|
elif cmd_lower == "/plan" or cmd_lower.startswith("/plan "):
|
||||||
|
self._handle_plan_command(cmd_original)
|
||||||
elif cmd_lower == "/retry":
|
elif cmd_lower == "/retry":
|
||||||
retry_msg = self.retry_last()
|
retry_msg = self.retry_last()
|
||||||
if retry_msg and hasattr(self, '_pending_input'):
|
if retry_msg and hasattr(self, '_pending_input'):
|
||||||
|
|
@ -3272,6 +3307,32 @@ class HermesCLI:
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _handle_plan_command(self, cmd: str):
|
||||||
|
"""Handle /plan [request] — load the bundled plan skill."""
|
||||||
|
parts = cmd.strip().split(maxsplit=1)
|
||||||
|
user_instruction = parts[1].strip() if len(parts) > 1 else ""
|
||||||
|
|
||||||
|
plan_path = build_plan_path(user_instruction)
|
||||||
|
msg = build_skill_invocation_message(
|
||||||
|
"/plan",
|
||||||
|
user_instruction,
|
||||||
|
task_id=self.session_id,
|
||||||
|
runtime_note=(
|
||||||
|
"Save the markdown plan with write_file to this exact relative path "
|
||||||
|
f"inside the active workspace/backend cwd: {plan_path}"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
if not msg:
|
||||||
|
self.console.print("[bold red]Failed to load the bundled /plan skill[/]")
|
||||||
|
return
|
||||||
|
|
||||||
|
_cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
|
||||||
|
if hasattr(self, '_pending_input'):
|
||||||
|
self._pending_input.put(msg)
|
||||||
|
else:
|
||||||
|
self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]")
|
||||||
|
|
||||||
def _handle_background_command(self, cmd: str):
|
def _handle_background_command(self, cmd: str):
|
||||||
"""Handle /background <prompt> — run a prompt in a separate background session.
|
"""Handle /background <prompt> — run a prompt in a separate background session.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,8 @@ This module provides scheduled task execution, allowing the agent to:
|
||||||
- Execute tasks in isolated sessions (no prior context)
|
- Execute tasks in isolated sessions (no prior context)
|
||||||
|
|
||||||
Cron jobs are executed automatically by the gateway daemon:
|
Cron jobs are executed automatically by the gateway daemon:
|
||||||
hermes gateway install # Install as system service (recommended)
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux servers: boot-time system service
|
||||||
hermes gateway # Or run in foreground
|
hermes gateway # Or run in foreground
|
||||||
|
|
||||||
The gateway ticks the scheduler every 60 seconds. A file lock prevents
|
The gateway ticks the scheduler every 60 seconds. A file lock prevents
|
||||||
|
|
|
||||||
|
|
@ -100,24 +100,40 @@ if _config_path.exists():
|
||||||
for _cfg_key, _env_var in _compression_env_map.items():
|
for _cfg_key, _env_var in _compression_env_map.items():
|
||||||
if _cfg_key in _compression_cfg:
|
if _cfg_key in _compression_cfg:
|
||||||
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
||||||
# Auxiliary model overrides (vision, web_extract).
|
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
|
||||||
# Each task has provider + model; bridge non-default values to env vars.
|
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
|
||||||
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
||||||
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
||||||
_aux_task_env = {
|
_aux_task_env = {
|
||||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
"vision": {
|
||||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||||
|
"model": "AUXILIARY_VISION_MODEL",
|
||||||
|
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||||
|
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
|
for _task_key, _env_map in _aux_task_env.items():
|
||||||
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
||||||
if not isinstance(_task_cfg, dict):
|
if not isinstance(_task_cfg, dict):
|
||||||
continue
|
continue
|
||||||
_prov = str(_task_cfg.get("provider", "")).strip()
|
_prov = str(_task_cfg.get("provider", "")).strip()
|
||||||
_model = str(_task_cfg.get("model", "")).strip()
|
_model = str(_task_cfg.get("model", "")).strip()
|
||||||
|
_base_url = str(_task_cfg.get("base_url", "")).strip()
|
||||||
|
_api_key = str(_task_cfg.get("api_key", "")).strip()
|
||||||
if _prov and _prov != "auto":
|
if _prov and _prov != "auto":
|
||||||
os.environ[_prov_env] = _prov
|
os.environ[_env_map["provider"]] = _prov
|
||||||
if _model:
|
if _model:
|
||||||
os.environ[_model_env] = _model
|
os.environ[_env_map["model"]] = _model
|
||||||
|
if _base_url:
|
||||||
|
os.environ[_env_map["base_url"]] = _base_url
|
||||||
|
if _api_key:
|
||||||
|
os.environ[_env_map["api_key"]] = _api_key
|
||||||
_agent_cfg = _cfg.get("agent", {})
|
_agent_cfg = _cfg.get("agent", {})
|
||||||
if _agent_cfg and isinstance(_agent_cfg, dict):
|
if _agent_cfg and isinstance(_agent_cfg, dict):
|
||||||
if "max_turns" in _agent_cfg:
|
if "max_turns" in _agent_cfg:
|
||||||
|
|
@ -1098,7 +1114,7 @@ class GatewayRunner:
|
||||||
|
|
||||||
# Emit command:* hook for any recognized slash command
|
# Emit command:* hook for any recognized slash command
|
||||||
_known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
|
_known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
|
||||||
"personality", "retry", "undo", "sethome", "set-home",
|
"personality", "plan", "retry", "undo", "sethome", "set-home",
|
||||||
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
||||||
"update", "title", "resume", "provider", "rollback",
|
"update", "title", "resume", "provider", "rollback",
|
||||||
"background", "reasoning", "voice"}
|
"background", "reasoning", "voice"}
|
||||||
|
|
@ -1134,6 +1150,28 @@ class GatewayRunner:
|
||||||
if command == "personality":
|
if command == "personality":
|
||||||
return await self._handle_personality_command(event)
|
return await self._handle_personality_command(event)
|
||||||
|
|
||||||
|
if command == "plan":
|
||||||
|
try:
|
||||||
|
from agent.skill_commands import build_plan_path, build_skill_invocation_message
|
||||||
|
|
||||||
|
user_instruction = event.get_command_args().strip()
|
||||||
|
plan_path = build_plan_path(user_instruction)
|
||||||
|
event.text = build_skill_invocation_message(
|
||||||
|
"/plan",
|
||||||
|
user_instruction,
|
||||||
|
task_id=_quick_key,
|
||||||
|
runtime_note=(
|
||||||
|
"Save the markdown plan with write_file to this exact relative path "
|
||||||
|
f"inside the active workspace/backend cwd: {plan_path}"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if not event.text:
|
||||||
|
return "Failed to load the bundled /plan skill."
|
||||||
|
command = None
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("Failed to prepare /plan command")
|
||||||
|
return f"Failed to enter plan mode: {e}"
|
||||||
|
|
||||||
if command == "retry":
|
if command == "retry":
|
||||||
return await self._handle_retry_command(event)
|
return await self._handle_retry_command(event)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
|
||||||
"vision": {
|
"vision": {
|
||||||
"provider": "auto", # auto | openrouter | nous | codex | custom
|
"provider": "auto", # auto | openrouter | nous | codex | custom
|
||||||
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
||||||
|
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||||
|
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
|
||||||
},
|
},
|
||||||
"web_extract": {
|
"web_extract": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"compression": {
|
"compression": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"session_search": {
|
"session_search": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"skills_hub": {
|
"skills_hub": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"mcp": {
|
"mcp": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
"flush_memories": {
|
"flush_memories": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
|
"base_url": "",
|
||||||
|
"api_key": "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
@ -243,6 +257,8 @@ DEFAULT_CONFIG = {
|
||||||
"delegation": {
|
"delegation": {
|
||||||
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
|
"model": "", # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
|
||||||
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
||||||
|
"base_url": "", # direct OpenAI-compatible endpoint for subagents
|
||||||
|
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||||
},
|
},
|
||||||
|
|
||||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,7 @@ def cron_list(show_all: bool = False):
|
||||||
if not find_gateway_pids():
|
if not find_gateway_pids():
|
||||||
print(color(" ⚠ Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
|
print(color(" ⚠ Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
|
||||||
print(color(" Start it with: hermes gateway install", Colors.DIM))
|
print(color(" Start it with: hermes gateway install", Colors.DIM))
|
||||||
|
print(color(" sudo hermes gateway install --system # Linux servers", Colors.DIM))
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -120,7 +121,8 @@ def cron_status():
|
||||||
print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
|
print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
|
||||||
print()
|
print()
|
||||||
print(" To enable automatic execution:")
|
print(" To enable automatic execution:")
|
||||||
print(" hermes gateway install # Install as system service (recommended)")
|
print(" hermes gateway install # Install as a user service")
|
||||||
|
print(" sudo hermes gateway install --system # Linux servers: boot-time system service")
|
||||||
print(" hermes gateway # Or run in foreground")
|
print(" hermes gateway # Or run in foreground")
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|
|
||||||
|
|
@ -2313,7 +2313,7 @@ Examples:
|
||||||
hermes gateway Run messaging gateway
|
hermes gateway Run messaging gateway
|
||||||
hermes -s hermes-agent-dev,github-auth
|
hermes -s hermes-agent-dev,github-auth
|
||||||
hermes -w Start in isolated git worktree
|
hermes -w Start in isolated git worktree
|
||||||
hermes gateway install Install as system service
|
hermes gateway install Install gateway background service
|
||||||
hermes sessions list List past sessions
|
hermes sessions list List past sessions
|
||||||
hermes sessions browse Interactive session picker
|
hermes sessions browse Interactive session picker
|
||||||
hermes sessions rename ID T Rename/title a session
|
hermes sessions rename ID T Rename/title a session
|
||||||
|
|
|
||||||
57
skills/software-development/plan/SKILL.md
Normal file
57
skills/software-development/plan/SKILL.md
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
---
|
||||||
|
name: plan
|
||||||
|
description: Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work.
|
||||||
|
version: 1.0.0
|
||||||
|
author: Hermes Agent
|
||||||
|
license: MIT
|
||||||
|
metadata:
|
||||||
|
hermes:
|
||||||
|
tags: [planning, plan-mode, implementation, workflow]
|
||||||
|
related_skills: [writing-plans, subagent-driven-development]
|
||||||
|
---
|
||||||
|
|
||||||
|
# Plan Mode
|
||||||
|
|
||||||
|
Use this skill when the user wants a plan instead of execution.
|
||||||
|
|
||||||
|
## Core behavior
|
||||||
|
|
||||||
|
For this turn, you are planning only.
|
||||||
|
|
||||||
|
- Do not implement code.
|
||||||
|
- Do not edit project files except the plan markdown file.
|
||||||
|
- Do not run mutating terminal commands, commit, push, or perform external actions.
|
||||||
|
- You may inspect the repo or other context with read-only commands/tools when needed.
|
||||||
|
- Your deliverable is a markdown plan saved inside the active workspace under `.hermes/plans/`.
|
||||||
|
|
||||||
|
## Output requirements
|
||||||
|
|
||||||
|
Write a markdown plan that is concrete and actionable.
|
||||||
|
|
||||||
|
Include, when relevant:
|
||||||
|
- Goal
|
||||||
|
- Current context / assumptions
|
||||||
|
- Proposed approach
|
||||||
|
- Step-by-step plan
|
||||||
|
- Files likely to change
|
||||||
|
- Tests / validation
|
||||||
|
- Risks, tradeoffs, and open questions
|
||||||
|
|
||||||
|
If the task is code-related, include exact file paths, likely test targets, and verification steps.
|
||||||
|
|
||||||
|
## Save location
|
||||||
|
|
||||||
|
Save the plan with `write_file` under:
|
||||||
|
- `.hermes/plans/YYYY-MM-DD_HHMMSS-<slug>.md`
|
||||||
|
|
||||||
|
Treat that as relative to the active working directory / backend workspace. Hermes file tools are backend-aware, so using this relative path keeps the plan with the workspace on local, docker, ssh, modal, and daytona backends.
|
||||||
|
|
||||||
|
If the runtime provides a specific target path, use that exact path.
|
||||||
|
If not, create a sensible timestamped filename yourself under `.hermes/plans/`.
|
||||||
|
|
||||||
|
## Interaction style
|
||||||
|
|
||||||
|
- If the request is clear enough, write the plan directly.
|
||||||
|
- If no explicit instruction accompanies `/plan`, infer the task from the current conversation context.
|
||||||
|
- If it is genuinely underspecified, ask a brief clarifying question instead of guessing.
|
||||||
|
- After saving the plan, reply briefly with what you planned and the saved path.
|
||||||
|
|
@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
|
||||||
for key in (
|
for key in (
|
||||||
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
|
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
|
||||||
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
|
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
|
||||||
# Per-task provider/model overrides
|
# Per-task provider/model/direct-endpoint overrides
|
||||||
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
||||||
|
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
|
||||||
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
||||||
):
|
):
|
||||||
monkeypatch.delenv(key, raising=False)
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
|
@ -142,6 +144,27 @@ class TestGetTextAuxiliaryClient:
|
||||||
call_kwargs = mock_openai.call_args
|
call_kwargs = mock_openai.call_args
|
||||||
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
|
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
|
||||||
|
|
||||||
|
def test_task_direct_endpoint_override(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
|
assert model == "task-model"
|
||||||
|
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
|
||||||
|
assert mock_openai.call_args.kwargs["api_key"] == "task-key"
|
||||||
|
|
||||||
|
def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
|
||||||
|
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
|
assert client is None
|
||||||
|
assert model is None
|
||||||
|
mock_openai.assert_not_called()
|
||||||
|
|
||||||
def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
|
def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
|
||||||
config = {
|
config = {
|
||||||
"model": {
|
"model": {
|
||||||
|
|
@ -217,6 +240,27 @@ class TestVisionClientFallback:
|
||||||
client, model = get_vision_auxiliary_client()
|
client, model = get_vision_auxiliary_client()
|
||||||
assert client is not None # Custom endpoint picked up as fallback
|
assert client is not None # Custom endpoint picked up as fallback
|
||||||
|
|
||||||
|
def test_vision_direct_endpoint_override(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
||||||
|
monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_vision_auxiliary_client()
|
||||||
|
assert model == "vision-model"
|
||||||
|
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
|
||||||
|
assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
|
||||||
|
|
||||||
|
def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
||||||
|
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_vision_auxiliary_client()
|
||||||
|
assert client is None
|
||||||
|
assert model is None
|
||||||
|
mock_openai.assert_not_called()
|
||||||
|
|
||||||
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
||||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
|
@ -434,6 +478,24 @@ class TestTaskSpecificOverrides:
|
||||||
client, model = get_text_auxiliary_client("web_extract")
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
assert model == "google/gemini-3-flash-preview"
|
assert model == "google/gemini-3-flash-preview"
|
||||||
|
|
||||||
|
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
|
||||||
|
hermes_home = tmp_path / "hermes"
|
||||||
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||||
|
(hermes_home / "config.yaml").write_text(
|
||||||
|
"""auxiliary:
|
||||||
|
web_extract:
|
||||||
|
base_url: http://localhost:3456/v1
|
||||||
|
api_key: config-key
|
||||||
|
model: config-model
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||||
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_text_auxiliary_client("web_extract")
|
||||||
|
assert model == "config-model"
|
||||||
|
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
|
||||||
|
assert mock_openai.call_args.kwargs["api_key"] == "config-key"
|
||||||
|
|
||||||
def test_task_without_override_uses_auto(self, monkeypatch):
|
def test_task_without_override_uses_auto(self, monkeypatch):
|
||||||
"""A task with no provider env var falls through to auto chain."""
|
"""A task with no provider env var falls through to auto chain."""
|
||||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,16 @@
|
||||||
"""Tests for agent/skill_commands.py — skill slash command scanning and platform filtering."""
|
"""Tests for agent/skill_commands.py — skill slash command scanning and platform filtering."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import tools.skills_tool as skills_tool_module
|
import tools.skills_tool as skills_tool_module
|
||||||
from agent.skill_commands import (
|
from agent.skill_commands import (
|
||||||
scan_skill_commands,
|
build_plan_path,
|
||||||
build_skill_invocation_message,
|
|
||||||
build_preloaded_skills_prompt,
|
build_preloaded_skills_prompt,
|
||||||
|
build_skill_invocation_message,
|
||||||
|
scan_skill_commands,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -272,3 +275,37 @@ Generate some audio.
|
||||||
|
|
||||||
assert msg is not None
|
assert msg is not None
|
||||||
assert 'file_path="<path>"' in msg
|
assert 'file_path="<path>"' in msg
|
||||||
|
|
||||||
|
|
||||||
|
class TestPlanSkillHelpers:
|
||||||
|
def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self):
|
||||||
|
path = build_plan_path(
|
||||||
|
"Implement OAuth login + refresh tokens!",
|
||||||
|
now=datetime(2026, 3, 15, 9, 30, 45),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md"
|
||||||
|
|
||||||
|
def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path):
|
||||||
|
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||||
|
_make_skill(
|
||||||
|
tmp_path,
|
||||||
|
"plan",
|
||||||
|
body="Save plans under .hermes/plans in the active workspace and do not execute the work.",
|
||||||
|
)
|
||||||
|
scan_skill_commands()
|
||||||
|
msg = build_skill_invocation_message(
|
||||||
|
"/plan",
|
||||||
|
"Add a /plan command",
|
||||||
|
runtime_note=(
|
||||||
|
"Save the markdown plan with write_file to this exact relative path inside "
|
||||||
|
"the active workspace/backend cwd: .hermes/plans/plan.md"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert msg is not None
|
||||||
|
assert "Save plans under $HERMES_HOME/plans" not in msg
|
||||||
|
assert ".hermes/plans" in msg
|
||||||
|
assert "Add a /plan command" in msg
|
||||||
|
assert ".hermes/plans/plan.md" in msg
|
||||||
|
assert "Runtime note:" in msg
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
|
||||||
(fake_home / "memories").mkdir()
|
(fake_home / "memories").mkdir()
|
||||||
(fake_home / "skills").mkdir()
|
(fake_home / "skills").mkdir()
|
||||||
monkeypatch.setenv("HERMES_HOME", str(fake_home))
|
monkeypatch.setenv("HERMES_HOME", str(fake_home))
|
||||||
|
# Tests should not inherit the agent's current gateway/messaging surface.
|
||||||
|
# Individual tests that need gateway behavior set these explicitly.
|
||||||
|
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
|
||||||
|
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
|
||||||
|
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
|
||||||
|
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
|
|
|
||||||
129
tests/gateway/test_plan_command.py
Normal file
129
tests/gateway/test_plan_command.py
Normal file
|
|
@ -0,0 +1,129 @@
|
||||||
|
"""Tests for the /plan gateway slash command."""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agent.skill_commands import scan_skill_commands
|
||||||
|
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||||
|
from gateway.platforms.base import MessageEvent
|
||||||
|
from gateway.session import SessionEntry, SessionSource
|
||||||
|
|
||||||
|
|
||||||
|
def _make_runner():
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
|
||||||
|
runner = object.__new__(GatewayRunner)
|
||||||
|
runner.config = GatewayConfig(
|
||||||
|
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||||
|
)
|
||||||
|
runner.adapters = {}
|
||||||
|
runner._voice_mode = {}
|
||||||
|
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||||
|
runner.session_store = MagicMock()
|
||||||
|
runner.session_store.get_or_create_session.return_value = SessionEntry(
|
||||||
|
session_key="agent:main:telegram:dm:c1:u1",
|
||||||
|
session_id="sess-1",
|
||||||
|
created_at=datetime.now(),
|
||||||
|
updated_at=datetime.now(),
|
||||||
|
platform=Platform.TELEGRAM,
|
||||||
|
chat_type="dm",
|
||||||
|
)
|
||||||
|
runner.session_store.load_transcript.return_value = []
|
||||||
|
runner.session_store.has_any_sessions.return_value = True
|
||||||
|
runner.session_store.append_to_transcript = MagicMock()
|
||||||
|
runner.session_store.rewrite_transcript = MagicMock()
|
||||||
|
runner._running_agents = {}
|
||||||
|
runner._pending_messages = {}
|
||||||
|
runner._pending_approvals = {}
|
||||||
|
runner._session_db = None
|
||||||
|
runner._reasoning_config = None
|
||||||
|
runner._provider_routing = {}
|
||||||
|
runner._fallback_model = None
|
||||||
|
runner._show_reasoning = False
|
||||||
|
runner._is_user_authorized = lambda _source: True
|
||||||
|
runner._set_session_env = lambda _context: None
|
||||||
|
runner._run_agent = AsyncMock(
|
||||||
|
return_value={
|
||||||
|
"final_response": "planned",
|
||||||
|
"messages": [],
|
||||||
|
"tools": [],
|
||||||
|
"history_offset": 0,
|
||||||
|
"last_prompt_tokens": 0,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return runner
|
||||||
|
|
||||||
|
|
||||||
|
def _make_event(text="/plan"):
|
||||||
|
return MessageEvent(
|
||||||
|
text=text,
|
||||||
|
source=SessionSource(
|
||||||
|
platform=Platform.TELEGRAM,
|
||||||
|
user_id="u1",
|
||||||
|
chat_id="c1",
|
||||||
|
user_name="tester",
|
||||||
|
chat_type="dm",
|
||||||
|
),
|
||||||
|
message_id="m1",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_plan_skill(skills_dir):
    """Create a minimal ``plan`` skill under *skills_dir*.

    Writes ``<skills_dir>/plan/SKILL.md`` with a small front-matter header
    (name + description) and a two-line body. The directory is created if it
    does not exist yet, so calling this repeatedly is safe.
    """
    skill_dir = skills_dir / "plan"
    skill_dir.mkdir(parents=True, exist_ok=True)
    # Pin the encoding so the fixture does not depend on the platform locale.
    (skill_dir / "SKILL.md").write_text(
        """---
name: plan
description: Plan mode skill.
---

# Plan

Use the current conversation context when no explicit instruction is provided.
Save plans under the active workspace's .hermes/plans directory.
""",
        encoding="utf-8",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TestGatewayPlanCommand:
    """Gateway-side checks for the skill-backed ``/plan`` slash command."""

    @pytest.mark.asyncio
    async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path):
        """``/plan <request>`` forwards the skill text plus the request to the agent."""
        import gateway.run as gateway_run

        gateway_runner = _make_runner()
        plan_event = _make_event("/plan Add OAuth login")

        # Stub out credential and context-length lookups.
        monkeypatch.setattr(
            gateway_run,
            "_resolve_runtime_agent_kwargs",
            lambda: {"api_key": "***"},
        )
        monkeypatch.setattr(
            "agent.model_metadata.get_model_context_length",
            lambda *_args, **_kwargs: 100_000,
        )

        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            reply = await gateway_runner._handle_message(plan_event)

        assert reply == "planned"

        # Inspect the message handed to the mocked agent run.
        forwarded = gateway_runner._run_agent.call_args.kwargs["message"]
        for fragment in ("Plan mode skill", "Add OAuth login", ".hermes/plans"):
            assert fragment in forwarded
        assert str(tmp_path / "plans") not in forwarded
        for fragment in ("active workspace/backend cwd", "Runtime note:"):
            assert fragment in forwarded

    @pytest.mark.asyncio
    async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path):
        """``/help`` output mentions ``/plan`` once the plan skill has been scanned."""
        gateway_runner = _make_runner()
        help_event = _make_event("/help")

        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            help_text = await gateway_runner._handle_help_command(help_event)

        assert "/plan" in help_text
|
||||||
|
|
@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
|
||||||
# Clear env vars
|
# Clear env vars
|
||||||
for key in (
|
for key in (
|
||||||
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
|
||||||
|
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
|
||||||
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
|
||||||
):
|
):
|
||||||
monkeypatch.delenv(key, raising=False)
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
|
@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
|
||||||
auxiliary_cfg = config_dict.get("auxiliary", {})
|
auxiliary_cfg = config_dict.get("auxiliary", {})
|
||||||
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
|
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
|
||||||
aux_task_env = {
|
aux_task_env = {
|
||||||
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
|
"vision": {
|
||||||
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
|
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||||
|
"model": "AUXILIARY_VISION_MODEL",
|
||||||
|
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||||
|
},
|
||||||
|
"web_extract": {
|
||||||
|
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||||
|
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||||
|
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||||
|
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for task_key, (prov_env, model_env) in aux_task_env.items():
|
for task_key, env_map in aux_task_env.items():
|
||||||
task_cfg = auxiliary_cfg.get(task_key, {})
|
task_cfg = auxiliary_cfg.get(task_key, {})
|
||||||
if not isinstance(task_cfg, dict):
|
if not isinstance(task_cfg, dict):
|
||||||
continue
|
continue
|
||||||
prov = str(task_cfg.get("provider", "")).strip()
|
prov = str(task_cfg.get("provider", "")).strip()
|
||||||
model = str(task_cfg.get("model", "")).strip()
|
model = str(task_cfg.get("model", "")).strip()
|
||||||
|
base_url = str(task_cfg.get("base_url", "")).strip()
|
||||||
|
api_key = str(task_cfg.get("api_key", "")).strip()
|
||||||
if prov and prov != "auto":
|
if prov and prov != "auto":
|
||||||
os.environ[prov_env] = prov
|
os.environ[env_map["provider"]] = prov
|
||||||
if model:
|
if model:
|
||||||
os.environ[model_env] = model
|
os.environ[env_map["model"]] = model
|
||||||
|
if base_url:
|
||||||
|
os.environ[env_map["base_url"]] = base_url
|
||||||
|
if api_key:
|
||||||
|
os.environ[env_map["api_key"]] = api_key
|
||||||
|
|
||||||
|
|
||||||
# ── Config bridging tests ────────────────────────────────────────────────────
|
# ── Config bridging tests ────────────────────────────────────────────────────
|
||||||
|
|
@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
|
||||||
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
|
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
|
||||||
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
|
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
|
||||||
|
|
||||||
|
def test_direct_endpoint_bridged(self, monkeypatch):
    """Vision ``base_url``/``api_key``/``model`` config lands in AUXILIARY_VISION_* env vars."""
    vision_cfg = {
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
        "model": "qwen2.5-vl",
    }
    _run_auxiliary_bridge({"auxiliary": {"vision": vision_cfg}}, monkeypatch)

    expected_env = {
        "AUXILIARY_VISION_BASE_URL": "http://localhost:1234/v1",
        "AUXILIARY_VISION_API_KEY": "local-key",
        "AUXILIARY_VISION_MODEL": "qwen2.5-vl",
    }
    for env_name, env_value in expected_env.items():
        assert os.environ.get(env_name) == env_value
|
||||||
|
|
||||||
def test_compression_provider_bridged(self, monkeypatch):
|
def test_compression_provider_bridged(self, monkeypatch):
|
||||||
config = {
|
config = {
|
||||||
"compression": {
|
"compression": {
|
||||||
|
|
@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
|
||||||
# Check for key patterns that indicate the bridge is present
|
# Check for key patterns that indicate the bridge is present
|
||||||
assert "AUXILIARY_VISION_PROVIDER" in content
|
assert "AUXILIARY_VISION_PROVIDER" in content
|
||||||
assert "AUXILIARY_VISION_MODEL" in content
|
assert "AUXILIARY_VISION_MODEL" in content
|
||||||
|
assert "AUXILIARY_VISION_BASE_URL" in content
|
||||||
|
assert "AUXILIARY_VISION_API_KEY" in content
|
||||||
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
|
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
|
||||||
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
|
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
|
||||||
|
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
|
||||||
|
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
|
||||||
|
|
||||||
def test_gateway_has_compression_provider(self):
|
def test_gateway_has_compression_provider(self):
|
||||||
"""Gateway must bridge compression.summary_provider."""
|
"""Gateway must bridge compression.summary_provider."""
|
||||||
|
|
|
||||||
67
tests/test_cli_plan_command.py
Normal file
67
tests/test_cli_plan_command.py
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
"""Tests for the /plan CLI slash command."""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from agent.skill_commands import scan_skill_commands
|
||||||
|
from cli import HermesCLI
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cli():
    """Return a ``HermesCLI`` built without running ``__init__``.

    Only the attributes set below are populated; the console and the pending
    input queue are ``MagicMock`` objects.
    """
    stub = HermesCLI.__new__(HermesCLI)
    initial_state = {
        "config": {},
        "console": MagicMock(),
        "agent": None,
        "conversation_history": [],
        "session_id": "sess-123",
        "_pending_input": MagicMock(),
    }
    for attr_name, attr_value in initial_state.items():
        setattr(stub, attr_name, attr_value)
    return stub
|
||||||
|
|
||||||
|
|
||||||
|
def _make_plan_skill(skills_dir):
    """Create a minimal ``plan`` skill under *skills_dir*.

    Writes ``<skills_dir>/plan/SKILL.md`` with a small front-matter header
    (name + description) and a two-line body. The directory is created if it
    does not exist yet, so calling this repeatedly is safe.
    """
    skill_dir = skills_dir / "plan"
    skill_dir.mkdir(parents=True, exist_ok=True)
    # Pin the encoding so the fixture does not depend on the platform locale.
    (skill_dir / "SKILL.md").write_text(
        """---
name: plan
description: Plan mode skill.
---

# Plan

Use the current conversation context when no explicit instruction is provided.
Save plans under the active workspace's .hermes/plans directory.
""",
        encoding="utf-8",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TestCLIPlanCommand:
    """CLI-side checks for the skill-backed ``/plan`` slash command."""

    def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch):
        """``/plan <request>`` queues one message holding the skill text and the request."""
        cli_obj = _make_cli()

        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            result = cli_obj.process_command("/plan Add OAuth login")

        assert result is True
        cli_obj._pending_input.put.assert_called_once()
        queued = cli_obj._pending_input.put.call_args[0][0]
        assert "Plan mode skill" in queued
        assert "Add OAuth login" in queued
        assert ".hermes/plans" in queued
        assert str(tmp_path / "plans") not in queued
        assert "active workspace/backend cwd" in queued
        assert "Runtime note:" in queued

    def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch):
        """Bare ``/plan`` queues a message that points at the conversation context."""
        cli_obj = _make_cli()

        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_plan_skill(tmp_path)
            scan_skill_commands()
            cli_obj.process_command("/plan")

        # Fail with a readable message instead of a TypeError on ``None``
        # when nothing was queued.
        assert cli_obj._pending_input.put.called, "/plan did not queue any input"
        queued = cli_obj._pending_input.put.call_args[0][0]
        assert "current conversation context" in queued
        assert ".hermes/plans/" in queued
        assert "conversation-plan.md" in queued
|
||||||
|
|
@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase):
|
||||||
self.assertEqual(creds["api_mode"], "chat_completions")
|
self.assertEqual(creds["api_mode"], "chat_completions")
|
||||||
mock_resolve.assert_called_once_with(requested="openrouter")
|
mock_resolve.assert_called_once_with(requested="openrouter")
|
||||||
|
|
||||||
|
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
    """A configured ``base_url`` wins over ``provider`` and yields custom-endpoint credentials."""
    delegation_cfg = {
        "model": "qwen2.5-coder",
        "provider": "openrouter",
        "base_url": "http://localhost:1234/v1",
        "api_key": "local-key",
    }
    resolved = _resolve_delegation_credentials(delegation_cfg, _make_mock_parent(depth=0))

    expected_fields = (
        ("model", "qwen2.5-coder"),
        ("provider", "custom"),
        ("base_url", "http://localhost:1234/v1"),
        ("api_key", "local-key"),
        ("api_mode", "chat_completions"),
    )
    for field, wanted in expected_fields:
        self.assertEqual(resolved[field], wanted)
|
|
||||||
|
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
    """With no ``api_key`` in config, the key comes from the ``OPENAI_API_KEY`` env var."""
    delegation_cfg = {
        "model": "qwen2.5-coder",
        "base_url": "http://localhost:1234/v1",
    }
    mock_parent = _make_mock_parent(depth=0)
    env_overlay = {"OPENAI_API_KEY": "env-openai-key"}

    with patch.dict(os.environ, env_overlay, clear=False):
        resolved = _resolve_delegation_credentials(delegation_cfg, mock_parent)

    self.assertEqual(resolved["api_key"], "env-openai-key")
    self.assertEqual(resolved["provider"], "custom")
|
||||||
|
|
||||||
|
def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
    """A direct endpoint without a key must not borrow ``OPENROUTER_API_KEY``.

    With only ``OPENROUTER_API_KEY`` set, credential resolution has to raise
    a ``ValueError`` whose message mentions ``OPENAI_API_KEY``.
    """
    parent = _make_mock_parent(depth=0)
    cfg = {
        "model": "qwen2.5-coder",
        "base_url": "http://localhost:1234/v1",
    }
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
        # An OPENAI_API_KEY inherited from the developer/CI environment would
        # satisfy the fallback and hide the error under test. patch.dict
        # restores the original environment on exit, so removing it is safe.
        os.environ.pop("OPENAI_API_KEY", None)
        with self.assertRaises(ValueError) as ctx:
            _resolve_delegation_credentials(cfg, parent)
    self.assertIn("OPENAI_API_KEY", str(ctx.exception))
|
||||||
|
|
||||||
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
|
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
|
||||||
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
|
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
|
||||||
"""Nous provider resolves Nous Portal base_url and api_key."""
|
"""Nous provider resolves Nous Portal base_url and api_key."""
|
||||||
|
|
@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
|
||||||
self.assertNotEqual(kwargs["base_url"], parent.base_url)
|
self.assertNotEqual(kwargs["base_url"], parent.base_url)
|
||||||
self.assertNotEqual(kwargs["api_key"], parent.api_key)
|
self.assertNotEqual(kwargs["api_key"], parent.api_key)
|
||||||
|
|
||||||
|
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
    """Resolved direct-endpoint credentials are passed to the child ``AIAgent`` constructor."""
    endpoint = "http://localhost:1234/v1"
    mock_cfg.return_value = {
        "max_iterations": 45,
        "model": "qwen2.5-coder",
        "base_url": endpoint,
        "api_key": "local-key",
    }
    resolved_creds = {
        "model": "qwen2.5-coder",
        "provider": "custom",
        "base_url": endpoint,
        "api_key": "local-key",
        "api_mode": "chat_completions",
    }
    mock_creds.return_value = resolved_creds
    parent = _make_mock_parent(depth=0)

    with patch("run_agent.AIAgent") as MockAgent:
        child_agent = MagicMock()
        child_agent.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1
        }
        MockAgent.return_value = child_agent

        delegate_task(goal="Direct endpoint test", parent_agent=parent)

    # Keyword arguments the child agent was constructed with.
    ctor_kwargs = MockAgent.call_args[1]
    self.assertEqual(ctor_kwargs["model"], "qwen2.5-coder")
    self.assertEqual(ctor_kwargs["provider"], "custom")
    self.assertEqual(ctor_kwargs["base_url"], "http://localhost:1234/v1")
    self.assertEqual(ctor_kwargs["api_key"], "local-key")
    self.assertEqual(ctor_kwargs["api_mode"], "chat_completions")
|
||||||
|
|
||||||
@patch("tools.delegate_tool._load_config")
|
@patch("tools.delegate_tool._load_config")
|
||||||
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
@patch("tools.delegate_tool._resolve_delegation_credentials")
|
||||||
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
|
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
|
||||||
|
|
|
||||||
|
|
@ -540,18 +540,51 @@ def delegate_task(
|
||||||
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
||||||
"""Resolve credentials for subagent delegation.
|
"""Resolve credentials for subagent delegation.
|
||||||
|
|
||||||
If ``delegation.provider`` is configured, resolves the full credential
|
If ``delegation.base_url`` is configured, subagents use that direct
|
||||||
bundle (base_url, api_key, api_mode, provider) via the runtime provider
|
OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is
|
||||||
system — the same path used by CLI/gateway startup. This lets subagents
|
configured, the full credential bundle (base_url, api_key, api_mode,
|
||||||
run on a completely different provider:model pair.
|
provider) is resolved via the runtime provider system — the same path used
|
||||||
|
by CLI/gateway startup. This lets subagents run on a completely different
|
||||||
|
provider:model pair.
|
||||||
|
|
||||||
If no provider is configured, returns None values so the child inherits
|
If neither base_url nor provider is configured, returns None values so the
|
||||||
everything from the parent agent.
|
child inherits everything from the parent agent.
|
||||||
|
|
||||||
Raises ValueError with a user-friendly message on credential failure.
|
Raises ValueError with a user-friendly message on credential failure.
|
||||||
"""
|
"""
|
||||||
configured_model = cfg.get("model") or None
|
configured_model = str(cfg.get("model") or "").strip() or None
|
||||||
configured_provider = cfg.get("provider") or None
|
configured_provider = str(cfg.get("provider") or "").strip() or None
|
||||||
|
configured_base_url = str(cfg.get("base_url") or "").strip() or None
|
||||||
|
configured_api_key = str(cfg.get("api_key") or "").strip() or None
|
||||||
|
|
||||||
|
if configured_base_url:
|
||||||
|
api_key = (
|
||||||
|
configured_api_key
|
||||||
|
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||||
|
)
|
||||||
|
if not api_key:
|
||||||
|
raise ValueError(
|
||||||
|
"Delegation base_url is configured but no API key was found. "
|
||||||
|
"Set delegation.api_key or OPENAI_API_KEY."
|
||||||
|
)
|
||||||
|
|
||||||
|
base_lower = configured_base_url.lower()
|
||||||
|
provider = "custom"
|
||||||
|
api_mode = "chat_completions"
|
||||||
|
if "chatgpt.com/backend-api/codex" in base_lower:
|
||||||
|
provider = "openai-codex"
|
||||||
|
api_mode = "codex_responses"
|
||||||
|
elif "api.anthropic.com" in base_lower:
|
||||||
|
provider = "anthropic"
|
||||||
|
api_mode = "anthropic_messages"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"model": configured_model,
|
||||||
|
"provider": provider,
|
||||||
|
"base_url": configured_base_url,
|
||||||
|
"api_key": api_key,
|
||||||
|
"api_mode": api_mode,
|
||||||
|
}
|
||||||
|
|
||||||
if not configured_provider:
|
if not configured_provider:
|
||||||
# No provider override — child inherits everything from parent
|
# No provider override — child inherits everything from parent
|
||||||
|
|
@ -570,7 +603,8 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
|
f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
|
||||||
f"Check that the provider is configured (API key set, valid provider name). "
|
f"Check that the provider is configured (API key set, valid provider name), "
|
||||||
|
f"or set delegation.base_url/delegation.api_key for a direct endpoint. "
|
||||||
f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
|
f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
|
||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,8 @@ Before starting, make sure you have:
|
||||||
- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
|
- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation)
|
||||||
- **Gateway running** — the gateway daemon handles cron execution:
|
- **Gateway running** — the gateway daemon handles cron execution:
|
||||||
```bash
|
```bash
|
||||||
hermes gateway install # Install as system service (recommended)
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux servers: boot-time system service
|
||||||
# or
|
# or
|
||||||
hermes gateway # Run in foreground
|
hermes gateway # Run in foreground
|
||||||
```
|
```
|
||||||
|
|
@ -242,10 +243,12 @@ Make sure the scheduler is actually running:
|
||||||
hermes cron status
|
hermes cron status
|
||||||
```
|
```
|
||||||
|
|
||||||
If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability:
|
If the gateway isn't running, your jobs won't execute. Install it as a background service for reliability:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway install
|
hermes gateway install
|
||||||
|
# or on Linux servers
|
||||||
|
sudo hermes gateway install --system
|
||||||
```
|
```
|
||||||
|
|
||||||
## Going Further
|
## Going Further
|
||||||
|
|
|
||||||
|
|
@ -143,12 +143,13 @@ For a persistent deployment that survives reboots:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway install
|
hermes gateway install
|
||||||
|
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||||
```
|
```
|
||||||
|
|
||||||
This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically.
|
This creates a background service: a user-level **systemd** service on Linux by default, a **launchd** service on macOS, or a boot-time Linux system service if you pass `--system`.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Linux — manage the service
|
# Linux — manage the default user service
|
||||||
hermes gateway start
|
hermes gateway start
|
||||||
hermes gateway stop
|
hermes gateway stop
|
||||||
hermes gateway status
|
hermes gateway status
|
||||||
|
|
@ -158,6 +159,11 @@ journalctl --user -u hermes-gateway -f
|
||||||
|
|
||||||
# Keep running after SSH logout
|
# Keep running after SSH logout
|
||||||
sudo loginctl enable-linger $USER
|
sudo loginctl enable-linger $USER
|
||||||
|
|
||||||
|
# Linux servers — explicit system-service commands
|
||||||
|
sudo hermes gateway start --system
|
||||||
|
sudo hermes gateway status --system
|
||||||
|
journalctl -u hermes-gateway -f
|
||||||
```
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -180,6 +180,23 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||||
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
|
| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this % of limit (default: 0.50) |
|
||||||
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
|
| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
|
||||||
|
|
||||||
|
## Auxiliary Task Overrides
|
||||||
|
|
||||||
|
| Variable | Description |
|
||||||
|
|----------|-------------|
|
||||||
|
| `AUXILIARY_VISION_PROVIDER` | Override provider for vision tasks |
|
||||||
|
| `AUXILIARY_VISION_MODEL` | Override model for vision tasks |
|
||||||
|
| `AUXILIARY_VISION_BASE_URL` | Direct OpenAI-compatible endpoint for vision tasks |
|
||||||
|
| `AUXILIARY_VISION_API_KEY` | API key paired with `AUXILIARY_VISION_BASE_URL` |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_PROVIDER` | Override provider for web extraction/summarization |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_MODEL` | Override model for web extraction/summarization |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_BASE_URL` | Direct OpenAI-compatible endpoint for web extraction/summarization |
|
||||||
|
| `AUXILIARY_WEB_EXTRACT_API_KEY` | API key paired with `AUXILIARY_WEB_EXTRACT_BASE_URL` |
|
||||||
|
| `CONTEXT_COMPRESSION_PROVIDER` | Override provider for context compression summaries |
|
||||||
|
| `CONTEXT_COMPRESSION_MODEL` | Override model for context compression summaries |
|
||||||
|
|
||||||
|
For task-specific direct endpoints, Hermes uses the task's configured API key or `OPENAI_API_KEY`. It does not reuse `OPENROUTER_API_KEY` for those custom endpoints.
|
||||||
|
|
||||||
## Provider Routing (config.yaml only)
|
## Provider Routing (config.yaml only)
|
||||||
|
|
||||||
These go in `~/.hermes/config.yaml` under the `provider_routing` section:
|
These go in `~/.hermes/config.yaml` under the `provider_routing` section:
|
||||||
|
|
|
||||||
|
|
@ -236,6 +236,7 @@ Skills for controlling smart home devices — lights, switches, sensors, and hom
|
||||||
| Skill | Description | Path |
|
| Skill | Description | Path |
|
||||||
|-------|-------------|------|
|
|-------|-------------|------|
|
||||||
| `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
|
| `code-review` | Guidelines for performing thorough code reviews with security and quality focus | `software-development/code-review` |
|
||||||
|
| `plan` | Plan mode for Hermes — inspect context, write a markdown plan into `.hermes/plans/` in the active workspace/backend working directory, and do not execute the work. | `software-development/plan` |
|
||||||
| `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
|
| `requesting-code-review` | Use when completing tasks, implementing major features, or before merging. Validates work meets requirements through systematic review process. | `software-development/requesting-code-review` |
|
||||||
| `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
|
| `subagent-driven-development` | Use when executing implementation plans with independent tasks. Dispatches fresh delegate_task per task with two-stage review (spec compliance then code quality). | `software-development/subagent-driven-development` |
|
||||||
| `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |
|
| `systematic-debugging` | Use when encountering any bug, test failure, or unexpected behavior. 4-phase root cause investigation — NO fixes without understanding the problem first. | `software-development/systematic-debugging` |
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ Hermes has two slash-command surfaces:
|
||||||
- **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py`
|
- **Interactive CLI slash commands** — handled by `cli.py` / `hermes_cli/commands.py`
|
||||||
- **Messaging slash commands** — handled by `gateway/run.py`
|
- **Messaging slash commands** — handled by `gateway/run.py`
|
||||||
|
|
||||||
Installed skills are also exposed as dynamic slash commands on both surfaces.
|
Installed skills are also exposed as dynamic slash commands on both surfaces. That includes bundled skills like `/plan`, which opens plan mode and saves markdown plans under `.hermes/plans/` relative to the active workspace/backend working directory.
|
||||||
|
|
||||||
## Interactive CLI slash commands
|
## Interactive CLI slash commands
|
||||||
|
|
||||||
|
|
@ -32,6 +32,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||||
| `/compress` | Manually compress conversation context (flush memories + summarize) |
|
| `/compress` | Manually compress conversation context (flush memories + summarize) |
|
||||||
| `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
|
| `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
|
||||||
| `/background` | Run a prompt in the background (usage: /background <prompt>) |
|
| `/background` | Run a prompt in the background (usage: /background <prompt>) |
|
||||||
|
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
|
||||||
|
|
||||||
### Configuration
|
### Configuration
|
||||||
|
|
||||||
|
|
@ -109,6 +110,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
||||||
| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. |
|
| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. |
|
||||||
| `/rollback [number]` | List or restore filesystem checkpoints. |
|
| `/rollback [number]` | List or restore filesystem checkpoints. |
|
||||||
| `/background <prompt>` | Run a prompt in a separate background session. |
|
| `/background <prompt>` | Run a prompt in a separate background session. |
|
||||||
|
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
|
||||||
| `/reload-mcp` | Reload MCP servers from config. |
|
| `/reload-mcp` | Reload MCP servers from config. |
|
||||||
| `/update` | Update Hermes Agent to the latest version. |
|
| `/update` | Update Hermes Agent to the latest version. |
|
||||||
| `/help` | Show messaging help. |
|
| `/help` | Show messaging help. |
|
||||||
|
|
|
||||||
|
|
@ -571,11 +571,15 @@ auxiliary:
|
||||||
vision:
|
vision:
|
||||||
provider: "auto" # "auto", "openrouter", "nous", "main"
|
provider: "auto" # "auto", "openrouter", "nous", "main"
|
||||||
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
|
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
|
||||||
|
base_url: "" # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||||
|
api_key: "" # API key for base_url (falls back to OPENAI_API_KEY)
|
||||||
|
|
||||||
# Web page summarization + browser page text extraction
|
# Web page summarization + browser page text extraction
|
||||||
web_extract:
|
web_extract:
|
||||||
provider: "auto"
|
provider: "auto"
|
||||||
model: "" # e.g. "google/gemini-2.5-flash"
|
model: "" # e.g. "google/gemini-2.5-flash"
|
||||||
|
base_url: ""
|
||||||
|
api_key: ""
|
||||||
```
|
```
|
||||||
|
|
||||||
### Changing the Vision Model
|
### Changing the Vision Model
|
||||||
|
|
@ -606,6 +610,17 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
|
||||||
|
|
||||||
### Common Setups
|
### Common Setups
|
||||||
|
|
||||||
|
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
|
||||||
|
```yaml
|
||||||
|
auxiliary:
|
||||||
|
vision:
|
||||||
|
base_url: "http://localhost:1234/v1"
|
||||||
|
api_key: "local-key"
|
||||||
|
model: "qwen2.5-vl"
|
||||||
|
```
|
||||||
|
|
||||||
|
`base_url` takes precedence over `provider`, so this is the most explicit way to route an auxiliary task to a specific endpoint. For direct endpoint overrides, Hermes uses the configured `api_key` or falls back to `OPENAI_API_KEY`; it does not reuse `OPENROUTER_API_KEY` for that custom endpoint.
|
||||||
|
|
||||||
**Using OpenAI API key for vision:**
|
**Using OpenAI API key for vision:**
|
||||||
```yaml
|
```yaml
|
||||||
# In ~/.hermes/.env:
|
# In ~/.hermes/.env:
|
||||||
|
|
@ -852,13 +867,17 @@ delegation:
|
||||||
- web
|
- web
|
||||||
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
|
# model: "google/gemini-3-flash-preview" # Override model (empty = inherit parent)
|
||||||
# provider: "openrouter" # Override provider (empty = inherit parent)
|
# provider: "openrouter" # Override provider (empty = inherit parent)
|
||||||
|
# base_url: "http://localhost:1234/v1" # Direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||||
|
# api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
|
**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
|
||||||
|
|
||||||
|
**Direct endpoint override:** To route subagents to a custom endpoint, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. This sends subagents directly to that OpenAI-compatible endpoint and takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back to `OPENAI_API_KEY` only.
|
||||||
|
|
||||||
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
|
The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
|
||||||
|
|
||||||
**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
|
**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` or `base_url` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
|
||||||
|
|
||||||
## Clarify
|
## Clarify
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -156,7 +156,8 @@ What they do:
|
||||||
**Cron execution is handled by the gateway daemon.** The gateway ticks the scheduler every 60 seconds, running any due jobs in isolated agent sessions.
|
**Cron execution is handled by the gateway daemon.** The gateway ticks the scheduler every 60 seconds, running any due jobs in isolated agent sessions.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway install # Install as system service (recommended)
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux: boot-time system service for servers
|
||||||
hermes gateway # Or run in foreground
|
hermes gateway # Or run in foreground
|
||||||
|
|
||||||
hermes cron list
|
hermes cron list
|
||||||
|
|
|
||||||
|
|
@ -209,6 +209,14 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children
|
||||||
delegation:
|
delegation:
|
||||||
max_iterations: 50 # Max turns per child (default: 50)
|
max_iterations: 50 # Max turns per child (default: 50)
|
||||||
default_toolsets: ["terminal", "file", "web"] # Default toolsets
|
default_toolsets: ["terminal", "file", "web"] # Default toolsets
|
||||||
|
model: "google/gemini-3-flash-preview" # Optional provider/model override
|
||||||
|
provider: "openrouter" # Optional built-in provider
|
||||||
|
|
||||||
|
# Or, as an alternative to the block above (use one or the other, not both), point at a direct custom endpoint instead of provider:
|
||||||
|
delegation:
|
||||||
|
model: "qwen2.5-coder"
|
||||||
|
base_url: "http://localhost:1234/v1"
|
||||||
|
api_key: "local-key"
|
||||||
```
|
```
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,14 @@ Every installed skill is automatically available as a slash command:
|
||||||
/gif-search funny cats
|
/gif-search funny cats
|
||||||
/axolotl help me fine-tune Llama 3 on my dataset
|
/axolotl help me fine-tune Llama 3 on my dataset
|
||||||
/github-pr-workflow create a PR for the auth refactor
|
/github-pr-workflow create a PR for the auth refactor
|
||||||
|
/plan design a rollout for migrating our auth provider
|
||||||
|
|
||||||
# Just the skill name loads it and lets the agent ask what you need:
|
# Just the skill name loads it and lets the agent ask what you need:
|
||||||
/excalidraw
|
/excalidraw
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The bundled `plan` skill is a good example of a skill-backed slash command with custom behavior. Running `/plan [request]` tells Hermes to inspect context if needed, write a markdown implementation plan instead of executing the task, and save the result under `.hermes/plans/` relative to the active workspace/backend working directory.
|
||||||
|
|
||||||
You can also interact with skills through natural conversation:
|
You can also interact with skills through natural conversation:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,8 @@ EMAIL_HOME_ADDRESS=your@email.com # Default delivery target for cron jobs
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway # Run in foreground
|
hermes gateway # Run in foreground
|
||||||
hermes gateway install # Install as a system service
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||||
```
|
```
|
||||||
|
|
||||||
On startup, the adapter:
|
On startup, the adapter:
|
||||||
|
|
|
||||||
|
|
@ -54,10 +54,12 @@ This walks you through configuring each platform with arrow-key selection, shows
|
||||||
```bash
|
```bash
|
||||||
hermes gateway # Run in foreground
|
hermes gateway # Run in foreground
|
||||||
hermes gateway setup # Configure messaging platforms interactively
|
hermes gateway setup # Configure messaging platforms interactively
|
||||||
hermes gateway install # Install as systemd service (Linux) / launchd (macOS)
|
hermes gateway install # Install as a user service (Linux) / launchd service (macOS)
|
||||||
hermes gateway start # Start the service
|
sudo hermes gateway install --system # Linux only: install a boot-time system service
|
||||||
hermes gateway stop # Stop the service
|
hermes gateway start # Start the default service
|
||||||
hermes gateway status # Check service status
|
hermes gateway stop # Stop the default service
|
||||||
|
hermes gateway status # Check default service status
|
||||||
|
hermes gateway status --system # Linux only: inspect the system service explicitly
|
||||||
```
|
```
|
||||||
|
|
||||||
## Chat Commands (Inside Messaging)
|
## Chat Commands (Inside Messaging)
|
||||||
|
|
@ -188,8 +190,18 @@ journalctl --user -u hermes-gateway -f
|
||||||
|
|
||||||
# Enable lingering (keeps running after logout)
|
# Enable lingering (keeps running after logout)
|
||||||
sudo loginctl enable-linger $USER
|
sudo loginctl enable-linger $USER
|
||||||
|
|
||||||
|
# Or install a boot-time system service that still runs as your user
|
||||||
|
sudo hermes gateway install --system
|
||||||
|
sudo hermes gateway start --system
|
||||||
|
sudo hermes gateway status --system
|
||||||
|
journalctl -u hermes-gateway -f
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Use the user service on laptops and dev boxes. Use the system service on a VPS or headless host that should come back at boot without relying on systemd linger.
|
||||||
|
|
||||||
|
Avoid keeping both the user and system gateway units installed at once unless you really mean to. Hermes will warn if it detects both because start/stop/status behavior gets ambiguous.
|
||||||
|
|
||||||
### macOS (launchd)
|
### macOS (launchd)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,8 @@ Then start the gateway:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway # Foreground
|
hermes gateway # Foreground
|
||||||
hermes gateway install # Install as a system service
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -168,7 +168,8 @@ Then start the gateway:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway # Foreground
|
hermes gateway # Foreground
|
||||||
hermes gateway install # Install as a system service
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,8 @@ Then start the gateway:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hermes gateway # Foreground
|
hermes gateway # Foreground
|
||||||
hermes gateway install # Install as a system service
|
hermes gateway install # Install as a user service
|
||||||
|
sudo hermes gateway install --system # Linux only: boot-time system service
|
||||||
```
|
```
|
||||||
|
|
||||||
The gateway starts the WhatsApp bridge automatically using the saved session.
|
The gateway starts the WhatsApp bridge automatically using the saved session.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue