Merge origin/main into hermes/hermes-5d160594

This commit is contained in:
teknium1 2026-03-14 19:34:05 -07:00
commit 3229e434b8
78 changed files with 3762 additions and 395 deletions

View file

@ -71,15 +71,17 @@ DEFAULT_AGENT_IDENTITY = (
)
MEMORY_GUIDANCE = (
"You have persistent memory across sessions. Proactively save important things "
"you learn (user preferences, environment details, useful approaches) and do "
"(like a diary!) using the memory tool -- don't wait to be asked."
"You have persistent memory across sessions. Save durable facts using the memory "
"tool: user preferences, environment details, tool quirks, and stable conventions. "
"Memory is injected into every turn, so keep it compact. Do NOT save task progress, "
"session outcomes, or completed-work logs to memory; use session_search to recall "
"those from past transcripts."
)
SESSION_SEARCH_GUIDANCE = (
"When the user references something from a past conversation or you suspect "
"relevant prior context exists, use session_search to recall it before asking "
"them to repeat themselves."
"relevant cross-session context exists, use session_search to recall it before "
"asking them to repeat themselves."
)
SKILLS_GUIDANCE = (
@ -139,6 +141,13 @@ PLATFORM_HINTS = {
"is preserved for threading. Do not include greetings or sign-offs unless "
"contextually appropriate."
),
"cron": (
"You are running as a scheduled cron job. Your final response is automatically "
"delivered to the job's configured destination, so do not use send_message to "
"send to that same target again. If you want the user to receive something in "
"the scheduled destination, put it directly in your final response. Use "
"send_message only for additional or different targets."
),
"cli": (
"You are a CLI AI Agent. Try not to use markdown but simple text "
"renderable inside a terminal."

209
cli.py
View file

@ -3203,8 +3203,33 @@ class HermesCLI:
else:
self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]")
else:
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
self.console.print("[dim #B8860B]Type /help for available commands[/]")
# Prefix matching: if input uniquely identifies one command, execute it.
# Matches against both built-in COMMANDS and installed skill commands so
# that execution-time resolution agrees with tab-completion.
from hermes_cli.commands import COMMANDS
typed_base = cmd_lower.split()[0]
all_known = set(COMMANDS) | set(_skill_commands)
matches = [c for c in all_known if c.startswith(typed_base)]
if len(matches) == 1:
# Expand the prefix to the full command name, preserving arguments.
# Guard against redispatching the same token to avoid infinite
# recursion when the expanded name still doesn't hit an exact branch
# (e.g. /config with extra args that are not yet handled above).
full_name = matches[0]
if full_name == typed_base:
# Already an exact token — no expansion possible; fall through
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
self.console.print("[dim #B8860B]Type /help for available commands[/]")
else:
remainder = cmd_original.strip()[len(typed_base):]
full_cmd = full_name + remainder
return self.process_command(full_cmd)
elif len(matches) > 1:
self.console.print(f"[bold yellow]Ambiguous command: {cmd_lower}[/]")
self.console.print(f"[dim]Did you mean: {', '.join(sorted(matches))}?[/]")
else:
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
self.console.print("[dim #B8860B]Type /help for available commands[/]")
return True
@ -4174,6 +4199,8 @@ class HermesCLI:
Called from the agent thread. Shows a selection UI similar to clarify
with choices: once / session / always / deny. When allow_permanent
is False (tirith warnings present), the 'always' option is hidden.
Long commands also get a 'view' option so the full command can be
expanded before deciding.
Uses _approval_lock to serialize concurrent requests (e.g. from
parallel delegation subtasks) so each prompt gets its own turn
@ -4184,12 +4211,11 @@ class HermesCLI:
with self._approval_lock:
timeout = 60
response_queue = queue.Queue()
choices = ["once", "session", "always", "deny"] if allow_permanent else ["once", "session", "deny"]
self._approval_state = {
"command": command,
"description": description,
"choices": choices,
"choices": self._approval_choices(command, allow_permanent=allow_permanent),
"selected": 0,
"response_queue": response_queue,
}
@ -4220,6 +4246,116 @@ class HermesCLI:
_cprint(f"\n{_DIM} ⏱ Timeout — denying command{_RST}")
return "deny"
def _approval_choices(self, command: str, *, allow_permanent: bool = True) -> list[str]:
"""Return approval choices for a dangerous command prompt."""
choices = ["once", "session", "always", "deny"] if allow_permanent else ["once", "session", "deny"]
if len(command) > 70:
choices.append("view")
return choices
def _handle_approval_selection(self) -> None:
"""Process the currently selected dangerous-command approval choice."""
state = self._approval_state
if not state:
return
selected = state.get("selected", 0)
choices = state.get("choices") or []
if not (0 <= selected < len(choices)):
return
chosen = choices[selected]
if chosen == "view":
state["show_full"] = True
state["choices"] = [choice for choice in choices if choice != "view"]
if state["selected"] >= len(state["choices"]):
state["selected"] = max(0, len(state["choices"]) - 1)
self._invalidate()
return
state["response_queue"].put(chosen)
self._approval_state = None
self._invalidate()
def _get_approval_display_fragments(self):
"""Render the dangerous-command approval panel for the prompt_toolkit UI."""
# Returns a list of (style, text) tuples; an empty list when no approval
# prompt is pending.
state = self._approval_state
if not state:
return []
# Choose the panel width: wide enough for the longest line plus padding,
# bounded by min_width/max_width and by the terminal width (falls back to a
# 100x20 terminal when the size cannot be determined).
def _panel_box_width(title_text: str, content_lines: list[str], min_width: int = 46, max_width: int = 76) -> int:
term_cols = shutil.get_terminal_size((100, 20)).columns
longest = max([len(title_text)] + [len(line) for line in content_lines] + [min_width - 4])
inner = min(max(longest + 4, min_width - 2), max_width - 2, max(24, term_cols - 6))
return inner + 2
# Wrap text to at least 8 columns without altering its whitespace; always
# yields at least one (possibly empty) line so callers can emit a row.
def _wrap_panel_text(text: str, width: int, subsequent_indent: str = "") -> list[str]:
wrapped = textwrap.wrap(
text,
width=max(8, width),
replace_whitespace=False,
drop_whitespace=False,
subsequent_indent=subsequent_indent,
)
return wrapped or [""]
# Emit one panel row: left border, content padded to the inner width, then
# right border plus newline.
# NOTE(review): the border string literals look empty in this view; the
# box-drawing glyphs may have been lost in extraction -- confirm against the
# real file before touching them.
def _append_panel_line(lines, border_style: str, content_style: str, text: str, box_width: int) -> None:
inner_width = max(0, box_width - 2)
lines.append((border_style, ""))
lines.append((content_style, text.ljust(inner_width)))
lines.append((border_style, "\n"))
# Emit a spacer row consisting only of border-styled padding.
def _append_blank_panel_line(lines, border_style: str, box_width: int) -> None:
lines.append((border_style, "" + (" " * box_width) + "\n"))
command = state["command"]
description = state["description"]
choices = state["choices"]
selected = state.get("selected", 0)
show_full = state.get("show_full", False)
title = "⚠️ Dangerous Command"
# Commands over 70 characters are shown truncated with '...' unless the
# state's show_full flag is set.
cmd_display = command if show_full or len(command) <= 70 else command[:70] + '...'
# Human-readable labels for the choice identifiers; unknown identifiers fall
# back to the identifier itself via .get(choice, choice) below.
choice_labels = {
"once": "Allow once",
"session": "Allow for this session",
"always": "Add to permanent allowlist",
"deny": "Deny",
"view": "Show full command",
}
# First pass: wrap everything at a nominal 60 columns purely to measure how
# wide the box needs to be.
preview_lines = _wrap_panel_text(description, 60)
preview_lines.extend(_wrap_panel_text(cmd_display, 60))
for i, choice in enumerate(choices):
prefix = ' ' if i == selected else ' '
preview_lines.extend(_wrap_panel_text(
f"{prefix}{choice_labels.get(choice, choice)}",
60,
subsequent_indent=" ",
))
box_width = _panel_box_width(title, preview_lines)
inner_text_width = max(8, box_width - 2)
# Second pass: re-wrap to the final inner width and emit styled fragments in
# order: top border, title, description, command, choices, bottom border.
lines = []
lines.append(('class:approval-border', '' + ('' * box_width) + '\n'))
_append_panel_line(lines, 'class:approval-border', 'class:approval-title', title, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in _wrap_panel_text(description, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
for wrapped in _wrap_panel_text(cmd_display, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
# The highlighted choice gets its own style class so it stands out.
style = 'class:approval-selected' if i == selected else 'class:approval-choice'
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
lines.append(('class:approval-border', '' + ('' * box_width) + '\n'))
return lines
def _secret_capture_callback(self, var_name: str, prompt: str, metadata=None) -> dict:
return prompt_for_secret(self, var_name, prompt, metadata)
@ -4811,22 +4947,7 @@ class HermesCLI:
# --- Approval selection: confirm the highlighted choice ---
if self._approval_state:
state = self._approval_state
selected = state["selected"]
choices = state["choices"]
if 0 <= selected < len(choices):
chosen = choices[selected]
if chosen == "view":
# Toggle full command display without closing the prompt
state["show_full"] = True
# Remove the "view" option since it's been used
state["choices"] = [c for c in choices if c != "view"]
if state["selected"] >= len(state["choices"]):
state["selected"] = len(state["choices"]) - 1
event.app.invalidate()
return
state["response_queue"].put(chosen)
self._approval_state = None
self._handle_approval_selection()
event.app.invalidate()
return
@ -5512,53 +5633,7 @@ class HermesCLI:
# --- Dangerous command approval: display widget ---
def _get_approval_display():
state = cli_ref._approval_state
if not state:
return []
command = state["command"]
description = state["description"]
choices = state["choices"]
selected = state.get("selected", 0)
show_full = state.get("show_full", False)
if show_full or len(command) <= 70:
cmd_display = command
else:
cmd_display = command[:70] + '...'
choice_labels = {
"once": "Allow once",
"session": "Allow for this session",
"always": "Add to permanent allowlist",
"deny": "Deny",
"view": "Show full command",
}
preview_lines = _wrap_panel_text(description, 60)
preview_lines.extend(_wrap_panel_text(cmd_display, 60))
for i, choice in enumerate(choices):
prefix = ' ' if i == selected else ' '
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice_labels.get(choice, choice)}", 60, subsequent_indent=" "))
box_width = _panel_box_width("⚠️ Dangerous Command", preview_lines)
inner_text_width = max(8, box_width - 2)
lines = []
lines.append(('class:approval-border', '╭─ '))
lines.append(('class:approval-title', '⚠️ Dangerous Command'))
lines.append(('class:approval-border', ' ' + ('' * max(0, box_width - len("⚠️ Dangerous Command") - 3)) + '\n'))
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in _wrap_panel_text(description, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
for wrapped in _wrap_panel_text(cmd_display, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
style = 'class:approval-selected' if i == selected else 'class:approval-choice'
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
lines.append(('class:approval-border', '' + ('' * box_width) + '\n'))
return lines
return cli_ref._get_approval_display_fragments()
approval_widget = ConditionalContainer(
Window(

View file

@ -57,6 +57,50 @@ def _resolve_origin(job: dict) -> Optional[dict]:
return None
def _resolve_delivery_target(job: dict) -> Optional[dict]:
    """Work out where a cron job's output is auto-delivered, if anywhere.

    The job's ``deliver`` value (default ``"local"``) is interpreted as:

    * ``"local"``: no delivery target.
    * ``"origin"``: the chat the job originated from, when one is resolvable.
    * ``"<platform>:<chat_id>"``: that explicit chat.
    * ``"<platform>"``: the origin chat if it is on that platform, otherwise
      the ``<PLATFORM>_HOME_CHANNEL`` environment variable.

    Returns:
        A dict with ``platform``, ``chat_id`` and ``thread_id`` keys, or
        ``None`` when no concrete target can be determined.
    """
    mode = job.get("deliver", "local")
    source = _resolve_origin(job)

    if mode == "local":
        return None

    if mode == "origin":
        if source:
            return {
                "platform": source["platform"],
                "chat_id": str(source["chat_id"]),
                "thread_id": source.get("thread_id"),
            }
        return None

    if ":" in mode:
        # Split on the first colon only, so the chat id may itself contain colons.
        explicit_platform, explicit_chat = mode.split(":", 1)
        return {
            "platform": explicit_platform,
            "chat_id": explicit_chat,
            "thread_id": None,
        }

    # Bare platform name: prefer the originating chat when it is on that platform.
    if source and source.get("platform") == mode:
        return {
            "platform": mode,
            "chat_id": str(source["chat_id"]),
            "thread_id": source.get("thread_id"),
        }

    home_channel = os.getenv(f"{mode.upper()}_HOME_CHANNEL", "")
    if home_channel:
        return {
            "platform": mode,
            "chat_id": home_channel,
            "thread_id": None,
        }
    return None
def _deliver_result(job: dict, content: str) -> None:
"""
Deliver job output to the configured target (origin chat, specific platform, etc.).
@ -64,36 +108,19 @@ def _deliver_result(job: dict, content: str) -> None:
Uses the standalone platform send functions from send_message_tool so delivery
works whether or not the gateway is running.
"""
deliver = job.get("deliver", "local")
origin = _resolve_origin(job)
if deliver == "local":
target = _resolve_delivery_target(job)
if not target:
if job.get("deliver", "local") != "local":
logger.warning(
"Job '%s' deliver=%s but no concrete delivery target could be resolved",
job["id"],
job.get("deliver", "local"),
)
return
thread_id = None
# Resolve target platform + chat_id
if deliver == "origin":
if not origin:
logger.warning("Job '%s' deliver=origin but no origin stored, skipping delivery", job["id"])
return
platform_name = origin["platform"]
chat_id = origin["chat_id"]
thread_id = origin.get("thread_id")
elif ":" in deliver:
platform_name, chat_id = deliver.split(":", 1)
else:
# Bare platform name like "telegram" — need to resolve to origin or home channel
platform_name = deliver
if origin and origin.get("platform") == platform_name:
chat_id = origin["chat_id"]
thread_id = origin.get("thread_id")
else:
# Fall back to home channel
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if not chat_id:
logger.warning("Job '%s' deliver=%s but no chat_id or home channel. Set via: hermes config set %s_HOME_CHANNEL <channel_id>", job["id"], deliver, platform_name.upper())
return
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
from tools.send_message_tool import _send_to_platform
from gateway.config import load_gateway_config, Platform
@ -207,6 +234,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
job_name = job["name"]
prompt = _build_job_prompt(job)
origin = _resolve_origin(job)
delivery_target = _resolve_delivery_target(job)
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
@ -217,6 +245,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
if delivery_target:
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
if delivery_target.get("thread_id") is not None:
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
try:
# Re-read .env and config.yaml fresh every run so provider/key
@ -363,7 +396,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
finally:
# Clean up injected env vars so they don't leak to other jobs
for key in ("HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"):
for key in (
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
):
os.environ.pop(key, None)
if _session_db:
try:

View file

@ -39,7 +39,9 @@ def resize_tool_pool(max_workers: int):
Safe to call before any tasks are submitted.
"""
global _tool_executor
old_executor = _tool_executor
_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
old_executor.shutdown(wait=False)
logger.info("Tool thread pool resized to %d workers", max_workers)
logger = logging.getLogger(__name__)

View file

@ -161,7 +161,7 @@ class DeliveryRouter:
# Always include local if configured
if self.config.always_log_local:
local_key = (Platform.LOCAL, None)
local_key = (Platform.LOCAL, None, None)
if local_key not in seen_platforms:
targets.append(DeliveryTarget(platform=Platform.LOCAL))

View file

@ -346,6 +346,10 @@ class BasePlatformAdapter(ABC):
self.platform = platform
self._message_handler: Optional[MessageHandler] = None
self._running = False
self._fatal_error_code: Optional[str] = None
self._fatal_error_message: Optional[str] = None
self._fatal_error_retryable = True
self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
# Track active message handlers per session for interrupt support
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
@ -353,6 +357,70 @@ class BasePlatformAdapter(ABC):
self._pending_messages: Dict[str, MessageEvent] = {}
# Chats where auto-TTS on voice input is disabled (set by /voice off)
self._auto_tts_disabled_chats: set = set()
@property
def has_fatal_error(self) -> bool:
return self._fatal_error_message is not None
@property
def fatal_error_message(self) -> Optional[str]:
return self._fatal_error_message
@property
def fatal_error_code(self) -> Optional[str]:
return self._fatal_error_code
@property
def fatal_error_retryable(self) -> bool:
return self._fatal_error_retryable
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
self._fatal_error_handler = handler
def _mark_connected(self) -> None:
self._running = True
self._fatal_error_code = None
self._fatal_error_message = None
self._fatal_error_retryable = True
try:
from gateway.status import write_runtime_status
write_runtime_status(platform=self.platform.value, platform_state="connected", error_code=None, error_message=None)
except Exception:
pass
def _mark_disconnected(self) -> None:
self._running = False
if self.has_fatal_error:
return
try:
from gateway.status import write_runtime_status
write_runtime_status(platform=self.platform.value, platform_state="disconnected", error_code=None, error_message=None)
except Exception:
pass
def _set_fatal_error(self, code: str, message: str, *, retryable: bool) -> None:
self._running = False
self._fatal_error_code = code
self._fatal_error_message = message
self._fatal_error_retryable = retryable
try:
from gateway.status import write_runtime_status
write_runtime_status(
platform=self.platform.value,
platform_state="fatal",
error_code=code,
error_message=message,
)
except Exception:
pass
async def _notify_fatal_error(self) -> None:
handler = self._fatal_error_handler
if not handler:
return
result = handler(self)
if asyncio.iscoroutine(result):
await result
@property
def name(self) -> str:

View file

@ -105,12 +105,43 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram message limits
MAX_MESSAGE_LENGTH = 4096
MEDIA_GROUP_WAIT_SECONDS = 0.8
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
self._app: Optional[Application] = None
self._bot: Optional[Bot] = None
self._media_group_events: Dict[str, MessageEvent] = {}
self._media_group_tasks: Dict[str, asyncio.Task] = {}
self._token_lock_identity: Optional[str] = None
self._polling_error_task: Optional[asyncio.Task] = None
@staticmethod
def _looks_like_polling_conflict(error: Exception) -> bool:
text = str(error).lower()
return (
error.__class__.__name__.lower() == "conflict"
or "terminated by other getupdates request" in text
or "another bot instance is running" in text
)
async def _handle_polling_conflict(self, error: Exception) -> None:
    """Turn a polling conflict into a non-retryable fatal error.

    Logs the conflict, records the ``telegram_polling_conflict`` fatal error,
    attempts to stop the updater, and finally notifies the fatal-error
    handler. Returns immediately if that same fatal error is already
    recorded.
    """
    already_flagged = (
        self.has_fatal_error
        and self.fatal_error_code == "telegram_polling_conflict"
    )
    if already_flagged:
        return

    message = (
        "Another Telegram bot poller is already using this token. "
        "Hermes stopped Telegram polling to avoid endless retry spam. "
        "Make sure only one gateway instance is running for this bot token."
    )
    logger.error("[%s] %s Original error: %s", self.name, message, error)
    self._set_fatal_error("telegram_polling_conflict", message, retryable=False)

    try:
        updater = self._app and self._app.updater
        if updater:
            await updater.stop()
    except Exception as stop_error:
        # A failed stop must not prevent the handler from being notified.
        logger.warning(
            "[%s] Failed stopping Telegram polling after conflict: %s",
            self.name,
            stop_error,
            exc_info=True,
        )

    await self._notify_fatal_error()
async def connect(self) -> bool:
"""Connect to Telegram and start polling for updates."""
if not TELEGRAM_AVAILABLE:
@ -125,6 +156,25 @@ class TelegramAdapter(BasePlatformAdapter):
return False
try:
from gateway.status import acquire_scoped_lock
self._token_lock_identity = self.config.token
acquired, existing = acquire_scoped_lock(
"telegram-bot-token",
self._token_lock_identity,
metadata={"platform": self.platform.value},
)
if not acquired:
owner_pid = existing.get("pid") if isinstance(existing, dict) else None
message = (
"Another local Hermes gateway is already using this Telegram bot token"
+ (f" (PID {owner_pid})." if owner_pid else ".")
+ " Stop the other gateway before starting a second Telegram poller."
)
logger.error("[%s] %s", self.name, message)
self._set_fatal_error("telegram_token_lock", message, retryable=False)
return False
# Build the application
self._app = Application.builder().token(self.config.token).build()
self._bot = self._app.bot
@ -150,9 +200,20 @@ class TelegramAdapter(BasePlatformAdapter):
# Start polling in background
await self._app.initialize()
await self._app.start()
loop = asyncio.get_running_loop()
def _polling_error_callback(error: Exception) -> None:
if not self._looks_like_polling_conflict(error):
logger.error("[%s] Telegram polling error: %s", self.name, error, exc_info=True)
return
if self._polling_error_task and not self._polling_error_task.done():
return
self._polling_error_task = loop.create_task(self._handle_polling_conflict(error))
await self._app.updater.start_polling(
allowed_updates=Update.ALL_TYPES,
drop_pending_updates=True,
error_callback=_polling_error_callback,
)
# Register bot commands so Telegram shows a hint menu when users type /
@ -188,16 +249,30 @@ class TelegramAdapter(BasePlatformAdapter):
exc_info=True,
)
self._running = True
self._mark_connected()
logger.info("[%s] Connected and polling for Telegram updates", self.name)
return True
except Exception as e:
if self._token_lock_identity:
try:
from gateway.status import release_scoped_lock
release_scoped_lock("telegram-bot-token", self._token_lock_identity)
except Exception:
pass
logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
return False
async def disconnect(self) -> None:
"""Stop polling and disconnect."""
"""Stop polling, cancel pending album flushes, and disconnect."""
pending_media_group_tasks = list(self._media_group_tasks.values())
for task in pending_media_group_tasks:
task.cancel()
if pending_media_group_tasks:
await asyncio.gather(*pending_media_group_tasks, return_exceptions=True)
self._media_group_tasks.clear()
self._media_group_events.clear()
if self._app:
try:
await self._app.updater.stop()
@ -205,10 +280,17 @@ class TelegramAdapter(BasePlatformAdapter):
await self._app.shutdown()
except Exception as e:
logger.warning("[%s] Error during Telegram disconnect: %s", self.name, e, exc_info=True)
if self._token_lock_identity:
try:
from gateway.status import release_scoped_lock
release_scoped_lock("telegram-bot-token", self._token_lock_identity)
except Exception as e:
logger.warning("[%s] Error releasing Telegram token lock: %s", self.name, e, exc_info=True)
self._running = False
self._mark_disconnected()
self._app = None
self._bot = None
self._token_lock_identity = None
logger.info("[%s] Disconnected from Telegram", self.name)
async def send(
@ -872,8 +954,53 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True)
media_group_id = getattr(msg, "media_group_id", None)
if media_group_id:
await self._queue_media_group_event(str(media_group_id), event)
return
await self.handle_message(event)
async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
    """Buffer one item of a Telegram album and (re)schedule its flush.

    Telegram delivers an album as several updates that share a
    media_group_id. Forwarding each item immediately would make the gateway
    treat the second item as a new user message that interrupts the first.
    Instead, items are merged into the first event seen for the group, and
    every arrival cancels the previously scheduled flush task and schedules
    a fresh one.
    """
    buffered = self._media_group_events.get(media_group_id)
    if buffered is None:
        self._media_group_events[media_group_id] = event
    else:
        buffered.media_urls.extend(event.media_urls)
        buffered.media_types.extend(event.media_types)
        caption = event.text
        if caption and not buffered.text:
            buffered.text = caption
        elif caption and caption not in buffered.text.split("\n\n"):
            # Append only captions that are not already one of the paragraphs.
            buffered.text = f"{buffered.text}\n\n{caption}"

    superseded = self._media_group_tasks.get(media_group_id)
    if superseded:
        superseded.cancel()

    flush = self._flush_media_group_event(media_group_id)
    self._media_group_tasks[media_group_id] = asyncio.create_task(flush)
async def _flush_media_group_event(self, media_group_id: str) -> None:
try:
await asyncio.sleep(self.MEDIA_GROUP_WAIT_SECONDS)
event = self._media_group_events.pop(media_group_id, None)
if event is not None:
await self.handle_message(event)
except asyncio.CancelledError:
return
finally:
self._media_group_tasks.pop(media_group_id, None)
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
"""
Describe a Telegram sticker via vision analysis, with caching.

View file

@ -215,6 +215,33 @@ def _resolve_gateway_model() -> str:
return model
def _resolve_hermes_bin() -> Optional[list[str]]:
"""Resolve the Hermes update command as argv parts.
Tries in order:
1. ``shutil.which("hermes")`` standard PATH lookup
2. ``sys.executable -m hermes_cli.main`` fallback when Hermes is running
from a venv/module invocation and the ``hermes`` shim is not on PATH
Returns argv parts ready for quoting/joining, or ``None`` if neither works.
"""
import shutil
hermes_bin = shutil.which("hermes")
if hermes_bin:
return [hermes_bin]
try:
import importlib.util
if importlib.util.find_spec("hermes_cli") is not None:
return [sys.executable, "-m", "hermes_cli.main"]
except Exception:
pass
return None
class GatewayRunner:
"""
Main gateway controller.
@ -245,6 +272,8 @@ class GatewayRunner:
self.delivery_router = DeliveryRouter(self.config)
self._running = False
self._shutdown_event = asyncio.Event()
self._exit_cleanly = False
self._exit_reason: Optional[str] = None
# Track running agents per session for interrupt support
# Key: session_key, Value: AIAgent instance
@ -463,6 +492,41 @@ class GatewayRunner:
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id)
@property
def should_exit_cleanly(self) -> bool:
    """Whether a clean exit has been requested for this runner."""
    return self._exit_cleanly

@property
def exit_reason(self) -> Optional[str]:
    """The recorded reason for exiting, or ``None`` when none is set."""
    return self._exit_reason
async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
    """React to a non-retryable adapter failure after startup.

    Logs the failure. If *adapter* is still the registered adapter for its
    platform, it is disconnected and removed from the registry. When no
    adapters remain afterwards, the exit reason is recorded and the gateway
    is stopped.
    """
    platform = adapter.platform
    logger.error(
        "Fatal %s adapter error (%s): %s",
        platform.value,
        adapter.fatal_error_code or "unknown",
        adapter.fatal_error_message or "unknown error",
    )

    # Identity check: never tear down a different adapter instance that may
    # have been registered for the same platform.
    if self.adapters.get(platform) is adapter:
        try:
            await adapter.disconnect()
        finally:
            # Deregister even if disconnect() raised.
            self.adapters.pop(platform, None)
            self.delivery_router.adapters = self.adapters

    if self.adapters:
        return

    self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected"
    logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
    await self.stop()
def _request_clean_exit(self, reason: str) -> None:
self._exit_cleanly = True
self._exit_reason = reason
self._shutdown_event.set()
@staticmethod
def _load_prefill_messages() -> List[Dict[str, Any]]:
@ -647,6 +711,11 @@ class GatewayRunner:
"""
logger.info("Starting Hermes Gateway...")
logger.info("Session storage: %s", self.config.sessions_dir)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="starting", exit_reason=None)
except Exception:
pass
# Warn if no user allowlists are configured and open access is not opted in
_any_allowlist = any(
@ -676,6 +745,7 @@ class GatewayRunner:
logger.warning("Process checkpoint recovery: %s", e)
connected_count = 0
startup_nonretryable_errors: list[str] = []
# Initialize and connect each configured platform
for platform, platform_config in self.config.platforms.items():
@ -687,8 +757,9 @@ class GatewayRunner:
logger.warning("No adapter available for %s", platform.value)
continue
# Set up message handler
# Set up message + fatal error handlers
adapter.set_message_handler(self._handle_message)
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
# Try to connect
logger.info("Connecting to %s...", platform.value)
@ -701,10 +772,24 @@ class GatewayRunner:
logger.info("%s connected", platform.value)
else:
logger.warning("%s failed to connect", platform.value)
if adapter.has_fatal_error and not adapter.fatal_error_retryable:
startup_nonretryable_errors.append(
f"{platform.value}: {adapter.fatal_error_message}"
)
except Exception as e:
logger.error("%s error: %s", platform.value, e)
if connected_count == 0:
if startup_nonretryable_errors:
reason = "; ".join(startup_nonretryable_errors)
logger.error("Gateway hit a non-retryable startup conflict: %s", reason)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
except Exception:
pass
self._request_clean_exit(reason)
return True
logger.warning("No messaging platforms connected.")
logger.info("Gateway will continue running for cron job execution.")
@ -712,6 +797,11 @@ class GatewayRunner:
self.delivery_router.adapters = self.adapters
self._running = True
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="running", exit_reason=None)
except Exception:
pass
# Emit gateway:startup hook
hook_count = len(self.hooks.loaded_hooks)
@ -806,8 +896,12 @@ class GatewayRunner:
self._shutdown_all_gateway_honcho()
self._shutdown_event.set()
from gateway.status import remove_pid_file
from gateway.status import remove_pid_file, write_runtime_status
remove_pid_file()
try:
write_runtime_status(gateway_state="stopped", exit_reason=self._exit_reason)
except Exception:
pass
logger.info("Gateway stopped")
@ -3155,9 +3249,14 @@ class GatewayRunner:
if not git_dir.exists():
return "✗ Not a git repository — cannot update."
hermes_bin = shutil.which("hermes")
if not hermes_bin:
return "✗ `hermes` command not found on PATH."
hermes_cmd = _resolve_hermes_bin()
if not hermes_cmd:
return (
"✗ Could not locate the `hermes` command. "
"Hermes is running, but the update command could not find the "
"executable on PATH or via the current Python interpreter. "
"Try running `hermes update` manually in your terminal."
)
pending_path = _hermes_home / ".update_pending.json"
output_path = _hermes_home / ".update_output.txt"
@ -3173,8 +3272,9 @@ class GatewayRunner:
# Spawn `hermes update` in a separate cgroup so it survives gateway
# restart. systemd-run --user --scope creates a transient scope unit.
hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
update_cmd = (
f"{shlex.quote(hermes_bin)} update > {shlex.quote(str(output_path))} 2>&1; "
f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; "
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
)
try:
@ -4338,6 +4438,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
success = await runner.start()
if not success:
return False
if runner.should_exit_cleanly:
if runner.exit_reason:
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
return True
# Write PID file so CLI can detect gateway is running
import atexit

View file

@ -11,13 +11,17 @@ that will be useful when we add named profiles (multiple agents running
concurrently under distinct configurations).
"""
import hashlib
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from typing import Any, Optional
_GATEWAY_KIND = "hermes-gateway"
_RUNTIME_STATUS_FILE = "gateway_state.json"
_LOCKS_DIRNAME = "gateway-locks"
def _get_pid_path() -> Path:
@ -26,6 +30,32 @@ def _get_pid_path() -> Path:
return home / "gateway.pid"
def _get_runtime_status_path() -> Path:
    """Locate the runtime status file, which sits next to the gateway PID file."""
    pid_file = _get_pid_path()
    return pid_file.with_name(_RUNTIME_STATUS_FILE)
def _get_lock_dir() -> Path:
"""Return the machine-local directory for token-scoped gateway locks."""
override = os.getenv("HERMES_GATEWAY_LOCK_DIR")
if override:
return Path(override)
state_home = Path(os.getenv("XDG_STATE_HOME", Path.home() / ".local" / "state"))
return state_home / "hermes" / _LOCKS_DIRNAME
def _utc_now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _scope_hash(identity: str) -> str:
return hashlib.sha256(identity.encode("utf-8")).hexdigest()[:16]
def _get_scope_lock_path(scope: str, identity: str) -> Path:
    """Build the lock-file path for ``scope`` and the hashed ``identity``.

    The raw identity never appears in the file name; only its short hash does.
    """
    lock_dir = _get_lock_dir()
    fingerprint = _scope_hash(identity)
    return lock_dir / f"{scope}-{fingerprint}.lock"
def _get_process_start_time(pid: int) -> Optional[int]:
"""Return the kernel start time for a process when available."""
stat_path = Path(f"/proc/{pid}/stat")
@ -73,6 +103,38 @@ def _build_pid_record() -> dict:
}
def _build_runtime_status_record() -> dict[str, Any]:
    """Create a fresh runtime status record on top of the PID record.

    The record starts in the ``"starting"`` state with no exit reason and no
    per-platform entries.
    """
    record = _build_pid_record()
    record["gateway_state"] = "starting"
    record["exit_reason"] = None
    record["platforms"] = {}
    record["updated_at"] = _utc_now_iso()
    return record
def _read_json_file(path: Path) -> Optional[dict[str, Any]]:
if not path.exists():
return None
try:
raw = path.read_text().strip()
except OSError:
return None
if not raw:
return None
try:
payload = json.loads(raw)
except json.JSONDecodeError:
return None
return payload if isinstance(payload, dict) else None
def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload))
def _read_pid_record() -> Optional[dict]:
pid_path = _get_pid_path()
if not pid_path.exists():
@ -99,9 +161,49 @@ def _read_pid_record() -> Optional[dict]:
def write_pid_file() -> None:
    """Write the current process PID and metadata to the gateway PID file."""
    # _write_json_file creates the parent directory and serializes the
    # record, so one call is enough; writing the file a second time by hand
    # would only rebuild and rewrite the same record.
    _write_json_file(_get_pid_path(), _build_pid_record())
# Sentinel default for ``exit_reason``: lets the function tell "argument
# omitted" (keep the stored value) apart from an explicit ``None`` (clear it).
_EXIT_REASON_UNSET: Any = object()


def write_runtime_status(
    *,
    gateway_state: Optional[str] = None,
    exit_reason: Optional[str] = _EXIT_REASON_UNSET,
    platform: Optional[str] = None,
    platform_state: Optional[str] = None,
    error_code: Optional[str] = None,
    error_message: Optional[str] = None,
) -> None:
    """Persist gateway runtime health information for diagnostics/status.

    The existing status file is loaded (or a fresh record is built when it is
    missing or unreadable), the requested fields are merged in, and the
    result is written back.

    Args:
        gateway_state: New overall gateway state; ``None`` keeps the stored one.
        exit_reason: New exit reason. Omit the argument to keep the stored
            value; pass ``None`` explicitly to clear a reason left behind by
            an earlier run.
        platform: Name of the platform entry to update; ``None`` skips the
            per-platform update entirely.
        platform_state: New state for ``platform``; ``None`` keeps the stored one.
        error_code: New error code for ``platform``; ``None`` keeps the stored one.
        error_message: New error message for ``platform``; ``None`` keeps the
            stored one.
    """
    path = _get_runtime_status_path()
    payload = _read_json_file(path) or _build_runtime_status_record()

    # The record comes from disk, so repair a missing or malformed platform map
    # instead of failing on it below.
    if not isinstance(payload.get("platforms"), dict):
        payload["platforms"] = {}
    payload.setdefault("kind", _GATEWAY_KIND)
    payload.setdefault("pid", os.getpid())
    payload.setdefault("start_time", _get_process_start_time(os.getpid()))
    payload["updated_at"] = _utc_now_iso()

    if gateway_state is not None:
        payload["gateway_state"] = gateway_state
    if exit_reason is not _EXIT_REASON_UNSET:
        # Includes an explicit None, which resets a stale reason.
        payload["exit_reason"] = exit_reason

    if platform is not None:
        platform_payload = payload["platforms"].get(platform)
        if not isinstance(platform_payload, dict):
            platform_payload = {}
        if platform_state is not None:
            platform_payload["state"] = platform_state
        if error_code is not None:
            platform_payload["error_code"] = error_code
        if error_message is not None:
            platform_payload["error_message"] = error_message
        platform_payload["updated_at"] = _utc_now_iso()
        payload["platforms"][platform] = platform_payload

    _write_json_file(path, payload)
def read_runtime_status() -> Optional[dict[str, Any]]:
    """Load the persisted gateway runtime status, or ``None`` when unavailable."""
    status_path = _get_runtime_status_path()
    return _read_json_file(status_path)
def remove_pid_file() -> None:
@ -112,6 +214,87 @@ def remove_pid_file() -> None:
pass
def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str, Any]] = None) -> tuple[bool, Optional[dict[str, Any]]]:
    """Try to take the machine-local lock for ``scope`` + ``identity``.

    Used to prevent multiple local gateways from using the same external identity
    at once (e.g. the same Telegram bot token across different HERMES_HOME dirs).

    Returns:
        ``(True, None)`` when a new lock file was created,
        ``(True, previous_record)`` when this process already held the lock
        and its record was refreshed, or ``(False, holder_record)`` when the
        lock is held by another holder (the record may be ``None`` when the
        competing file cannot be read).
    """
    lock_path = _get_scope_lock_path(scope, identity)
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    record = dict(_build_pid_record())
    record.update(
        scope=scope,
        identity_hash=_scope_hash(identity),
        metadata=metadata or {},
        updated_at=_utc_now_iso(),
    )

    holder = _read_json_file(lock_path)
    if holder:
        try:
            holder_pid = int(holder["pid"])
        except (KeyError, TypeError, ValueError):
            holder_pid = None
        recorded_start = holder.get("start_time")

        # Re-acquiring our own lock just refreshes the stored record.
        if holder_pid == os.getpid() and recorded_start == record.get("start_time"):
            _write_json_file(lock_path, record)
            return True, holder

        if holder_pid is None:
            # A record without a usable PID cannot belong to a live holder.
            stale = True
        else:
            try:
                os.kill(holder_pid, 0)  # signal 0 only probes the process
            except (ProcessLookupError, PermissionError):
                stale = True
            else:
                # The PID answers; a different start time means the PID was
                # reused by another process since the lock was written.
                live_start = _get_process_start_time(holder_pid)
                stale = (
                    recorded_start is not None
                    and live_start is not None
                    and live_start != recorded_start
                )

        if not stale:
            return False, holder
        try:
            lock_path.unlink(missing_ok=True)
        except OSError:
            pass

    # O_EXCL makes creation fail if another process created the file first.
    try:
        fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    except FileExistsError:
        return False, _read_json_file(lock_path)

    try:
        with os.fdopen(fd, "w", encoding="utf-8") as lock_file:
            json.dump(record, lock_file)
    except Exception:
        # Do not leave a half-written lock behind.
        try:
            lock_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
    return True, None
def release_scoped_lock(scope: str, identity: str) -> None:
    """Delete the scope lock, but only if its record names this process.

    Ownership requires both the recorded PID and the recorded start time to
    match the current process; otherwise the file is left untouched.
    """
    lock_path = _get_scope_lock_path(scope, identity)
    record = _read_json_file(lock_path)
    own_pid = os.getpid()
    if (
        not record
        or record.get("pid") != own_pid
        or record.get("start_time") != _get_process_start_time(os.getpid())
    ):
        return
    try:
        lock_path.unlink(missing_ok=True)
    except OSError:
        pass
def get_running_pid() -> Optional[int]:
"""Return the PID of a running gateway instance, or ``None``.

View file

@ -821,7 +821,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
print(f" ✓ Saved {name}")
print()
else:
print(" Set later with: hermes config set KEY VALUE")
print(" Set later with: hermes config set <key> <value>")
# Check for missing config fields
missing_config = get_missing_config_fields()
@ -1265,7 +1265,7 @@ def show_config():
print()
print(color("" * 60, Colors.DIM))
print(color(" hermes config edit # Edit config file", Colors.DIM))
print(color(" hermes config set KEY VALUE", Colors.DIM))
print(color(" hermes config set <key> <value>", Colors.DIM))
print(color(" hermes setup # Run setup wizard", Colors.DIM))
print()
@ -1391,7 +1391,7 @@ def config_command(args):
key = getattr(args, 'key', None)
value = getattr(args, 'value', None)
if not key or not value:
print("Usage: hermes config set KEY VALUE")
print("Usage: hermes config set <key> <value>")
print()
print("Examples:")
print(" hermes config set model anthropic/claude-sonnet-4")
@ -1506,7 +1506,7 @@ def config_command(args):
print("Available commands:")
print(" hermes config Show current configuration")
print(" hermes config edit Open config in editor")
print(" hermes config set K V Set a config value")
print(" hermes config set <key> <value> Set a config value")
print(" hermes config check Check for missing/outdated config")
print(" hermes config migrate Update config with new options")
print(" hermes config path Show config file path")

View file

@ -251,7 +251,6 @@ StandardError=journal
WantedBy=default.target
"""
def _normalize_service_definition(text: str) -> str:
return "\n".join(line.rstrip() for line in text.strip().splitlines())
@ -279,6 +278,65 @@ def refresh_systemd_unit_if_needed() -> bool:
return True
def _print_linger_enable_warning(username: str, detail: str | None = None) -> None:
    """Print the linger warning plus the manual commands that enable it.

    Args:
        username: Account name inserted into the ``loginctl`` command.
        detail: Optional failure detail; when non-empty it is shown on its
            own line below the warning.
    """
    lines = ["", "⚠ Linger not enabled — gateway may stop when you close this terminal."]
    if detail:
        lines.append(f" Auto-enable failed: {detail}")
    lines.extend(
        [
            "",
            " On headless servers (VPS, cloud instances) run:",
            f" sudo loginctl enable-linger {username}",
            "",
            " Then restart the gateway:",
            f" systemctl --user restart {SERVICE_NAME}.service",
            "",
        ]
    )
    for line in lines:
        print(line)
def _ensure_linger_enabled() -> None:
    """Turn on systemd linger for the current user when that is possible.

    Does nothing off Linux. Reports success when linger is already enabled;
    otherwise runs ``loginctl enable-linger`` and prints manual instructions
    when the command is missing, cannot be started, or exits non-zero.
    """
    if not is_linux():
        return

    import getpass
    import shutil

    already_enabled = "✓ Systemd linger is enabled (service survives logout)"
    username = getpass.getuser()

    # Fast path: the marker file exists for this user.
    if Path(f"/var/lib/systemd/linger/{username}").exists():
        print(already_enabled)
        return

    linger_enabled, linger_detail = get_systemd_linger_status()
    if linger_enabled is True:
        print(already_enabled)
        return

    if not shutil.which("loginctl"):
        _print_linger_enable_warning(username, linger_detail or "loginctl not found")
        return

    print("Enabling linger so the gateway survives SSH logout...")
    command = ["loginctl", "enable-linger", username]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception as exc:
        _print_linger_enable_warning(username, str(exc))
        return

    if completed.returncode != 0:
        # Prefer the command's own output; fall back to the exit status.
        failure = (completed.stderr or completed.stdout or f"exit {completed.returncode}").strip()
        _print_linger_enable_warning(username, failure or linger_detail)
        return

    print("✓ Linger enabled — gateway will persist after logout")
def systemd_install(force: bool = False):
unit_path = get_systemd_unit_path()
@ -302,7 +360,7 @@ def systemd_install(force: bool = False):
print(f" hermes gateway status # Check status")
print(f" journalctl --user -u {SERVICE_NAME} -f # View logs")
print()
print_systemd_linger_guidance()
_ensure_linger_enabled()
def systemd_uninstall():
subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=False)
@ -367,6 +425,13 @@ def systemd_status(deep: bool = False):
print("✗ Gateway service is stopped")
print(" Run: hermes gateway start")
runtime_lines = _runtime_health_lines()
if runtime_lines:
print()
print("Recent gateway health:")
for line in runtime_lines:
print(f" {line}")
if deep:
print_systemd_linger_guidance()
else:
@ -693,6 +758,35 @@ def _platform_status(platform: dict) -> str:
return "not configured"
def _runtime_health_lines() -> list[str]:
"""Summarize the latest persisted gateway runtime health state."""
try:
from gateway.status import read_runtime_status
except Exception:
return []
state = read_runtime_status()
if not state:
return []
lines: list[str] = []
gateway_state = state.get("gateway_state")
exit_reason = state.get("exit_reason")
platforms = state.get("platforms", {}) or {}
for platform, pdata in platforms.items():
if pdata.get("state") == "fatal":
message = pdata.get("error_message") or "unknown error"
lines.append(f"{platform}: {message}")
if gateway_state == "startup_failed" and exit_reason:
lines.append(f"⚠ Last startup issue: {exit_reason}")
elif gateway_state == "stopped" and exit_reason:
lines.append(f"⚠ Last shutdown reason: {exit_reason}")
return lines
def _setup_standard_platform(platform: dict):
"""Interactive setup for Telegram, Discord, or Slack."""
emoji = platform["emoji"]
@ -1186,11 +1280,23 @@ def gateway_command(args):
if pids:
print(f"✓ Gateway is running (PID: {', '.join(map(str, pids))})")
print(" (Running manually, not as a system service)")
runtime_lines = _runtime_health_lines()
if runtime_lines:
print()
print("Recent gateway health:")
for line in runtime_lines:
print(f" {line}")
print()
print("To install as a service:")
print(" hermes gateway install")
else:
print("✗ Gateway is not running")
runtime_lines = _runtime_health_lines()
if runtime_lines:
print()
print("Recent gateway health:")
for line in runtime_lines:
print(f" {line}")
print()
print("To start:")
print(" hermes gateway # Run in foreground")

View file

@ -2056,7 +2056,15 @@ def cmd_update(args):
check=True
)
branch = result.stdout.strip()
# Fall back to main if the current branch doesn't exist on the remote
verify = subprocess.run(
git_cmd + ["rev-parse", "--verify", f"origin/{branch}"],
cwd=PROJECT_ROOT, capture_output=True, text=True,
)
if verify.returncode != 0:
branch = "main"
# Check if there are updates
result = subprocess.run(
git_cmd + ["rev-list", f"HEAD..origin/{branch}", "--count"],
@ -2736,7 +2744,7 @@ For more help on a command:
skills_install = skills_subparsers.add_parser("install", help="Install a skill")
skills_install.add_argument("identifier", help="Skill identifier (e.g. openai/skills/skill-creator)")
skills_install.add_argument("--category", default="", help="Category folder to install into")
skills_install.add_argument("--force", action="store_true", help="Install despite caution verdict")
skills_install.add_argument("--force", "--yes", "-y", dest="force", action="store_true", help="Install despite blocked scan verdict")
skills_inspect = skills_subparsers.add_parser("inspect", help="Preview a skill without installing")
skills_inspect.add_argument("identifier", help="Skill identifier")

View file

@ -5,6 +5,7 @@ from __future__ import annotations
import os
from typing import Any, Dict, Optional
from hermes_cli import auth as auth_mod
from hermes_cli.auth import (
AuthError,
PROVIDER_REGISTRY,
@ -18,6 +19,10 @@ from hermes_cli.config import load_config
from hermes_constants import OPENROUTER_BASE_URL
def _normalize_custom_provider_name(value: str) -> str:
return value.strip().lower().replace(" ", "-")
def _get_model_config() -> Dict[str, Any]:
config = load_config()
model_cfg = config.get("model")
@ -47,6 +52,82 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
return "auto"
def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
    """Look up a saved custom provider matching ``requested_provider``.

    Returns:
        A dict with the stripped ``name``, ``base_url`` and ``api_key`` of the
        first matching ``custom_providers`` config entry, or ``None`` when the
        request is empty, is ``custom``/``auto``, resolves as a built-in
        provider, or matches no saved entry.
    """
    wanted = _normalize_custom_provider_name(requested_provider or "")
    if wanted in ("", "custom", "auto"):
        return None

    # Raw names should only map to custom providers when they are not already
    # valid built-in providers or aliases. Explicit menu keys like
    # ``custom:local`` always target the saved custom provider.
    if not wanted.startswith("custom:"):
        try:
            auth_mod.resolve_provider(wanted)
            is_builtin = True
        except AuthError:
            is_builtin = False
        if is_builtin:
            return None

    saved_providers = load_config().get("custom_providers")
    if not isinstance(saved_providers, list):
        return None

    for entry in saved_providers:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        base_url = entry.get("base_url")
        if not (isinstance(name, str) and isinstance(base_url, str)):
            continue
        name_norm = _normalize_custom_provider_name(name)
        # Accept both the bare name and its ``custom:<name>`` menu key.
        if wanted == name_norm or wanted == f"custom:{name_norm}":
            return {
                "name": name.strip(),
                "base_url": base_url.strip(),
                "api_key": str(entry.get("api_key", "") or "").strip(),
            }

    return None
def _resolve_named_custom_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Build runtime settings for a saved custom provider, if one matches.

    Explicit arguments take precedence over the saved entry. The API key then
    falls back to ``OPENAI_API_KEY`` and finally ``OPENROUTER_API_KEY``.

    Returns:
        The runtime dict, or ``None`` when no saved provider matches or no
        base URL can be determined.
    """
    saved = _get_named_custom_provider(requested_provider)
    if not saved:
        return None

    base_url = (explicit_base_url or "").strip() or saved.get("base_url", "")
    base_url = base_url.rstrip("/")
    if not base_url:
        return None

    # First non-empty candidate wins, in order of precedence.
    api_key = (explicit_api_key or "").strip()
    if not api_key:
        api_key = saved.get("api_key", "")
    if not api_key:
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
    if not api_key:
        api_key = os.getenv("OPENROUTER_API_KEY", "").strip()

    source_name = saved.get("name", requested_provider)
    return {
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "base_url": base_url,
        "api_key": api_key,
        "source": f"custom_provider:{source_name}",
    }
def _resolve_openrouter_runtime(
*,
requested_provider: str,
@ -122,6 +203,15 @@ def resolve_runtime_provider(
"""Resolve runtime provider credentials for agent execution."""
requested_provider = resolve_requested_provider(requested)
custom_runtime = _resolve_named_custom_runtime(
requested_provider=requested_provider,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
)
if custom_runtime:
custom_runtime["requested_provider"] = requested_provider
return custom_runtime
provider = resolve_provider(
requested_provider,
explicit_api_key=explicit_api_key,

View file

@ -460,12 +460,41 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status = []
# OpenRouter (required for vision, moa)
# Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
_has_vision = False
if get_env_value("OPENROUTER_API_KEY"):
_has_vision = True
else:
try:
_vauth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
if _vauth_path.is_file():
import json as _vjson
_vauth = _vjson.loads(_vauth_path.read_text())
if _vauth.get("active_provider") == "nous":
_np = _vauth.get("providers", {}).get("nous", {})
if _np.get("agent_key") or _np.get("access_token"):
_has_vision = True
elif _vauth.get("active_provider") == "openai-codex":
_cp = _vauth.get("providers", {}).get("openai-codex", {})
if _cp.get("tokens", {}).get("access_token"):
_has_vision = True
except Exception:
pass
if not _has_vision:
_oai_base = get_env_value("OPENAI_BASE_URL") or ""
if get_env_value("OPENAI_API_KEY") and "api.openai.com" in _oai_base.lower():
_has_vision = True
if _has_vision:
tool_status.append(("Vision (image analysis)", True, None))
else:
tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
# Mixture of Agents — requires OpenRouter specifically (calls multiple models)
if get_env_value("OPENROUTER_API_KEY"):
tool_status.append(("Mixture of Agents", True, None))
else:
tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY"))
tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
# Firecrawl (web tools)
@ -602,7 +631,7 @@ def _print_setup_summary(config: dict, hermes_home):
print(
f" {color('hermes config edit', Colors.GREEN)} Open config in your editor"
)
print(f" {color('hermes config set KEY VALUE', Colors.GREEN)}")
print(f" {color('hermes config set <key> <value>', Colors.GREEN)}")
print(f" Set a specific value")
print()
print(f" Or edit the files directly:")
@ -1246,35 +1275,112 @@ def setup_model_provider(config: dict):
elif existing_or:
selected_provider = "openrouter"
# ── OpenRouter API Key for tools (if not already set) ──
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
if selected_provider in (
"nous",
"openai-codex",
"custom",
"zai",
"kimi-coding",
"minimax",
"minimax-cn",
"anthropic",
) and not get_env_value("OPENROUTER_API_KEY"):
print()
print_header("OpenRouter API Key (for tools)")
print_info("Tools like vision analysis, web search, and MoA use OpenRouter")
print_info("independently of your main inference provider.")
print_info("Get your API key at: https://openrouter.ai/keys")
# ── Vision & Image Analysis Setup ──
# Vision requires a multimodal-capable provider. Check whether the user's
# chosen provider already covers it — if so, skip the prompt entirely.
_vision_needs_setup = True
api_key = prompt(
" OpenRouter API key (optional, press Enter to skip)", password=True
)
if api_key:
save_env_value("OPENROUTER_API_KEY", api_key)
print_success("OpenRouter API key saved (for tools)")
else:
print_info(
"Skipped - some tools (vision, web scraping) won't work without this"
if selected_provider == "openrouter":
# OpenRouter → Gemini for vision, already configured
_vision_needs_setup = False
elif selected_provider == "nous":
# Nous Portal OAuth → Gemini via Nous, already configured
_vision_needs_setup = False
elif selected_provider == "openai-codex":
# Codex OAuth → gpt-5.3-codex supports vision
_vision_needs_setup = False
elif selected_provider == "custom":
_custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
if "api.openai.com" in _custom_base:
# Direct OpenAI endpoint — show vision model picker
print()
print_header("Vision Model")
print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Keep default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
print_success(f"Vision model set to {_selected_vision_model}")
_vision_needs_setup = False
# Even for providers without native vision, check if existing credentials
# from a previous setup already cover it (e.g. user had OpenRouter before
# switching to z.ai)
if _vision_needs_setup:
if get_env_value("OPENROUTER_API_KEY"):
_vision_needs_setup = False
else:
# Check for Nous Portal OAuth in auth.json
try:
_auth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
if _auth_path.is_file():
import json as _json
_auth_data = _json.loads(_auth_path.read_text())
if _auth_data.get("active_provider") == "nous":
_nous_p = _auth_data.get("providers", {}).get("nous", {})
if _nous_p.get("agent_key") or _nous_p.get("access_token"):
_vision_needs_setup = False
except Exception:
pass
if _vision_needs_setup:
_prov_names = {
"nous-api": "Nous Portal API key",
"zai": "Z.AI / GLM",
"kimi-coding": "Kimi / Moonshot",
"minimax": "MiniMax",
"minimax-cn": "MiniMax CN",
"anthropic": "Anthropic",
"custom": "your custom endpoint",
}
_prov_display = _prov_names.get(selected_provider, selected_provider or "your provider")
print()
print_header("Vision & Image Analysis (optional)")
print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
print_info("doesn't natively support it. Choose how to enable vision,")
print_info("or skip to configure later.")
print()
_vision_choices = [
"OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
"OpenAI — enter API key & choose a vision model",
"Skip for now",
]
_vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)
if _vision_idx == 0: # OpenRouter
_or_key = prompt(" OpenRouter API key", password=True)
if _or_key:
save_env_value("OPENROUTER_API_KEY", _or_key)
print_success("OpenRouter key saved — vision will use Gemini")
else:
print_info("Skipped — vision won't be available")
elif _vision_idx == 1: # OpenAI
_oai_key = prompt(" OpenAI API key", password=True)
if _oai_key:
save_env_value("OPENAI_API_KEY", _oai_key)
save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
print_success(f"Vision configured with OpenAI ({_selected_vision_model})")
else:
print_info("Skipped — vision won't be available")
else:
print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
# ── Model Selection (adapts based on provider) ──
if selected_provider != "custom": # Custom already prompted for model name

View file

@ -1050,11 +1050,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
elif action == "install":
if not args:
c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force]\n")
c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force|--yes]\n")
return
identifier = args[0]
category = ""
force = "--force" in args
force = any(flag in args for flag in ("--force", "--yes", "-y"))
for i, a in enumerate(args):
if a == "--category" and i + 1 < len(args):
category = args[i + 1]

View file

@ -3240,7 +3240,7 @@ Prompt Strategy for finetuning Llama2 chat models see also https://github.com/fa
This implementation is based on the Vicuna PR and the fastchat repo, see also: https://github.com/lm-sys/FastChat/blob/cdd7730686cb1bf9ae2b768ee171bdf7d1ff04f3/fastchat/conversation.py#L847
Use dataset type: “llama2_chat” in conig.yml to use this prompt style.
Use dataset type: “llama2_chat” in config.yml to use this prompt style.
E.g. in the config.yml:
@ -4991,7 +4991,7 @@ prompt_strategies.orcamini
Prompt Strategy for finetuning Orca Mini (v2) models see also https://huggingface.co/psmathur/orca_mini_v2_7b for more information
Use dataset type: orcamini in conig.yml to use this prompt style.
Use dataset type: orcamini in config.yml to use this prompt style.
Compared to the alpaca_w_system.open_orca dataset type, this one specifies the system prompt with “### System:”.

View file

@ -2290,7 +2290,7 @@ This call gives the AsyncStager the opportunity to stage the state_dict. T
for serializing the state_dict and writing it to storage.
the serialization thread starts and before returning from dcp.async_save. If this is set to False, the assumption is the user has defined a custom synchronization point for the the purpose of further optimizing save latency in the training loop (for example, by overlapping staging with the forward/backward pass), and it is the respondsibility of the user to call AsyncStager.synchronize_staging at the appropriate time.
the serialization thread starts and before returning from dcp.async_save. If this is set to False, the assumption is the user has defined a custom synchronization point for the purpose of further optimizing save latency in the training loop (for example, by overlapping staging with the forward/backward pass), and it is the responsibility of the user to call AsyncStager.synchronize_staging at the appropriate time.
Clean up all resources used by the stager.
@ -2430,7 +2430,7 @@ Read the checkpoint metadata.
The metadata object associated with the checkpoint being loaded.
Calls to indicates a brand new checkpoint read is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint read. The meaning of the checkpiont_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage.
Called to indicate that a brand new checkpoint read is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint read. The meaning of the checkpoint_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage.
checkpoint_id (Union[str, os.PathLike, None]) The ID of this checkpoint instance. The meaning of the checkpoint_id depends on the storage. It can be a path to a folder or to a file. It can also be a key if the storage is more like a key-value store. (Default: None)
@ -2488,7 +2488,7 @@ plan (SavePlan) The local plan from the SavePlanner in use.
A transformed SavePlan after storage local planning
Calls to indicates a brand new checkpoint write is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint write. The meaning of the checkpiont_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage.
Called to indicate that a brand new checkpoint write is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint write. The meaning of the checkpoint_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage.
checkpoint_id (Union[str, os.PathLike, None]) The ID of this checkpoint instance. The meaning of the checkpoint_id depends on the storage. It can be a path to a folder or to a file. It can also be a key if the storage is a key-value store. (Default: None)
@ -2498,7 +2498,19 @@ is_coordinator (bool) Whether this instance is responsible for coordinating
Return the storage-specific metadata. This is used to store additional information in a checkpoint that can be useful for providing request-level observability. StorageMeta is passed to the SavePlanner during save calls. Returns None by default.
TODO: provide an example
Example:
```python
from torch.distributed.checkpoint.storage import StorageMeta
class CustomStorageBackend:
def get_storage_metadata(self):
# Return storage-specific metadata that will be stored with the checkpoint
return StorageMeta()
```
This example shows how a storage backend can return `StorageMeta`
to attach additional metadata to a checkpoint.
Optional[StorageMeta]
@ -3441,7 +3453,7 @@ The target module does not have to be an FSDP module.
A StateDictSettings containing the state_dict_type and state_dict / optim_state_dict configs that are currently set.
AssertionError` if the StateDictSettings for differen
`AssertionError` if the StateDictSettings for different
FSDP submodules differ.
@ -3766,7 +3778,7 @@ The sharing is done as described by ZeRO.
The local optimizer instance in each rank is only responsible for updating approximately 1 / world_size parameters and hence only needs to keep 1 / world_size optimizer states. After parameters are updated locally, each rank will broadcast its parameters to all other peers to keep all model replicas in the same state. ZeroRedundancyOptimizer can be used in conjunction with torch.nn.parallel.DistributedDataParallel to reduce per-rank peak memory consumption.
ZeroRedundancyOptimizer uses a sorted-greedy algorithm to pack a number of parameters at each rank. Each parameter belongs to a single rank and is not divided among ranks. The partition is arbitrary and might not match the the parameter registration or usage order.
ZeroRedundancyOptimizer uses a sorted-greedy algorithm to pack a number of parameters at each rank. Each parameter belongs to a single rank and is not divided among ranks. The partition is arbitrary and might not match the parameter registration or usage order.
params (Iterable) an Iterable of torch.Tensor s or dict s giving all parameters, which will be sharded across ranks.

View file

@ -6348,7 +6348,7 @@ Our chat templates for the GGUF, our BnB and BF16 uploads and all versions are f
### :1234: Precision issues
We found multiple precision issues in Tesla T4 and float16 machines primarily since the model was trained using BF16, and so outliers and overflows existed. MXFP4 is not actually supported on Ampere and older GPUs, so Triton provides `tl.dot_scaled` for MXFP4 matrix multiplication. It upcasts the matrices to BF16 internaly on the fly.
We found multiple precision issues in Tesla T4 and float16 machines primarily since the model was trained using BF16, and so outliers and overflows existed. MXFP4 is not actually supported on Ampere and older GPUs, so Triton provides `tl.dot_scaled` for MXFP4 matrix multiplication. It upcasts the matrices to BF16 internally on the fly.
We made a [MXFP4 inference notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/GPT_OSS_MXFP4_\(20B\)-Inference.ipynb) as well in Tesla T4 Colab!
@ -14877,7 +14877,7 @@ curl -X POST http://localhost:8000/v1/unload_lora_adapter \
# Text-to-Speech (TTS) Fine-tuning
Learn how to to fine-tune TTS & STT voice models with Unsloth.
Learn how to fine-tune TTS & STT voice models with Unsloth.
Fine-tuning TTS models allows them to adapt to your specific dataset, use case, or desired style and tone. The goal is to customize these models to clone voices, adapt speaking styles and tones, support new languages, handle specific tasks and more. We also support **Speech-to-Text (STT)** models like OpenAI's Whisper.
@ -15306,7 +15306,7 @@ snapshot_download(
)
```
And and let's do inference!
And let's do inference!
{% code overflow="wrap" %}
@ -16036,7 +16036,7 @@ Then train the model as usual via `trainer.train() .`
Tips to solve issues, and frequently asked questions.
If you're still encountering any issues with versions or depencies, please use our [Docker image](https://docs.unsloth.ai/get-started/install-and-update/docker) which will have everything pre-installed.
If you're still encountering any issues with versions or dependencies, please use our [Docker image](https://docs.unsloth.ai/get-started/install-and-update/docker) which will have everything pre-installed.
{% hint style="success" %}
**Try always to update Unsloth if you find any issues.**

View file

@ -40,7 +40,7 @@ Read more on running Llama 4 here: <https://docs.unsloth.ai/basics/tutorial-how-
Example 1 (unknown):
```unknown
And and let's do inference!
And let's do inference!
{% code overflow="wrap" %}
```
@ -4272,7 +4272,7 @@ Read our full DeepSeek-R1 blogpost here: [unsloth.ai/blog/deepseekr1-dynamic](ht
Tips to solve issues, and frequently asked questions.
If you're still encountering any issues with versions or depencies, please use our [Docker image](https://docs.unsloth.ai/get-started/install-and-update/docker) which will have everything pre-installed.
If you're still encountering any issues with versions or dependencies, please use our [Docker image](https://docs.unsloth.ai/get-started/install-and-update/docker) which will have everything pre-installed.
{% hint style="success" %}
**Try always to update Unsloth if you find any issues.**
@ -6638,7 +6638,7 @@ Our chat templates for the GGUF, our BnB and BF16 uploads and all versions are f
### :1234: Precision issues
We found multiple precision issues in Tesla T4 and float16 machines primarily since the model was trained using BF16, and so outliers and overflows existed. MXFP4 is not actually supported on Ampere and older GPUs, so Triton provides `tl.dot_scaled` for MXFP4 matrix multiplication. It upcasts the matrices to BF16 internaly on the fly.
We found multiple precision issues in Tesla T4 and float16 machines primarily since the model was trained using BF16, and so outliers and overflows existed. MXFP4 is not actually supported on Ampere and older GPUs, so Triton provides `tl.dot_scaled` for MXFP4 matrix multiplication. It upcasts the matrices to BF16 internally on the fly.
We made a [MXFP4 inference notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/GPT_OSS_MXFP4_\(20B\)-Inference.ipynb) as well in Tesla T4 Colab!
@ -10259,7 +10259,7 @@ training_args = GRPOConfig(
- Choosing and Loading a TTS Model
- Preparing Your Dataset
Learn how to to fine-tune TTS & STT voice models with Unsloth.
Learn how to fine-tune TTS & STT voice models with Unsloth.
Fine-tuning TTS models allows them to adapt to your specific dataset, use case, or desired style and tone. The goal is to customize these models to clone voices, adapt speaking styles and tones, support new languages, handle specific tasks and more. We also support **Speech-to-Text (STT)** models like OpenAI's Whisper.

View file

@ -67,7 +67,7 @@
- [Troubleshooting Inference](/basics/running-and-saving-models/troubleshooting-inference.md): If you're experiencing issues when running or saving your model.
- [vLLM Engine Arguments](/basics/running-and-saving-models/vllm-engine-arguments.md)
- [LoRA Hot Swapping Guide](/basics/running-and-saving-models/lora-hot-swapping-guide.md)
- [Text-to-Speech (TTS) Fine-tuning](/basics/text-to-speech-tts-fine-tuning.md): Learn how to to fine-tune TTS & STT voice models with Unsloth.
- [Text-to-Speech (TTS) Fine-tuning](/basics/text-to-speech-tts-fine-tuning.md): Learn how to fine-tune TTS & STT voice models with Unsloth.
- [Unsloth Dynamic 2.0 GGUFs](/basics/unsloth-dynamic-2.0-ggufs.md): A big new upgrade to our Dynamic Quants!
- [Vision Fine-tuning](/basics/vision-fine-tuning.md): Learn how to fine-tune vision/multimodal LLMs with Unsloth
- [Fine-tuning LLMs with NVIDIA DGX Spark and Unsloth](/basics/fine-tuning-llms-with-nvidia-dgx-spark-and-unsloth.md): Tutorial on how to fine-tune and do reinforcement learning (RL) with OpenAI gpt-oss on NVIDIA DGX Spark.

View file

@ -15,10 +15,30 @@ from agent.prompt_builder import (
build_context_files_prompt,
CONTEXT_FILE_MAX_CHARS,
DEFAULT_AGENT_IDENTITY,
MEMORY_GUIDANCE,
SESSION_SEARCH_GUIDANCE,
PLATFORM_HINTS,
)
# =========================================================================
# Guidance constants
# =========================================================================
class TestGuidanceConstants:
    """Pin the wording of the memory / session-search guidance constants."""

    def test_memory_guidance_discourages_task_logs(self):
        """MEMORY_GUIDANCE must steer toward durable facts and away from logs."""
        required_phrases = (
            "durable facts",
            "Do NOT save task progress",
            "session_search",
        )
        forbidden_phrases = ("like a diary", ">80%")

        for phrase in required_phrases:
            assert phrase in MEMORY_GUIDANCE
        for phrase in forbidden_phrases:
            assert phrase not in MEMORY_GUIDANCE

    def test_session_search_guidance_is_simple_cross_session_recall(self):
        """SESSION_SEARCH_GUIDANCE talks about cross-session context only."""
        guidance = SESSION_SEARCH_GUIDANCE
        assert "relevant cross-session context exists" in guidance
        assert "recent turns of the current session" not in guidance
# =========================================================================
# Context injection scanning
# =========================================================================
@ -435,6 +455,7 @@ class TestPromptBuilderConstants:
assert "whatsapp" in PLATFORM_HINTS
assert "telegram" in PLATFORM_HINTS
assert "discord" in PLATFORM_HINTS
assert "cron" in PLATFORM_HINTS
assert "cli" in PLATFORM_HINTS

View file

@ -6,7 +6,7 @@ from unittest.mock import patch, MagicMock
import pytest
from cron.scheduler import _resolve_origin, _deliver_result, run_job
from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, run_job
class TestResolveOrigin:
@ -44,6 +44,56 @@ class TestResolveOrigin:
assert _resolve_origin(job) is None
class TestResolveDeliveryTarget:
    """Checks for how a cron job's ``deliver`` setting maps to a target dict."""

    def test_origin_delivery_preserves_thread_id(self):
        """``deliver: origin`` echoes the origin, thread id included."""
        origin = {
            "platform": "telegram",
            "chat_id": "-1001",
            "thread_id": "17585",
        }
        resolved = _resolve_delivery_target({"deliver": "origin", "origin": origin})

        assert resolved == {
            "platform": "telegram",
            "chat_id": "-1001",
            "thread_id": "17585",
        }

    def test_bare_platform_uses_matching_origin_chat(self):
        """A bare platform name reuses the origin chat when platforms match."""
        origin = {
            "platform": "telegram",
            "chat_id": "-1001",
            "thread_id": "17585",
        }
        resolved = _resolve_delivery_target({"deliver": "telegram", "origin": origin})

        assert resolved == {
            "platform": "telegram",
            "chat_id": "-1001",
            "thread_id": "17585",
        }

    def test_bare_platform_falls_back_to_home_channel(self, monkeypatch):
        """With a mismatched origin, TELEGRAM_HOME_CHANNEL supplies the chat id."""
        monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "-2002")
        origin = {
            "platform": "discord",
            "chat_id": "abc",
        }
        resolved = _resolve_delivery_target({"deliver": "telegram", "origin": origin})

        assert resolved == {
            "platform": "telegram",
            "chat_id": "-2002",
            "thread_id": None,
        }
class TestDeliverResultMirrorLogging:
"""Verify that mirror_to_session failures are logged, not silently swallowed."""

View file

@ -1,7 +1,7 @@
"""Tests for the delivery routing module."""
from gateway.config import Platform, GatewayConfig, PlatformConfig, HomeChannel
from gateway.delivery import DeliveryTarget, parse_deliver_spec
from gateway.delivery import DeliveryRouter, DeliveryTarget, parse_deliver_spec
from gateway.session import SessionSource
@ -85,3 +85,12 @@ class TestTargetToStringRoundtrip:
reparsed = DeliveryTarget.parse(s)
assert reparsed.platform == Platform.TELEGRAM
assert reparsed.chat_id == "999"
class TestDeliveryRouter:
    """Behavioural checks for DeliveryRouter.resolve_targets."""

    def test_resolve_targets_does_not_duplicate_local_when_explicit(self):
        """An explicit "local" spec plus always_log_local yields one LOCAL target."""
        config = GatewayConfig(always_log_local=True)
        resolved = DeliveryRouter(config).resolve_targets(["local"])

        platforms = [entry.platform for entry in resolved]
        assert platforms == [Platform.LOCAL]

View file

@ -0,0 +1,46 @@
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter
from gateway.run import GatewayRunner
class _FatalAdapter(BasePlatformAdapter):
    """Adapter stub whose ``connect`` always records a non-retryable fatal error."""

    def __init__(self):
        platform_config = PlatformConfig(enabled=True, token="token")
        super().__init__(platform_config, Platform.TELEGRAM)

    async def connect(self) -> bool:
        """Record the token-lock fatal error and report a failed connection."""
        error_code = "telegram_token_lock"
        error_message = "Another local Hermes gateway is already using this Telegram bot token."
        self._set_fatal_error(error_code, error_message, retryable=False)
        return False

    async def disconnect(self) -> None:
        """Mark the adapter as disconnected."""
        self._mark_disconnected()

    async def send(self, chat_id, content, reply_to=None, metadata=None):
        """Sending is deliberately unsupported by this stub."""
        raise NotImplementedError

    async def get_chat_info(self, chat_id):
        """Return a minimal chat-info mapping containing only the id."""
        info = {"id": chat_id}
        return info
@pytest.mark.asyncio
async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monkeypatch, tmp_path):
    """A non-retryable adapter startup failure makes the runner ask for a clean exit."""

    def _build_fatal_adapter(platform, platform_config):
        # Every platform gets the adapter whose connect() records a fatal error.
        return _FatalAdapter()

    telegram_config = PlatformConfig(enabled=True, token="token")
    gateway_config = GatewayConfig(
        platforms={Platform.TELEGRAM: telegram_config},
        sessions_dir=tmp_path / "sessions",
    )
    runner = GatewayRunner(gateway_config)
    monkeypatch.setattr(runner, "_create_adapter", _build_fatal_adapter)

    started = await runner.start()

    assert started is True
    assert runner.should_exit_cleanly is True
    assert "already using this Telegram bot token" in runner.exit_reason

View file

@ -25,3 +25,77 @@ class TestGatewayPidState:
assert status.get_running_pid() is None
assert not pid_path.exists()
class TestGatewayRuntimeStatus:
    """Round-trip checks for the gateway runtime status file."""

    def test_write_runtime_status_records_platform_failure(self, tmp_path, monkeypatch):
        """Fields written for a platform failure are readable back unchanged."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

        status.write_runtime_status(
            gateway_state="startup_failed",
            exit_reason="telegram conflict",
            platform="telegram",
            platform_state="fatal",
            error_code="telegram_polling_conflict",
            error_message="another poller is active",
        )
        stored = status.read_runtime_status()

        assert stored["gateway_state"] == "startup_failed"
        assert stored["exit_reason"] == "telegram conflict"

        telegram_entry = stored["platforms"]["telegram"]
        assert telegram_entry["state"] == "fatal"
        assert telegram_entry["error_code"] == "telegram_polling_conflict"
        assert telegram_entry["error_message"] == "another poller is active"
class TestScopedLocks:
    """Checks for acquiring and releasing scoped gateway lock files."""

    @staticmethod
    def _seed_foreign_lock(tmp_path, monkeypatch):
        """Point the lock dir at tmp_path and write a lock owned by pid 99999."""
        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
        lock_file = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
        lock_file.parent.mkdir(parents=True, exist_ok=True)
        record = {
            "pid": 99999,
            "start_time": 123,
            "kind": "hermes-gateway",
        }
        lock_file.write_text(json.dumps(record))
        return lock_file

    def test_acquire_scoped_lock_rejects_live_other_process(self, tmp_path, monkeypatch):
        """A lock whose recorded process looks alive is not taken over."""
        self._seed_foreign_lock(tmp_path, monkeypatch)

        def kill_succeeds(pid, sig):
            # Returning without raising stands in for "signal delivered".
            return None

        def same_start_time(pid):
            return 123

        monkeypatch.setattr(status.os, "kill", kill_succeeds)
        monkeypatch.setattr(status, "_get_process_start_time", same_start_time)

        got_lock, holder = status.acquire_scoped_lock(
            "telegram-bot-token", "secret", metadata={"platform": "telegram"}
        )

        assert got_lock is False
        assert holder["pid"] == 99999

    def test_acquire_scoped_lock_replaces_stale_record(self, tmp_path, monkeypatch):
        """A lock whose recorded process is gone is overwritten by the caller."""
        lock_file = self._seed_foreign_lock(tmp_path, monkeypatch)

        def kill_reports_missing(pid, sig):
            raise ProcessLookupError

        monkeypatch.setattr(status.os, "kill", kill_reports_missing)

        got_lock, holder = status.acquire_scoped_lock(
            "telegram-bot-token", "secret", metadata={"platform": "telegram"}
        )

        assert got_lock is True
        rewritten = json.loads(lock_file.read_text())
        assert rewritten["pid"] == os.getpid()
        assert rewritten["metadata"]["platform"] == "telegram"

    def test_release_scoped_lock_only_removes_current_owner(self, tmp_path, monkeypatch):
        """Releasing a lock this process acquired deletes the lock file."""
        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))

        got_lock, _ = status.acquire_scoped_lock(
            "telegram-bot-token", "secret", metadata={"platform": "telegram"}
        )
        assert got_lock is True

        lock_file = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
        assert lock_file.exists()

        status.release_scoped_lock("telegram-bot-token", "secret")
        assert not lock_file.exists()

View file

@ -0,0 +1,100 @@
import asyncio
import sys
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import PlatformConfig
def _ensure_telegram_mock():
    """Register a MagicMock stand-in for ``telegram`` unless a file-backed module is loaded.

    The stub carries the handful of constants assigned below and is registered
    under ``telegram``, ``telegram.ext`` and ``telegram.constants`` without
    overwriting entries that already exist in ``sys.modules``.
    """
    loaded = sys.modules.get("telegram")
    if loaded is not None and hasattr(loaded, "__file__"):
        # A module with __file__ is already loaded; leave it alone.
        return

    stub = MagicMock()
    stub.ext.ContextTypes.DEFAULT_TYPE = type(None)
    stub.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"

    chat_type = stub.constants.ChatType
    chat_type.GROUP = "group"
    chat_type.SUPERGROUP = "supergroup"
    chat_type.CHANNEL = "channel"
    chat_type.PRIVATE = "private"

    sys.modules.setdefault("telegram", stub)
    sys.modules.setdefault("telegram.ext", stub)
    sys.modules.setdefault("telegram.constants", stub)
_ensure_telegram_mock()
from gateway.platforms.telegram import TelegramAdapter # noqa: E402
@pytest.mark.asyncio
async def test_connect_rejects_same_host_token_lock(monkeypatch):
    """connect() fails with a fatal token-lock error when the lock is already held."""
    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="secret-token"))

    def lock_already_held(scope, identity, metadata=None):
        # Simulate another process (pid 4242) owning the lock.
        return (False, {"pid": 4242})

    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lock_already_held)

    connected = await adapter.connect()

    assert connected is False
    assert adapter.fatal_error_code == "telegram_token_lock"
    assert adapter.has_fatal_error is True
    assert "already using this Telegram bot token" in adapter.fatal_error_message
@pytest.mark.asyncio
async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch):
    """A polling Conflict error marks the adapter fatal, stops the updater and notifies."""
    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="secret-token"))
    on_fatal = AsyncMock()
    adapter.set_fatal_error_handler(on_fatal)

    def lock_granted(scope, identity, metadata=None):
        return (True, None)

    def lock_released(scope, identity):
        return None

    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lock_granted)
    monkeypatch.setattr("gateway.status.release_scoped_lock", lock_released)

    seen = {}

    async def record_start_polling(**kwargs):
        # Keep hold of the callback the adapter registers so the test can fire it.
        seen["error_callback"] = kwargs["error_callback"]

    fake_updater = SimpleNamespace(
        start_polling=AsyncMock(side_effect=record_start_polling),
        stop=AsyncMock(),
    )
    fake_bot = SimpleNamespace(set_my_commands=AsyncMock())
    fake_app = SimpleNamespace(
        bot=fake_bot,
        updater=fake_updater,
        add_handler=MagicMock(),
        initialize=AsyncMock(),
        start=AsyncMock(),
    )
    app_builder = MagicMock()
    app_builder.token.return_value = app_builder
    app_builder.build.return_value = fake_app
    fake_application = SimpleNamespace(builder=MagicMock(return_value=app_builder))
    monkeypatch.setattr("gateway.platforms.telegram.Application", fake_application)

    connected = await adapter.connect()

    assert connected is True
    assert callable(seen["error_callback"])

    conflict = type("Conflict", (Exception,), {})
    error = conflict("Conflict: terminated by other getUpdates request; make sure that only one bot instance is running")
    seen["error_callback"](error)

    # Yield to the event loop twice, as the original test does, before asserting.
    for _ in range(2):
        await asyncio.sleep(0)

    assert adapter.fatal_error_code == "telegram_polling_conflict"
    assert adapter.has_fatal_error is True
    fake_updater.stop.assert_awaited()
    on_fatal.assert_awaited_once()

View file

@ -81,20 +81,21 @@ def _make_document(
return doc
def _make_message(document=None, caption=None):
"""Build a mock Telegram Message with the given document."""
def _make_message(document=None, caption=None, media_group_id=None, photo=None):
"""Build a mock Telegram Message with the given document/photo."""
msg = MagicMock()
msg.message_id = 42
msg.text = caption or ""
msg.caption = caption
msg.date = None
# Media flags — all None except document
msg.photo = None
# Media flags — all None except explicit payload
msg.photo = photo
msg.video = None
msg.audio = None
msg.voice = None
msg.sticker = None
msg.document = document
msg.media_group_id = media_group_id
# Chat / user
msg.chat = MagicMock()
msg.chat.id = 100
@ -165,6 +166,12 @@ class TestDocumentTypeDetection:
# TestDocumentDownloadBlock
# ---------------------------------------------------------------------------
def _make_photo(file_obj=None):
    """Build a mock photo whose async ``get_file`` returns *file_obj* or a default file."""
    resolved_file = file_obj or _make_file_obj(b"photo-bytes")
    mock_photo = MagicMock()
    mock_photo.get_file = AsyncMock(return_value=resolved_file)
    return mock_photo
class TestDocumentDownloadBlock:
@pytest.mark.asyncio
async def test_supported_pdf_is_cached(self, adapter):
@ -339,6 +346,50 @@ class TestDocumentDownloadBlock:
adapter.handle_message.assert_called_once()
# ---------------------------------------------------------------------------
# TestMediaGroups — media group (album) buffering
# ---------------------------------------------------------------------------
class TestMediaGroups:
    """Buffering behaviour for Telegram media groups (albums)."""

    @pytest.mark.asyncio
    async def test_photo_album_is_buffered_and_combined(self, adapter):
        """Two photos sharing a media_group_id are delivered as one combined event."""
        photo_a = _make_photo(_make_file_obj(b"first"))
        photo_b = _make_photo(_make_file_obj(b"second"))
        captioned = _make_message(caption="two images", media_group_id="album-1", photo=[photo_a])
        follow_up = _make_message(media_group_id="album-1", photo=[photo_b])

        cached_paths = ["/tmp/one.jpg", "/tmp/two.jpg"]
        # NOTE(review): the source's indentation was lost, so the original extent of
        # this `with` is unknown; the patch is kept active through the flush wait.
        with patch("gateway.platforms.telegram.cache_image_from_bytes", side_effect=cached_paths):
            for message in (captioned, follow_up):
                await adapter._handle_media_message(_make_update(message), MagicMock())

            assert adapter.handle_message.await_count == 0
            await asyncio.sleep(adapter.MEDIA_GROUP_WAIT_SECONDS + 0.05)

        adapter.handle_message.assert_awaited_once()
        combined = adapter.handle_message.call_args[0][0]
        assert combined.text == "two images"
        assert combined.media_urls == ["/tmp/one.jpg", "/tmp/two.jpg"]
        assert len(combined.media_types) == 2

    @pytest.mark.asyncio
    async def test_disconnect_cancels_pending_media_group_flush(self, adapter):
        """Disconnecting clears buffered albums and the pending flush never fires."""
        only_photo = _make_photo(_make_file_obj(b"first"))
        message = _make_message(caption="two images", media_group_id="album-2", photo=[only_photo])

        # NOTE(review): `with` extent reconstructed conservatively, as above.
        with patch("gateway.platforms.telegram.cache_image_from_bytes", return_value="/tmp/one.jpg"):
            await adapter._handle_media_message(_make_update(message), MagicMock())

            assert "album-2" in adapter._media_group_events
            assert "album-2" in adapter._media_group_tasks

            await adapter.disconnect()
            await asyncio.sleep(adapter.MEDIA_GROUP_WAIT_SECONDS + 0.05)

        assert adapter._media_group_events == {}
        assert adapter._media_group_tasks == {}
        adapter.handle_message.assert_not_awaited()
# ---------------------------------------------------------------------------
# TestSendDocument — outbound file attachment delivery
# ---------------------------------------------------------------------------

View file

@ -88,7 +88,7 @@ class TestHandleUpdateCommand:
@pytest.mark.asyncio
async def test_no_hermes_binary(self, tmp_path):
"""Returns error when hermes is not on PATH."""
"""Returns error when hermes is not on PATH and hermes_cli is not importable."""
runner = _make_runner()
event = _make_event()
@ -102,10 +102,77 @@ class TestHandleUpdateCommand:
with patch("gateway.run._hermes_home", tmp_path), \
patch("gateway.run.__file__", fake_file), \
patch("shutil.which", return_value=None):
patch("shutil.which", return_value=None), \
patch("importlib.util.find_spec", return_value=None):
result = await runner._handle_update_command(event)
assert "not found on PATH" in result
assert "Could not locate" in result
assert "hermes update" in result
@pytest.mark.asyncio
async def test_fallback_to_sys_executable(self, tmp_path):
    """The update still starts when hermes is off PATH but hermes_cli is importable."""
    runner = _make_runner()
    event = _make_event()

    # Fake checkout: <root>/.git and <root>/gateway/run.py.
    project_dir = tmp_path / "project"
    project_dir.mkdir()
    (project_dir / ".git").mkdir()
    gateway_dir = project_dir / "gateway"
    gateway_dir.mkdir()
    run_py = gateway_dir / "run.py"
    run_py.touch()
    fake_file = str(run_py)

    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir()

    popen_mock = MagicMock()
    spec_stub = MagicMock()

    with patch("gateway.run._hermes_home", hermes_home), \
         patch("gateway.run.__file__", fake_file), \
         patch("shutil.which", return_value=None), \
         patch("importlib.util.find_spec", return_value=spec_stub), \
         patch("subprocess.Popen", popen_mock):
        result = await runner._handle_update_command(event)

    assert "Starting Hermes update" in result
    launched = popen_mock.call_args[0][0]
    # The update_cmd uses sys.executable -m hermes_cli.main
    if isinstance(launched, list):
        flattened = " ".join(launched)
    else:
        flattened = launched
    assert "hermes_cli.main" in flattened or "bash" in launched[0]
@pytest.mark.asyncio
async def test_resolve_hermes_bin_prefers_which(self, tmp_path):
    """When shutil.which finds hermes, that path is the entire argv."""
    from gateway.run import _resolve_hermes_bin

    located = "/custom/path/hermes"
    with patch("shutil.which", return_value=located):
        argv = _resolve_hermes_bin()

    assert argv == ["/custom/path/hermes"]
@pytest.mark.asyncio
async def test_resolve_hermes_bin_fallback(self):
    """Without hermes on PATH, argv becomes ``sys.executable -m hermes_cli.main``."""
    import sys
    from gateway.run import _resolve_hermes_bin

    spec_stub = MagicMock()
    which_patch = patch("shutil.which", return_value=None)
    find_spec_patch = patch("importlib.util.find_spec", return_value=spec_stub)
    with which_patch, find_spec_patch:
        argv = _resolve_hermes_bin()

    expected = [sys.executable, "-m", "hermes_cli.main"]
    assert argv == expected
@pytest.mark.asyncio
async def test_resolve_hermes_bin_returns_none_when_both_fail(self):
    """With neither a PATH hit nor an importable hermes_cli, the result is None."""
    from gateway.run import _resolve_hermes_bin

    which_patch = patch("shutil.which", return_value=None)
    find_spec_patch = patch("importlib.util.find_spec", return_value=None)
    with which_patch, find_spec_patch:
        argv = _resolve_hermes_bin()

    assert argv is None
@pytest.mark.asyncio
async def test_writes_pending_marker(self, tmp_path):

View file

@ -0,0 +1,107 @@
"""Tests for cmd_update — branch fallback when remote branch doesn't exist."""
import subprocess
from types import SimpleNamespace
from unittest.mock import patch
import pytest
from hermes_cli.main import cmd_update, PROJECT_ROOT
def _make_run_side_effect(branch="main", verify_ok=True, commit_count="0"):
    """Return a ``subprocess.run`` replacement that fakes a few git commands.

    Args:
        branch: Name reported for ``git rev-parse --abbrev-ref``.
        verify_ok: Whether ``git rev-parse --verify`` exits 0 (else 128).
        commit_count: Text reported for ``git rev-list``.

    Returns:
        A callable ``side_effect(cmd, **kwargs)`` producing a
        ``subprocess.CompletedProcess`` with an empty ``stderr``.
    """

    def _completed(cmd, returncode=0, stdout=""):
        return subprocess.CompletedProcess(cmd, returncode, stdout=stdout, stderr="")

    def side_effect(cmd, **kwargs):
        text = " ".join(map(str, cmd))
        is_rev_parse = "rev-parse" in text

        # Current-branch query.
        if is_rev_parse and "--abbrev-ref" in text:
            return _completed(cmd, stdout=f"{branch}\n")

        # Remote-branch existence check.
        if is_rev_parse and "--verify" in text:
            return _completed(cmd, returncode=0 if verify_ok else 128)

        # Count of commits the local branch is behind.
        if "rev-list" in text:
            return _completed(cmd, stdout=f"{commit_count}\n")

        # Anything else succeeds with empty output.
        return _completed(cmd)

    return side_effect
@pytest.fixture
def mock_args():
    """Provide an empty namespace to pass as the CLI args object."""
    empty_args = SimpleNamespace()
    return empty_args
class TestCmdUpdateBranchFallback:
    """cmd_update falls back to main when current branch has no remote counterpart."""

    @staticmethod
    def _commands_containing(mock_run, needle):
        """Return the space-joined argv of every recorded call that contains *needle*."""
        rendered = [
            " ".join(str(part) for part in call.args[0])
            for call in mock_run.call_args_list
        ]
        return [line for line in rendered if needle in line]

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_update_falls_back_to_main_when_branch_not_on_remote(
        self, mock_run, _mock_which, mock_args, capsys
    ):
        """A branch with no remote counterpart is compared and pulled against main."""
        mock_run.side_effect = _make_run_side_effect(
            branch="fix/stoicneko", verify_ok=False, commit_count="3"
        )

        cmd_update(mock_args)

        # rev-list should use origin/main, not origin/fix/stoicneko
        rev_list_calls = self._commands_containing(mock_run, "rev-list")
        assert len(rev_list_calls) == 1
        assert "origin/main" in rev_list_calls[0]
        assert "origin/fix/stoicneko" not in rev_list_calls[0]

        # pull should use main, not fix/stoicneko
        pull_calls = self._commands_containing(mock_run, "pull")
        assert len(pull_calls) == 1
        assert "main" in pull_calls[0]

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_update_uses_current_branch_when_on_remote(
        self, mock_run, _mock_which, mock_args, capsys
    ):
        """A branch that exists on the remote is used for both rev-list and pull."""
        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="2"
        )

        cmd_update(mock_args)

        rev_list_calls = self._commands_containing(mock_run, "rev-list")
        assert len(rev_list_calls) == 1
        assert "origin/main" in rev_list_calls[0]

        pull_calls = self._commands_containing(mock_run, "pull")
        assert len(pull_calls) == 1
        assert "main" in pull_calls[0]

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_update_already_up_to_date(
        self, mock_run, _mock_which, mock_args, capsys
    ):
        """With zero commits behind, the message is printed and no pull is issued."""
        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="0"
        )

        cmd_update(mock_args)

        printed = capsys.readouterr().out
        assert "Already up to date!" in printed

        # Should NOT have called pull
        pull_calls = self._commands_containing(mock_run, "pull")
        assert len(pull_calls) == 0

View file

@ -59,15 +59,16 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys):
unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service"
monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_path)
monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, ""))
calls = []
helper_calls = []
def fake_run(cmd, check=False, **kwargs):
calls.append((cmd, check))
return SimpleNamespace(returncode=0, stdout="", stderr="")
monkeypatch.setattr(gateway.subprocess, "run", fake_run)
monkeypatch.setattr(gateway, "_ensure_linger_enabled", lambda: helper_calls.append(True))
gateway.systemd_install(force=False)
@ -77,6 +78,5 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys):
["systemctl", "--user", "daemon-reload"],
["systemctl", "--user", "enable", gateway.SERVICE_NAME],
]
assert helper_calls == [True]
assert "Service installed and enabled" in out
assert "Systemd linger is disabled" in out
assert "loginctl enable-linger" in out

View file

@ -0,0 +1,120 @@
"""Tests for gateway linger auto-enable behavior on headless Linux installs."""
from types import SimpleNamespace
import hermes_cli.gateway as gateway
class TestEnsureLingerEnabled:
    """Checks for ``gateway._ensure_linger_enabled`` under stubbed system state."""

    @staticmethod
    def _stub_host(monkeypatch, linger_file_exists):
        """Pretend to be Linux user ``testuser`` with a fake linger marker path."""
        monkeypatch.setattr(gateway, "is_linux", lambda: True)
        monkeypatch.setattr("getpass.getuser", lambda: "testuser")
        monkeypatch.setattr(
            gateway,
            "Path",
            lambda _path: SimpleNamespace(exists=lambda: linger_file_exists),
        )

    @staticmethod
    def _record_subprocess_runs(monkeypatch):
        """Replace subprocess.run with a recorder and return the list it appends to."""
        recorded = []

        def recorder(*args, **kwargs):
            recorded.append((args, kwargs))

        monkeypatch.setattr(gateway.subprocess, "run", recorder)
        return recorded

    def test_linger_already_enabled_via_file(self, monkeypatch, capsys):
        """An existing linger marker short-circuits without running any command."""
        self._stub_host(monkeypatch, linger_file_exists=True)
        recorded = self._record_subprocess_runs(monkeypatch)

        gateway._ensure_linger_enabled()

        printed = capsys.readouterr().out
        assert "Systemd linger is enabled" in printed
        assert recorded == []

    def test_status_enabled_skips_enable(self, monkeypatch, capsys):
        """A positive linger status also avoids running any command."""
        self._stub_host(monkeypatch, linger_file_exists=False)
        monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (True, ""))
        recorded = self._record_subprocess_runs(monkeypatch)

        gateway._ensure_linger_enabled()

        printed = capsys.readouterr().out
        assert "Systemd linger is enabled" in printed
        assert recorded == []

    def test_loginctl_success_enables_linger(self, monkeypatch, capsys):
        """With loginctl available, linger is enabled through a single call."""
        self._stub_host(monkeypatch, linger_file_exists=False)
        monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, ""))
        monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/loginctl")

        invocations = []

        def fake_run(cmd, capture_output=False, text=False, check=False):
            invocations.append((cmd, capture_output, text, check))
            return SimpleNamespace(returncode=0, stdout="", stderr="")

        monkeypatch.setattr(gateway.subprocess, "run", fake_run)

        gateway._ensure_linger_enabled()

        printed = capsys.readouterr().out
        assert "Enabling linger" in printed
        assert "Linger enabled" in printed
        expected_call = (["loginctl", "enable-linger", "testuser"], True, True, False)
        assert invocations == [expected_call]

    def test_missing_loginctl_shows_manual_guidance(self, monkeypatch, capsys):
        """Without loginctl, the manual sudo command and the detail are printed."""
        self._stub_host(monkeypatch, linger_file_exists=False)
        monkeypatch.setattr(
            gateway, "get_systemd_linger_status", lambda: (None, "loginctl not found")
        )
        monkeypatch.setattr("shutil.which", lambda name: None)
        recorded = self._record_subprocess_runs(monkeypatch)

        gateway._ensure_linger_enabled()

        printed = capsys.readouterr().out
        assert "sudo loginctl enable-linger testuser" in printed
        assert "loginctl not found" in printed
        assert recorded == []

    def test_loginctl_failure_shows_manual_guidance(self, monkeypatch, capsys):
        """A failing loginctl call surfaces its stderr plus the manual command."""
        self._stub_host(monkeypatch, linger_file_exists=False)
        monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, ""))
        monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/loginctl")

        def failing_run(*args, **kwargs):
            return SimpleNamespace(returncode=1, stdout="", stderr="Permission denied")

        monkeypatch.setattr(gateway.subprocess, "run", failing_run)

        gateway._ensure_linger_enabled()

        printed = capsys.readouterr().out
        assert "sudo loginctl enable-linger testuser" in printed
        assert "Permission denied" in printed
def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys):
    """systemd_install writes the unit, reloads/enables it and calls the linger helper once."""
    unit_file = tmp_path / "systemd" / "user" / "hermes-gateway.service"
    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_file)

    systemctl_calls = []
    linger_calls = []

    def fake_run(cmd, check=False, **kwargs):
        systemctl_calls.append((cmd, check))
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    def fake_linger_helper():
        linger_calls.append(True)

    monkeypatch.setattr(gateway.subprocess, "run", fake_run)
    monkeypatch.setattr(gateway, "_ensure_linger_enabled", fake_linger_helper)

    gateway.systemd_install(force=False)

    printed = capsys.readouterr().out
    assert unit_file.exists()

    issued = [cmd for cmd, _ in systemctl_calls]
    assert issued == [
        ["systemctl", "--user", "daemon-reload"],
        ["systemctl", "--user", "enable", gateway.SERVICE_NAME],
    ]
    assert linger_calls == [True]
    assert "Service installed and enabled" in printed

View file

@ -0,0 +1,22 @@
from hermes_cli.gateway import _runtime_health_lines
def test_runtime_health_lines_include_fatal_platform_and_startup_reason(monkeypatch):
    """Health lines mention both the fatal platform error and the startup exit reason."""
    stored_status = {
        "gateway_state": "startup_failed",
        "exit_reason": "telegram conflict",
        "platforms": {
            "telegram": {
                "state": "fatal",
                "error_message": "another poller is active",
            }
        },
    }
    monkeypatch.setattr("gateway.status.read_runtime_status", lambda: stored_status)

    health = _runtime_health_lines()

    expected_lines = (
        "⚠ telegram: another poller is active",
        "⚠ Last startup issue: telegram conflict",
    )
    for line in expected_lines:
        assert line in health

View file

@ -0,0 +1,48 @@
"""Tests for CLI placeholder text in config/setup output."""
import os
from argparse import Namespace
from unittest.mock import patch
import pytest
from hermes_cli.config import config_command, show_config
from hermes_cli.setup import _print_setup_summary
def test_config_set_usage_marks_placeholders(capsys):
    """``config set`` with no key/value exits 1 and prints angle-bracket usage."""
    incomplete = Namespace(config_command="set", key=None, value=None)

    with pytest.raises(SystemExit) as raised:
        config_command(incomplete)

    assert raised.value.code == 1
    printed = capsys.readouterr().out
    assert "Usage: hermes config set <key> <value>" in printed
def test_config_unknown_command_help_marks_placeholders(capsys):
    """An unknown config subcommand exits 1 and lists ``set`` with placeholders."""
    unknown = Namespace(config_command="wat")

    with pytest.raises(SystemExit) as raised:
        config_command(unknown)

    assert raised.value.code == 1
    printed = capsys.readouterr().out
    assert "hermes config set <key> <value> Set a config value" in printed
def test_show_config_marks_placeholders(tmp_path, capsys):
    """show_config output includes the ``<key> <value>`` placeholder hint."""
    env_override = {"HERMES_HOME": str(tmp_path)}
    with patch.dict(os.environ, env_override):
        show_config()

    printed = capsys.readouterr().out
    assert "hermes config set <key> <value>" in printed
def test_setup_summary_marks_placeholders(tmp_path, capsys):
    """The setup summary includes the ``<key> <value>`` placeholder hint."""
    env_override = {"HERMES_HOME": str(tmp_path)}
    summary_config = {"tts": {"provider": "edge"}}
    with patch.dict(os.environ, env_override):
        _print_setup_summary(summary_config, tmp_path)

    printed = capsys.readouterr().out
    assert "hermes config set <key> <value>" in printed

View file

@ -3,7 +3,7 @@
from __future__ import annotations
from hermes_cli.config import load_config, save_config, save_env_value
from hermes_cli.setup import setup_model_provider
from hermes_cli.setup import _print_setup_summary, setup_model_provider
def _read_env(home):
@ -50,11 +50,15 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
calls = {"count": 0}
def fake_prompt_choice(_question, choices, default=0):
def fake_prompt_choice(question, choices, default=0):
calls["count"] += 1
if calls["count"] == 1:
assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)"
return len(choices) - 1
if calls["count"] == 2:
assert question == "Configure vision:"
assert choices[-1] == "Skip for now"
return len(choices) - 1
raise AssertionError("Model menu should not appear for keep-current custom")
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
@ -70,7 +74,7 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
assert reloaded["model"]["provider"] == "custom"
assert reloaded["model"]["default"] == "custom/model"
assert reloaded["model"]["base_url"] == "https://example.invalid/v1"
assert calls["count"] == 1
assert calls["count"] == 2
def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
@ -88,13 +92,17 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
captured = {"provider_choices": None, "model_choices": None}
calls = {"count": 0}
def fake_prompt_choice(_question, choices, default=0):
def fake_prompt_choice(question, choices, default=0):
calls["count"] += 1
if calls["count"] == 1:
captured["provider_choices"] = list(choices)
assert choices[-1] == "Keep current (Anthropic)"
return len(choices) - 1
if calls["count"] == 2:
assert question == "Configure vision:"
assert choices[-1] == "Skip for now"
return len(choices) - 1
if calls["count"] == 3:
captured["model_choices"] = list(choices)
return len(choices) - 1 # keep current model
raise AssertionError("Unexpected extra prompt_choice call")
@ -113,7 +121,43 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
assert captured["model_choices"] is not None
assert captured["model_choices"][0] == "claude-opus-4-6"
assert "anthropic/claude-opus-4.6 (recommended)" not in captured["model_choices"]
assert calls["count"] == 2
assert calls["count"] == 3
def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_path, monkeypatch):
    """Keeping Anthropic while choosing OpenAI vision writes the OpenAI vision env values."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)

    config = load_config()
    config["model"] = {
        "default": "claude-opus-4-6",
        "provider": "anthropic",
    }
    save_config(config)

    # Menu indices returned by successive prompt_choice calls, in order.
    scripted_picks = iter([
        9,  # keep current provider
        1,  # configure vision with OpenAI
        5,  # use default gpt-4o-mini vision model
        4,  # keep current Anthropic model
    ])

    def _next_pick(*args, **kwargs):
        return next(scripted_picks)

    def _answer_prompt(message, *args, **kwargs):
        # Only the OpenAI API key prompt gets a value; every other prompt is left blank.
        if "OpenAI API key" in message:
            return "sk-openai"
        return ""

    def _decline(*args, **kwargs):
        return False

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", _next_pick)
    monkeypatch.setattr("hermes_cli.setup.prompt", _answer_prompt)
    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", _decline)
    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
    monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])

    setup_model_provider(config)

    written_env = _read_env(tmp_path)
    assert written_env.get("OPENAI_API_KEY") == "sk-openai"
    assert written_env.get("OPENAI_BASE_URL") == "https://api.openai.com/v1"
    assert written_env.get("AUXILIARY_VISION_MODEL") == "gpt-4o-mini"
def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(tmp_path, monkeypatch):
@ -144,7 +188,7 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(
"hermes_cli.auth.resolve_codex_runtime_credentials",
lambda *args, **kwargs: {
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-access-token",
"api_key": "codex-...oken",
},
)
monkeypatch.setattr(
@ -163,3 +207,22 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(
assert reloaded["model"]["provider"] == "openai-codex"
assert reloaded["model"]["default"] == "openai/gpt-5.3-codex"
assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex"
def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatch, capsys):
    """With only Codex auth on disk, the summary lists vision without the 'missing' hint."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)

    auth_file = tmp_path / "auth.json"
    auth_file.write_text(
        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"tok"}}}}'
    )

    def _nothing_on_path(_name):
        return None

    monkeypatch.setattr("shutil.which", _nothing_on_path)

    _print_setup_summary(load_config(), tmp_path)
    summary_text = capsys.readouterr().out

    assert "Vision (image analysis)" in summary_text
    assert "missing run 'hermes setup' to configure" not in summary_text
    assert "Mixture of Agents" in summary_text
    assert "missing OPENROUTER_API_KEY" in summary_text

View file

@ -3,7 +3,7 @@ from io import StringIO
import pytest
from rich.console import Console
from hermes_cli.skills_hub import do_check, do_list, do_update
from hermes_cli.skills_hub import do_check, do_list, do_update, handle_skills_slash
class _DummyLockFile:

View file

@ -0,0 +1,26 @@
import sys
from types import SimpleNamespace
def test_cli_skills_install_accepts_yes_alias(monkeypatch):
    """`skills install <id> --yes` hands skills_command the identifier with force=True."""
    from hermes_cli.main import main

    seen = {}

    def _record_skills_args(args):
        seen["identifier"] = args.identifier
        seen["force"] = args.force

    cli_argv = ["hermes", "skills", "install", "official/email/agentmail", "--yes"]
    monkeypatch.setattr("hermes_cli.skills_hub.skills_command", _record_skills_args)
    monkeypatch.setattr(sys, "argv", cli_argv)

    main()

    expected = {
        "identifier": "official/email/agentmail",
        "force": True,
    }
    assert seen == expected

View file

@ -484,3 +484,22 @@ class TestResizeToolPool:
"""resize_tool_pool should not raise."""
resize_tool_pool(16) # Small pool for testing
resize_tool_pool(128) # Restore default
def test_resize_shuts_down_previous_executor(self, monkeypatch):
    """Replacing the global tool executor should shut down the old pool."""
    import environments.agent_loop as agent_loop_module

    stale_pool, fresh_pool = MagicMock(), MagicMock()
    pool_factory = MagicMock(return_value=fresh_pool)

    # Install a stand-in for the current pool and intercept creation of the next one.
    monkeypatch.setattr(agent_loop_module, "_tool_executor", stale_pool)
    monkeypatch.setattr(
        agent_loop_module.concurrent.futures, "ThreadPoolExecutor", pool_factory
    )

    resize_tool_pool(16)

    stale_pool.shutdown.assert_called_once_with(wait=False)
    assert agent_loop_module._tool_executor is fresh_pool

View file

@ -0,0 +1,100 @@
import queue
import threading
import time
from types import SimpleNamespace
from unittest.mock import MagicMock
from cli import HermesCLI
def _make_cli_stub():
    """Create a HermesCLI via ``__new__`` (no ``__init__``) with approval-UI attributes set."""
    stub = HermesCLI.__new__(HermesCLI)
    approval_attrs = {
        "_approval_state": None,
        "_approval_deadline": 0,
        "_approval_lock": threading.Lock(),
        "_invalidate": MagicMock(),
        "_app": SimpleNamespace(invalidate=MagicMock()),
    }
    for attr_name, attr_value in approval_attrs.items():
        setattr(stub, attr_name, attr_value)
    return stub
class TestCliApprovalUi:
    """Approval prompt behaviour for a long command: 'view' choice, expansion, rendering."""

    # Long command shared by the selection and rendering tests below.
    _LONG_COMMAND = "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress"

    def test_approval_callback_includes_view_for_long_commands(self):
        cli = _make_cli_stub()
        long_command = "sudo dd if=/tmp/githubcli-keyring.gpg of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress"
        outcome = {}

        def _invoke_callback():
            outcome["value"] = cli._approval_callback(long_command, "disk copy")

        worker = threading.Thread(target=_invoke_callback, daemon=True)
        worker.start()

        # Poll for up to two seconds until the callback has published its state.
        give_up_at = time.time() + 2
        while True:
            if cli._approval_state is not None or time.time() >= give_up_at:
                break
            time.sleep(0.01)

        assert cli._approval_state is not None
        assert "view" in cli._approval_state["choices"]

        # Answer through the response queue so the worker thread can finish.
        cli._approval_state["response_queue"].put("deny")
        worker.join(timeout=2)
        assert outcome["value"] == "deny"

    def test_handle_approval_selection_view_expands_in_place(self):
        cli = _make_cli_stub()
        cli._approval_state = {
            "command": self._LONG_COMMAND,
            "description": "disk copy",
            "choices": ["once", "session", "always", "deny", "view"],
            "selected": 4,
            "response_queue": queue.Queue(),
        }

        cli._handle_approval_selection()

        state = cli._approval_state
        assert state is not None
        assert state["show_full"] is True
        assert "view" not in state["choices"]
        assert state["selected"] == 3
        assert state["response_queue"].empty()

    def test_approval_display_places_title_inside_box_not_border(self):
        cli = _make_cli_stub()
        cli._approval_state = {
            "command": self._LONG_COMMAND,
            "description": "disk copy",
            "choices": ["once", "session", "always", "deny", "view"],
            "selected": 0,
            "response_queue": queue.Queue(),
        }

        text_parts = [text for _style, text in cli._get_approval_display_fragments()]
        rendered = "".join(text_parts)
        lines = rendered.splitlines()

        # NOTE(review): startswith("") is true for every string, so this check
        # cannot fail. The intended prefix (presumably the box's top-border
        # glyph) looks to have been lost -- confirm and restore it.
        assert lines[0].startswith("")
        assert "Dangerous Command" not in lines[0]
        assert any("Dangerous Command" in line for line in lines[1:3])
        assert "Show full command" in rendered
        assert "githubcli-archive-keyring.gpg" not in rendered

    def test_approval_display_shows_full_command_after_view(self):
        cli = _make_cli_stub()
        full_command = self._LONG_COMMAND
        cli._approval_state = {
            "command": full_command,
            "description": "disk copy",
            "choices": ["once", "session", "always", "deny"],
            "selected": 0,
            "show_full": True,
            "response_queue": queue.Queue(),
        }

        text_parts = [text for _style, text in cli._get_approval_display_fragments()]
        rendered = "".join(text_parts)

        assert "..." not in rendered
        # The path is checked in pieces rather than as one contiguous string.
        for piece in ("githubcli-", "archive-", "keyring.gpg", "status=progress"):
            assert piece in rendered

View file

@ -0,0 +1,117 @@
"""Tests for slash command prefix matching in HermesCLI.process_command."""
from unittest.mock import MagicMock, patch
from cli import HermesCLI
def _make_cli():
    """Create a HermesCLI via ``__new__`` (no ``__init__``) with the attributes these tests set."""
    stub = HermesCLI.__new__(HermesCLI)
    initial_attrs = {
        "config": {},
        "console": MagicMock(),
        "agent": None,
        "conversation_history": [],
        "session_id": None,
        "_pending_input": MagicMock(),
    }
    for attr_name, attr_value in initial_attrs.items():
        setattr(stub, attr_name, attr_value)
    return stub
class TestSlashCommandPrefixMatching:
    """Prefix resolution of slash commands passed to ``HermesCLI.process_command``."""

    def test_unique_prefix_dispatches_command(self):
        """/con should dispatch to /config when it uniquely matches."""
        cli_obj = _make_cli()
        with patch.object(cli_obj, 'show_config') as mock_config:
            cli_obj.process_command("/con")
        mock_config.assert_called_once()

    def test_unique_prefix_with_args_does_not_recurse(self):
        """/con set key value should expand to /config set key value without infinite recursion."""
        cli_obj = _make_cli()
        dispatched = []
        # Unbound original, so the counting wrapper can delegate to it.
        original = cli_obj.process_command.__func__

        def counting_process_command(self_inner, cmd):
            dispatched.append(cmd)
            # Bail out early instead of waiting for the interpreter's recursion limit.
            if len(dispatched) > 5:
                raise RecursionError("process_command called too many times")
            return original(self_inner, cmd)

        with patch.object(type(cli_obj), 'process_command', counting_process_command):
            try:
                cli_obj.process_command("/con set key value")
            except RecursionError:
                # Raise explicitly: an ``assert False`` would be stripped under -O.
                raise AssertionError("process_command recursed infinitely")

        # Should have been called at most twice: once for /con set..., once for /config set...
        assert len(dispatched) <= 2

    def test_exact_command_with_args_does_not_recurse(self):
        """/config set key value hits exact branch and does not loop back to prefix."""
        cli_obj = _make_cli()
        call_count = [0]
        original_pc = HermesCLI.process_command

        def guarded(self_inner, cmd):
            call_count[0] += 1
            # Bail out early instead of waiting for the interpreter's recursion limit.
            if call_count[0] > 10:
                raise RecursionError("Infinite recursion detected")
            return original_pc(self_inner, cmd)

        with patch.object(HermesCLI, 'process_command', guarded):
            try:
                cli_obj.process_command("/config set key value")
            except RecursionError:
                # Raise explicitly: an ``assert False`` would be stripped under -O.
                raise AssertionError("Recursed infinitely on /config set key value")

        assert call_count[0] <= 3

    def test_ambiguous_prefix_shows_suggestions(self):
        """/re matches multiple commands — should show ambiguous message."""
        cli_obj = _make_cli()
        cli_obj.process_command("/re")
        printed = " ".join(str(c) for c in cli_obj.console.print.call_args_list)
        assert "Ambiguous" in printed or "Did you mean" in printed

    def test_unknown_command_shows_error(self):
        """/xyz should show unknown command error."""
        cli_obj = _make_cli()
        cli_obj.process_command("/xyz")
        printed = " ".join(str(c) for c in cli_obj.console.print.call_args_list)
        assert "Unknown command" in printed

    def test_exact_command_still_works(self):
        """/help should still work as exact match."""
        cli_obj = _make_cli()
        with patch.object(cli_obj, 'show_help') as mock_help:
            cli_obj.process_command("/help")
        mock_help.assert_called_once()

    def test_skill_command_prefix_matches(self):
        """A prefix that uniquely matches a skill command should dispatch it."""
        cli_obj = _make_cli()
        fake_skill = {"/test-skill-xyz": {"name": "Test Skill", "description": "test"}}
        printed = []
        # Capture console output as strings so it can be searched below.
        cli_obj.console.print = lambda *a, **kw: printed.append(str(a))

        import cli as cli_mod
        with patch.object(cli_mod, '_skill_commands', fake_skill):
            cli_obj.process_command("/test-skill-xy")

        # Should NOT show "Unknown command" — should have dispatched or attempted skill
        unknown = any("Unknown command" in p for p in printed)
        assert not unknown, f"Expected skill prefix to match, got: {printed}"

    def test_ambiguous_between_builtin_and_skill(self):
        """An exact builtin match wins over a skill command that shares its prefix."""
        cli_obj = _make_cli()
        # /help-extra is a fake skill that shares /hel prefix with /help
        fake_skill = {"/help-extra": {"name": "Help Extra", "description": "test"}}

        import cli as cli_mod
        with patch.object(cli_mod, '_skill_commands', fake_skill), patch.object(cli_obj, 'show_help') as mock_help:
            cli_obj.process_command("/help")

        # /help is an exact match so should work normally, not show ambiguous
        mock_help.assert_called_once()
        printed = " ".join(str(c) for c in cli_obj.console.print.call_args_list)
        assert "Ambiguous" not in printed

View file

@ -186,6 +186,11 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
monkeypatch.delenv("LLM_MODEL", raising=False)
monkeypatch.delenv("OPENAI_MODEL", raising=False)
# Ensure local user config does not leak a model into the test
monkeypatch.setitem(cli.CLI_CONFIG, "model", {
"default": "",
"base_url": "https://openrouter.ai/api/v1",
})
def _runtime_resolve(**kwargs):
return {
@ -240,6 +245,11 @@ def test_codex_provider_uses_config_model(monkeypatch):
monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
# Prevent live API call from overriding the config model
monkeypatch.setattr(
"hermes_cli.codex_models.get_codex_model_ids",
lambda access_token=None: ["gpt-5.2-codex"],
)
shell = cli.HermesCLI(compact=True, max_turns=1)

View file

@ -150,7 +150,7 @@ def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):
monkeypatch.setenv("OPENAI_BASE_URL", "https://my-vllm-server.example.com/v1")
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
monkeypatch.setenv("OPENAI_API_KEY", "sk-vllm-key")
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-should-not-leak")
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-...leak")
resolved = rp.resolve_runtime_provider(requested="auto")
@ -158,6 +158,107 @@ def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):
assert resolved["api_key"] == "sk-vllm-key"
def test_named_custom_provider_uses_saved_credentials(monkeypatch):
    """Requesting a custom provider by name yields its saved base URL and API key."""
    for env_name in ("OPENAI_API_KEY", "OPENROUTER_API_KEY"):
        monkeypatch.delenv(env_name, raising=False)

    def _config_with_local_provider():
        return {
            "custom_providers": [
                {
                    "name": "Local",
                    "base_url": "http://1.2.3.4:1234/v1",
                    "api_key": "local-provider-key",
                }
            ]
        }

    def _forbid_resolve_provider(*a, **k):
        # Calling this at all fails the test.
        raise AssertionError(
            "resolve_provider should not be called for named custom providers"
        )

    monkeypatch.setattr(rp, "load_config", _config_with_local_provider)
    monkeypatch.setattr(rp, "resolve_provider", _forbid_resolve_provider)

    resolved = rp.resolve_runtime_provider(requested="local")

    expected_fields = {
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "base_url": "http://1.2.3.4:1234/v1",
        "api_key": "local-provider-key",
        "requested_provider": "local",
        "source": "custom_provider:Local",
    }
    for field, expected_value in expected_fields.items():
        assert resolved[field] == expected_value
def test_named_custom_provider_falls_back_to_openai_api_key(monkeypatch):
    """A custom provider saved without an api_key resolves with OPENAI_API_KEY from the env."""
    monkeypatch.setenv("OPENAI_API_KEY", "env-openai-key")
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

    def _config_with_keyless_provider():
        return {
            "custom_providers": [
                {
                    "name": "Local LLM",
                    "base_url": "http://localhost:1234/v1",
                }
            ]
        }

    def _forbid_resolve_provider(*a, **k):
        # Calling this at all fails the test.
        raise AssertionError(
            "resolve_provider should not be called for named custom providers"
        )

    monkeypatch.setattr(rp, "load_config", _config_with_keyless_provider)
    monkeypatch.setattr(rp, "resolve_provider", _forbid_resolve_provider)

    resolved = rp.resolve_runtime_provider(requested="custom:local-llm")

    expected_fields = {
        "base_url": "http://localhost:1234/v1",
        "api_key": "env-openai-key",
        "requested_provider": "custom:local-llm",
    }
    for field, expected_value in expected_fields.items():
        assert resolved[field] == expected_value
def test_named_custom_provider_does_not_shadow_builtin_provider(monkeypatch):
    """A custom provider named "nous" does not replace the nous runtime credentials."""

    def _config_with_shadowing_provider():
        return {
            "custom_providers": [
                {
                    "name": "nous",
                    "base_url": "http://localhost:1234/v1",
                    "api_key": "shadow-key",
                }
            ]
        }

    def _nous_credentials(**kwargs):
        return {
            "base_url": "https://inference-api.nousresearch.com/v1",
            "api_key": "nous-runtime-key",
            "source": "portal",
            "expires_at": None,
        }

    monkeypatch.setattr(rp, "load_config", _config_with_shadowing_provider)
    monkeypatch.setattr(rp, "resolve_nous_runtime_credentials", _nous_credentials)

    resolved = rp.resolve_runtime_provider(requested="nous")

    expected_fields = {
        "provider": "nous",
        "base_url": "https://inference-api.nousresearch.com/v1",
        "api_key": "nous-runtime-key",
        "requested_provider": "nous",
    }
    for field, expected_value in expected_fields.items():
        assert resolved[field] == expected_value
def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
"""When the user explicitly requests openrouter, OPENAI_BASE_URL
(which may point to a custom endpoint) must not override the

View file

@ -328,6 +328,34 @@ class TestCronTimezone:
"Overdue job was skipped — _ensure_aware likely shifted absolute time"
)
def test_get_due_jobs_naive_cross_timezone(self, tmp_path, monkeypatch):
    """Naive past timestamps must be detected as due even when Hermes tz
    is behind system local tz — the scenario that triggered #806."""
    import cron.jobs as jobs_module

    # Point the cron module's storage paths at the per-test temp directory.
    monkeypatch.setattr(jobs_module, "CRON_DIR", tmp_path / "cron")
    monkeypatch.setattr(jobs_module, "JOBS_FILE", tmp_path / "cron" / "jobs.json")
    monkeypatch.setattr(jobs_module, "OUTPUT_DIR", tmp_path / "cron" / "output")

    # Use a Hermes timezone far behind UTC so that the numeric wall time
    # of the naive timestamp exceeds _hermes_now's wall time — this would
    # have caused a false "not due" with the old replace(tzinfo=...) approach.
    # Set through monkeypatch so the previous value is restored after the test
    # instead of leaking into later tests.
    monkeypatch.setenv("HERMES_TIMEZONE", "Pacific/Midway")  # UTC-11
    hermes_time.reset_cache()

    from cron.jobs import create_job, load_jobs, save_jobs, get_due_jobs

    create_job(prompt="Cross-tz job", schedule="every 1h")
    jobs = load_jobs()

    # Force a naive past timestamp (system-local wall time, 10 min ago)
    naive_past = (datetime.now() - timedelta(minutes=10)).isoformat()
    jobs[0]["next_run_at"] = naive_past
    save_jobs(jobs)

    due = get_due_jobs()
    assert len(due) == 1, (
        "Naive past timestamp should be due regardless of Hermes timezone"
    )
def test_create_job_stores_tz_aware_timestamps(self, tmp_path, monkeypatch):
"""New jobs store timezone-aware created_at and next_run_at."""
import cron.jobs as jobs_module

View file

@ -1,7 +1,10 @@
"""Tests for trajectory_compressor.py — config, metrics, and compression logic."""
import json
from unittest.mock import patch, MagicMock
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch, MagicMock
import pytest
from trajectory_compressor import (
CompressionConfig,
@ -384,3 +387,32 @@ class TestTokenCounting:
tc.tokenizer.encode = MagicMock(side_effect=Exception("fail"))
# Should fallback to len(text) // 4
assert tc.count_tokens("12345678") == 2
class TestGenerateSummary:
    """Summary generation when the completion's message content is ``None``."""

    @staticmethod
    def _completion_without_content():
        """Return a completion-shaped stub whose single choice has ``content=None``."""
        empty_message = SimpleNamespace(content=None)
        return SimpleNamespace(choices=[SimpleNamespace(message=empty_message)])

    def test_generate_summary_handles_none_content(self):
        compressor = _make_compressor()
        compressor.client = MagicMock()
        compressor.client.chat.completions.create.return_value = (
            self._completion_without_content()
        )
        metrics = TrajectoryMetrics()

        result = compressor._generate_summary("Turn content", metrics)

        assert result == "[CONTEXT SUMMARY]:"

    @pytest.mark.asyncio
    async def test_generate_summary_async_handles_none_content(self):
        compressor = _make_compressor()
        compressor.async_client = MagicMock()
        compressor.async_client.chat.completions.create = AsyncMock(
            return_value=self._completion_without_content()
        )
        metrics = TrajectoryMetrics()

        result = await compressor._generate_summary_async("Turn content", metrics)

        assert result == "[CONTEXT SUMMARY]:"

View file

@ -0,0 +1,96 @@
"""Regression tests for browser session cleanup and screenshot recovery."""
from unittest.mock import patch
class TestScreenshotPathRecovery:
    """Extraction of a screenshot's absolute path from a free-text message."""

    def test_extracts_standard_absolute_path(self):
        from tools.browser_tool import _extract_screenshot_path_from_text

        extracted = _extract_screenshot_path_from_text("Screenshot saved to /tmp/foo.png")
        assert extracted == "/tmp/foo.png"

    def test_extracts_quoted_absolute_path(self):
        from tools.browser_tool import _extract_screenshot_path_from_text

        # The path is wrapped in single quotes; the result must not include them.
        message = "Screenshot saved to '/Users/david/.hermes/browser_screenshots/shot.png'"
        extracted = _extract_screenshot_path_from_text(message)
        assert extracted == "/Users/david/.hermes/browser_screenshots/shot.png"
class TestBrowserCleanup:
    """Cleanup paths must clear browser_tool's module-level session tracking."""

    def setup_method(self):
        from tools import browser_tool

        # Snapshot module-level state so teardown can put it back.
        self.browser_tool = browser_tool
        self.orig_active_sessions = browser_tool._active_sessions.copy()
        self.orig_session_last_activity = browser_tool._session_last_activity.copy()
        self.orig_recording_sessions = browser_tool._recording_sessions.copy()
        self.orig_cleanup_done = browser_tool._cleanup_done

    def teardown_method(self):
        module = self.browser_tool
        # Restore in place (clear + update) rather than rebinding the containers.
        restore_pairs = (
            (module._active_sessions, self.orig_active_sessions),
            (module._session_last_activity, self.orig_session_last_activity),
            (module._recording_sessions, self.orig_recording_sessions),
        )
        for live, snapshot in restore_pairs:
            live.clear()
            live.update(snapshot)
        module._cleanup_done = self.orig_cleanup_done

    def test_cleanup_browser_clears_tracking_state(self):
        module = self.browser_tool
        module._active_sessions["task-1"] = {
            "session_name": "sess-1",
            "bb_session_id": None,
        }
        module._session_last_activity["task-1"] = 123.0

        with (
            patch("tools.browser_tool._maybe_stop_recording") as stop_recording,
            patch(
                "tools.browser_tool._run_browser_command",
                return_value={"success": True},
            ) as run_command,
            patch("tools.browser_tool.os.path.exists", return_value=False),
        ):
            module.cleanup_browser("task-1")

        for tracker in (module._active_sessions, module._session_last_activity):
            assert "task-1" not in tracker
        stop_recording.assert_called_once_with("task-1")
        run_command.assert_called_once_with("task-1", "close", [], timeout=10)

    def test_browser_close_delegates_to_cleanup_browser(self):
        import json

        module = self.browser_tool
        module._active_sessions["task-2"] = {"session_name": "sess-2"}

        with patch("tools.browser_tool.cleanup_browser") as cleanup:
            raw_response = module.browser_close("task-2")
            response = json.loads(raw_response)

        assert response == {"success": True, "closed": True}
        cleanup.assert_called_once_with("task-2")

    def test_emergency_cleanup_clears_all_tracking_state(self):
        module = self.browser_tool
        module._cleanup_done = False
        module._active_sessions["task-1"] = {"session_name": "sess-1"}
        module._active_sessions["task-2"] = {"session_name": "sess-2"}
        module._session_last_activity["task-1"] = 1.0
        module._session_last_activity["task-2"] = 2.0
        module._recording_sessions.update({"task-1", "task-2"})

        with patch("tools.browser_tool.cleanup_all_browsers") as cleanup_all:
            module._emergency_cleanup_all_sessions()

        cleanup_all.assert_called_once_with()
        assert module._active_sessions == {}
        assert module._session_last_activity == {}
        assert module._recording_sessions == set()
        assert module._cleanup_done is True

View file

@ -1,11 +1,8 @@
"""Tests for the --force flag dangerous verdict bypass fix in skills_guard.py.
"""Regression tests for skills guard policy precedence.
Regression test: the old code had `if result.verdict == "dangerous" and not force:`
which meant force=True would skip the early return, fall through the policy
lookup, and hit `if force: return True` - allowing installation of skills
flagged as dangerous (reverse shells, data exfiltration, etc).
The docstring explicitly states: "never overrides dangerous".
Official/builtin skills should follow the INSTALL_POLICY table even when their
scan verdict is dangerous, and --force should override blocked verdicts for
non-builtin sources.
"""
@ -44,10 +41,6 @@ def _new_should_allow(verdict, trust_level, force):
}
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
# Fixed: no `and not force` - dangerous is always blocked
if verdict == "dangerous":
return False
policy = INSTALL_POLICY.get(trust_level, INSTALL_POLICY["community"])
vi = VERDICT_INDEX.get(verdict, 2)
decision = policy[vi]
@ -61,35 +54,28 @@ def _new_should_allow(verdict, trust_level, force):
return False
class TestForceNeverOverridesDangerous:
"""The core bug: --force bypassed the dangerous verdict block."""
class TestPolicyPrecedenceForDangerousVerdicts:
def test_builtin_dangerous_is_allowed_by_policy(self):
assert _new_should_allow("dangerous", "builtin", force=False) is True
def test_old_code_allows_dangerous_with_force(self):
"""Old code: force=True lets dangerous skills through."""
assert _old_should_allow("dangerous", "community", force=True) is True
def test_trusted_dangerous_is_blocked_without_force(self):
assert _new_should_allow("dangerous", "trusted", force=False) is False
def test_new_code_blocks_dangerous_with_force(self):
"""Fixed code: force=True still blocks dangerous skills."""
assert _new_should_allow("dangerous", "community", force=True) is False
def test_force_overrides_dangerous_for_community(self):
assert _new_should_allow("dangerous", "community", force=True) is True
def test_new_code_blocks_dangerous_trusted_with_force(self):
"""Fixed code: even trusted + force cannot install dangerous."""
assert _new_should_allow("dangerous", "trusted", force=True) is False
def test_force_overrides_dangerous_for_trusted(self):
assert _new_should_allow("dangerous", "trusted", force=True) is True
def test_force_still_overrides_caution(self):
"""force=True should still work for caution verdicts."""
assert _new_should_allow("caution", "community", force=True) is True
def test_caution_community_blocked_without_force(self):
"""Caution + community is blocked without force (unchanged)."""
assert _new_should_allow("caution", "community", force=False) is False
def test_safe_always_allowed(self):
"""Safe verdict is always allowed regardless of force."""
assert _new_should_allow("safe", "community", force=False) is True
assert _new_should_allow("safe", "community", force=True) is True
def test_dangerous_blocked_without_force(self):
"""Dangerous is blocked without force (both old and new agree)."""
assert _old_should_allow("dangerous", "community", force=False) is False
assert _new_should_allow("dangerous", "community", force=False) is False
def test_old_code_happened_to_allow_forced_dangerous_community(self):
assert _old_should_allow("dangerous", "community", force=True) is True

View file

@ -9,9 +9,24 @@ from tools.memory_tool import (
memory_tool,
_scan_memory_content,
ENTRY_DELIMITER,
MEMORY_SCHEMA,
)
# =========================================================================
# Tool schema guidance
# =========================================================================
class TestMemorySchema:
    """Wording checks on the memory tool's schema description."""

    def test_discourages_diary_style_task_logs(self):
        description = MEMORY_SCHEMA["description"]
        # (phrase, whether the description must contain it)
        expectations = (
            ("Do NOT save task progress", True),
            ("session_search", True),
            ("like a diary", False),
            ("temporary task state", True),
            (">80%", False),
        )
        for phrase, must_appear in expectations:
            assert (phrase in description) is must_appear
# =========================================================================
# Security scanning
# =========================================================================

View file

@ -2,6 +2,7 @@
import asyncio
import json
import os
import sys
from pathlib import Path
from types import SimpleNamespace
@ -29,6 +30,118 @@ def _install_telegram_mock(monkeypatch, bot):
class TestSendMessageTool:
def test_cron_duplicate_target_is_skipped_and_explained(self):
    """Sending to the cron auto-delivery target is skipped with an explanatory note."""
    home_channel = SimpleNamespace(chat_id="-1001")
    config, _telegram_cfg = _make_config()
    config.get_home_channel = lambda _platform: home_channel

    cron_env = {
        "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
        "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
    }
    request = {
        "action": "send",
        "target": "telegram",
        "message": "hello",
    }

    with patch.dict(os.environ, cron_env, clear=False), \
            patch("gateway.config.load_gateway_config", return_value=config), \
            patch("tools.interrupt.is_interrupted", return_value=False), \
            patch("model_tools._run_async", side_effect=_run_async_immediately), \
            patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
            patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
        outcome = json.loads(send_message_tool(request))

    assert outcome["success"] is True
    assert outcome["skipped"] is True
    assert outcome["reason"] == "cron_auto_delivery_duplicate_target"
    assert "final response" in outcome["note"]
    # Nothing may be sent or mirrored for the skipped duplicate.
    send_mock.assert_not_awaited()
    mirror_mock.assert_not_called()
def test_cron_different_target_still_sends(self):
    """A chat other than the cron auto-delivery chat is sent to and mirrored."""
    config, telegram_cfg = _make_config()

    cron_env = {
        "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
        "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
    }
    request = {
        "action": "send",
        "target": "telegram:-1002",
        "message": "hello",
    }

    with patch.dict(os.environ, cron_env, clear=False), \
            patch("gateway.config.load_gateway_config", return_value=config), \
            patch("tools.interrupt.is_interrupted", return_value=False), \
            patch("model_tools._run_async", side_effect=_run_async_immediately), \
            patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
            patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
        outcome = json.loads(send_message_tool(request))

    assert outcome["success"] is True
    assert outcome.get("skipped") is not True
    send_mock.assert_awaited_once_with(
        Platform.TELEGRAM,
        telegram_cfg,
        "-1002",
        "hello",
        thread_id=None,
        media_files=[],
    )
    mirror_mock.assert_called_once_with("telegram", "-1002", "hello", source_label="cli", thread_id=None)
def test_cron_same_chat_different_thread_still_sends(self):
    """Same chat as the cron auto-delivery target but another thread is still sent."""
    config, telegram_cfg = _make_config()

    cron_env = {
        "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
        "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
        "HERMES_CRON_AUTO_DELIVER_THREAD_ID": "17585",
    }
    request = {
        "action": "send",
        "target": "telegram:-1001:99999",
        "message": "hello",
    }

    with patch.dict(os.environ, cron_env, clear=False), \
            patch("gateway.config.load_gateway_config", return_value=config), \
            patch("tools.interrupt.is_interrupted", return_value=False), \
            patch("model_tools._run_async", side_effect=_run_async_immediately), \
            patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
            patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
        outcome = json.loads(send_message_tool(request))

    assert outcome["success"] is True
    assert outcome.get("skipped") is not True
    send_mock.assert_awaited_once_with(
        Platform.TELEGRAM,
        telegram_cfg,
        "-1001",
        "hello",
        thread_id="99999",
        media_files=[],
    )
    mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="99999")
def test_sends_to_explicit_telegram_topic_target(self):
config, telegram_cfg = _make_config()

View file

@ -9,9 +9,21 @@ from tools.session_search_tool import (
_format_conversation,
_truncate_around_matches,
MAX_SESSION_CHARS,
SESSION_SEARCH_SCHEMA,
)
# =========================================================================
# Tool schema guidance
# =========================================================================
class TestSessionSearchSchema:
    """Wording checks on the session_search tool's schema description."""

    def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
        schema_text = SESSION_SEARCH_SCHEMA["description"]
        mentions_past = "past conversations" in schema_text
        mentions_current = "recent turns of the current session" in schema_text
        assert mentions_past
        assert not mentions_current
# =========================================================================
# _format_timestamp
# =========================================================================

View file

@ -46,9 +46,9 @@ from tools.skills_guard import (
class TestResolveTrustLevel:
def test_builtin_not_exposed(self):
# builtin is only used internally, not resolved from source string
assert _resolve_trust_level("openai/skills") == "trusted"
def test_official_sources_resolve_to_builtin(self):
assert _resolve_trust_level("official") == "builtin"
assert _resolve_trust_level("official/email/agentmail") == "builtin"
def test_trusted_repos(self):
assert _resolve_trust_level("openai/skills") == "trusted"
@ -116,11 +116,17 @@ class TestShouldAllowInstall:
allowed, _ = should_allow_install(self._result("trusted", "caution", f))
assert allowed is True
def test_dangerous_blocked_even_trusted(self):
def test_trusted_dangerous_blocked_without_force(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, _ = should_allow_install(self._result("trusted", "dangerous", f))
assert allowed is False
def test_builtin_dangerous_allowed_without_force(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(self._result("builtin", "dangerous", f))
assert allowed is True
assert "builtin source" in reason
def test_force_overrides_caution(self):
f = [Finding("x", "high", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(self._result("community", "caution", f), force=True)
@ -132,22 +138,21 @@ class TestShouldAllowInstall:
allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False)
assert allowed is False
def test_force_never_overrides_dangerous(self):
"""--force must not bypass dangerous verdict (regression test)."""
def test_force_overrides_dangerous_for_community(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(
self._result("community", "dangerous", f), force=True
)
assert allowed is False
assert "DANGEROUS" in reason
assert allowed is True
assert "Force-installed" in reason
def test_force_never_overrides_dangerous_trusted(self):
"""--force must not bypass dangerous even for trusted sources."""
def test_force_overrides_dangerous_for_trusted(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, _ = should_allow_install(
allowed, reason = should_allow_install(
self._result("trusted", "dangerous", f), force=True
)
assert allowed is False
assert allowed is True
assert "Force-installed" in reason
# ---------------------------------------------------------------------------

View file

@ -53,6 +53,7 @@ import atexit
import json
import logging
import os
import re
import signal
import subprocess
import shutil
@ -165,63 +166,18 @@ def _emergency_cleanup_all_sessions():
if not _active_sessions:
return
logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions))
logger.info("Emergency cleanup: closing %s active session(s)...",
len(_active_sessions))
try:
if _is_local_mode():
# Local mode: just close agent-browser sessions via CLI
for task_id, session_info in list(_active_sessions.items()):
session_name = session_info.get("session_name")
if session_name:
try:
browser_cmd = _find_agent_browser()
task_socket_dir = os.path.join(
_socket_safe_tmpdir(),
f"agent-browser-{session_name}"
)
env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
subprocess.run(
browser_cmd.split() + ["--session", session_name, "--json", "close"],
capture_output=True, timeout=5, env=env,
)
logger.info("Closed local session %s", session_name)
except Exception as e:
logger.debug("Error closing local session %s: %s", session_name, e)
else:
# Cloud mode: release Browserbase sessions via API
api_key = os.environ.get("BROWSERBASE_API_KEY")
project_id = os.environ.get("BROWSERBASE_PROJECT_ID")
if not api_key or not project_id:
logger.warning("Cannot cleanup - missing BROWSERBASE credentials")
return
for task_id, session_info in list(_active_sessions.items()):
bb_session_id = session_info.get("bb_session_id")
if bb_session_id:
try:
response = requests.post(
f"https://api.browserbase.com/v1/sessions/{bb_session_id}",
headers={
"X-BB-API-Key": api_key,
"Content-Type": "application/json"
},
json={
"projectId": project_id,
"status": "REQUEST_RELEASE"
},
timeout=5 # Short timeout for cleanup
)
if response.status_code in (200, 201, 204):
logger.info("Closed session %s", bb_session_id)
else:
logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code)
except Exception as e:
logger.error("Error closing session %s: %s", bb_session_id, e)
_active_sessions.clear()
cleanup_all_browsers()
except Exception as e:
logger.error("Emergency cleanup error: %s", e)
finally:
with _cleanup_lock:
_active_sessions.clear()
_session_last_activity.clear()
_recording_sessions.clear()
# Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM
@ -640,18 +596,14 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
def _create_local_session(task_id: str) -> Dict[str, str]:
"""Create a lightweight local browser session (no cloud API call).
Returns the same dict shape as ``_create_browserbase_session`` so the rest
of the code can treat both modes uniformly.
"""
import uuid
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
logger.info("Created local browser session %s", session_name)
session_name = f"h_{uuid.uuid4().hex[:10]}"
logger.info("Created local browser session %s for task %s",
session_name, task_id)
return {
"session_name": session_name,
"bb_session_id": None, # Not applicable in local mode
"cdp_url": None, # Not applicable in local mode
"bb_session_id": None,
"cdp_url": None,
"features": {"local": True},
}
@ -772,6 +724,27 @@ def _find_agent_browser() -> str:
)
def _extract_screenshot_path_from_text(text: str) -> Optional[str]:
"""Extract a screenshot file path from agent-browser human-readable output."""
if not text:
return None
patterns = [
r"Screenshot saved to ['\"](?P<path>/[^'\"]+?\.png)['\"]",
r"Screenshot saved to (?P<path>/\S+?\.png)(?:\s|$)",
r"(?P<path>/\S+?\.png)(?:\s|$)",
]
for pattern in patterns:
match = re.search(pattern, text)
if match:
path = match.group("path").strip().strip("'\"")
if path:
return path
return None
def _run_browser_command(
task_id: str,
command: str,
@ -841,9 +814,20 @@ def _run_browser_command(
command, task_id, task_socket_dir, len(task_socket_dir))
browser_env = {**os.environ}
# Ensure PATH includes standard dirs (systemd services may have minimal PATH)
if "/usr/bin" not in browser_env.get("PATH", "").split(":"):
browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}"
# Ensure PATH includes Hermes-managed Node first, then standard system dirs.
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
hermes_node_bin = str(hermes_home / "node" / "bin")
existing_path = browser_env.get("PATH", "")
path_parts = [p for p in existing_path.split(":") if p]
candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p]
for part in reversed(candidate_dirs):
if os.path.isdir(part) and part not in path_parts:
path_parts.insert(0, part)
browser_env["PATH"] = ":".join(path_parts)
browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
result = subprocess.run(
@ -866,10 +850,11 @@ def _run_browser_command(
command, " ".join(cmd_parts[:4]) + "...",
(result.stderr or "")[:200])
# Parse JSON output
if result.stdout.strip():
stdout_text = result.stdout.strip()
if stdout_text:
try:
parsed = json.loads(result.stdout.strip())
parsed = json.loads(stdout_text)
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
if command == "snapshot" and parsed.get("success"):
snap_data = parsed.get("data", {})
@ -879,13 +864,33 @@ def _run_browser_command(
"returncode=%s", result.returncode)
return parsed
except json.JSONDecodeError:
# Non-JSON output indicates agent-browser crash or version mismatch
raw = result.stdout.strip()[:500]
raw = stdout_text[:2000]
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
command, result.returncode, raw[:200])
command, result.returncode, raw[:500])
if command == "screenshot":
stderr_text = (result.stderr or "").strip()
combined_text = "\n".join(
part for part in [stdout_text, stderr_text] if part
)
recovered_path = _extract_screenshot_path_from_text(combined_text)
if recovered_path and Path(recovered_path).exists():
logger.info(
"browser 'screenshot' recovered file from non-JSON output: %s",
recovered_path,
)
return {
"success": True,
"data": {
"path": recovered_path,
"raw": raw,
},
}
return {
"success": True,
"data": {"raw": raw}
"success": False,
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
}
# Check for errors
@ -1250,46 +1255,26 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
def browser_close(task_id: Optional[str] = None) -> str:
"""
Close the browser session.
Args:
task_id: Task identifier for session isolation
Returns:
JSON string with close result
"""
effective_task_id = task_id or "default"
# Stop auto-recording before closing
_maybe_stop_recording(effective_task_id)
result = _run_browser_command(effective_task_id, "close", [])
# Close the backend session (Browserbase API in cloud mode, nothing extra in local mode)
session_key = task_id if task_id and task_id in _active_sessions else "default"
if session_key in _active_sessions:
session_info = _active_sessions[session_key]
bb_session_id = session_info.get("bb_session_id")
if bb_session_id:
# Cloud mode: release the Browserbase session via API
try:
config = _get_browserbase_config()
_close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
except Exception as e:
logger.warning("Could not close BrowserBase session: %s", e)
del _active_sessions[session_key]
if result.get("success"):
return json.dumps({
"success": True,
"closed": True
}, ensure_ascii=False)
else:
# Even if close fails, session was released
return json.dumps({
"success": True,
"closed": True,
"warning": result.get("error", "Session may not have been active")
}, ensure_ascii=False)
with _cleanup_lock:
had_session = effective_task_id in _active_sessions
cleanup_browser(effective_task_id)
response = {
"success": True,
"closed": True,
}
if not had_session:
response["warning"] = "Session may not have been active"
return json.dumps(response, ensure_ascii=False)
def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
@ -1481,9 +1466,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
# Take screenshot using agent-browser
screenshot_args = [str(screenshot_path)]
screenshot_args = []
if annotate:
screenshot_args.insert(0, "--annotate")
screenshot_args.append("--annotate")
screenshot_args.append("--full")
screenshot_args.append(str(screenshot_path))
result = _run_browser_command(
effective_task_id,
"screenshot",
@ -1498,7 +1485,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
"success": False,
"error": f"Failed to take screenshot ({mode} mode): {error_detail}"
}, ensure_ascii=False)
actual_screenshot_path = result.get("data", {}).get("path")
if actual_screenshot_path:
screenshot_path = Path(actual_screenshot_path)
# Check if screenshot file was created
if not screenshot_path.exists():
mode = "local" if _is_local_mode() else "cloud"

View file

@ -304,6 +304,12 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel
If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
On update, passing skills=[] clears attached skills.
NOTE: The agent's final response is auto-delivered to the target — do NOT use
send_message in the prompt for that same destination. Same-target send_message
calls are skipped to avoid duplicate cron deliveries. Put the primary
user-facing content in the final response, and use send_message only for
additional or different targets.
Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""",
"parameters": {
"type": "object",

View file

@ -435,24 +435,25 @@ def check_memory_requirements() -> bool:
MEMORY_SCHEMA = {
"name": "memory",
"description": (
"Save important information to persistent memory that survives across sessions. "
"Your memory appears in your system prompt at session start -- it's how you "
"remember things about the user and your environment between conversations.\n\n"
"Save durable information to persistent memory that survives across sessions. "
"Memory is injected into future turns, so keep it compact and focused on facts "
"that will still matter later.\n\n"
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
"- You discover something about the environment (OS, installed tools, project structure)\n"
"- User corrects you or says 'remember this' / 'don't do that again'\n"
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
"- You completed something - log it like a diary entry\n"
"- After completing a complex task, save a brief note about what was done\n\n"
"- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n"
"- You identify a stable fact that will be useful again in future sessions\n\n"
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
"state to memory; use session_search to recall those from past transcripts.\n"
"If you've discovered a new way to do something, solved a problem that could be "
"necessary later, save it as a skill with the skill tool.\n\n"
"TWO TARGETS:\n"
"- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
"- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
"ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
"remove (delete -- old_text identifies it).\n"
"Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n"
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps."
"remove (delete -- old_text identifies it).\n\n"
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state."
),
"parameters": {
"type": "object",

View file

@ -153,6 +153,10 @@ def _handle_send(args):
f"or set a home channel via: hermes config set {platform_name.upper()}_HOME_CHANNEL <channel_id>"
})
duplicate_skip = _maybe_skip_cron_duplicate_send(platform_name, chat_id, thread_id)
if duplicate_skip:
return json.dumps(duplicate_skip)
try:
from model_tools import _run_async
result = _run_async(
@ -213,6 +217,51 @@ def _describe_media_for_mirror(media_files):
return f"[Sent {len(media_files)} media attachments]"
def _get_cron_auto_delivery_target():
"""Return the cron scheduler's auto-delivery target for the current run, if any."""
platform = os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM", "").strip().lower()
chat_id = os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID", "").strip()
if not platform or not chat_id:
return None
thread_id = os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID", "").strip() or None
return {
"platform": platform,
"chat_id": chat_id,
"thread_id": thread_id,
}
def _maybe_skip_cron_duplicate_send(platform_name: str, chat_id: str, thread_id: str | None):
    """Skip a cron send_message call that duplicates the scheduler's auto-delivery.

    Compares the requested destination with the target returned by
    ``_get_cron_auto_delivery_target()``. All three components are
    normalized before comparison so that equivalent values written
    differently (``"Telegram"`` vs ``"telegram"``, thread ``17585`` vs
    ``"17585"``, blank thread vs ``None``) are recognized as the same target.

    Args:
        platform_name: Platform the caller wants to send to.
        chat_id: Destination chat id.
        thread_id: Destination thread/topic id, or ``None`` for no thread.

    Returns:
        ``None`` when the send should proceed (no auto-delivery target, or a
        different target). Otherwise a result dict with ``success`` and
        ``skipped`` set to ``True`` plus ``reason``, ``target`` and ``note``
        fields explaining why nothing was sent.
    """
    auto_target = _get_cron_auto_delivery_target()
    if not auto_target:
        return None

    def _normalize_thread(value):
        # Thread ids can show up as ints or strings; blank means "no thread".
        if value is None:
            return None
        return str(value).strip() or None

    requested_thread = _normalize_thread(thread_id)
    same_target = (
        # The auto-delivery platform is already lowercased, so match case-insensitively.
        auto_target["platform"] == str(platform_name or "").strip().lower()
        and str(auto_target["chat_id"]) == str(chat_id)
        and _normalize_thread(auto_target.get("thread_id")) == requested_thread
    )
    if not same_target:
        return None

    target_label = f"{platform_name}:{chat_id}"
    if requested_thread is not None:
        target_label += f":{requested_thread}"

    return {
        "success": True,
        "skipped": True,
        "reason": "cron_auto_delivery_duplicate_target",
        "target": target_label,
        "note": (
            f"Skipped send_message to {target_label}. This cron job will already auto-deliver "
            "its final response to that same target. Put the intended user-facing content in "
            "your final response instead, or use a different target if you want an additional message."
        ),
    }
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
"""Route a message to the appropriate platform sender."""
from gateway.config import Platform

View file

@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = {
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
"- You want to check if you've solved a similar problem before\n"
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
"Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
"than to guess or ask the user to repeat themselves.\n\n"
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "

View file

@ -645,14 +645,11 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
Args:
result: Scan result from scan_skill()
force: If True, override blocks for caution verdicts (never overrides dangerous)
force: If True, override blocked policy decisions for this scan result
Returns:
(allowed, reason) tuple
"""
if result.verdict == "dangerous":
return False, f"Scan verdict is DANGEROUS ({len(result.findings)} findings). Blocked."
policy = INSTALL_POLICY.get(result.trust_level, INSTALL_POLICY["community"])
vi = VERDICT_INDEX.get(result.verdict, 2)
decision = policy[vi]
@ -661,7 +658,10 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
return True, f"Allowed ({result.trust_level} source, {result.verdict} verdict)"
if force:
return True, f"Force-installed despite {result.verdict} verdict ({len(result.findings)} findings)"
return True, (
f"Force-installed despite blocked {result.verdict} verdict "
f"({len(result.findings)} findings)"
)
return False, (
f"Blocked ({result.trust_level} source + {result.verdict} verdict, "

View file

@ -354,6 +354,7 @@ async def vision_analyze_tool(
# Prepare error response
result = {
"success": False,
"error": error_msg,
"analysis": analysis,
}

View file

@ -495,6 +495,21 @@ class TrajectoryCompressor:
parts.append(f"[Turn {i} - {role.upper()}]:\n{value}")
return "\n\n".join(parts)
@staticmethod
def _coerce_summary_content(content: Any) -> str:
"""Normalize summary-model output to a safe string."""
if not isinstance(content, str):
content = str(content) if content else ""
return content.strip()
@staticmethod
def _ensure_summary_prefix(summary: str) -> str:
"""Normalize summary text to include the expected prefix exactly once."""
text = (summary or "").strip()
if text.startswith("[CONTEXT SUMMARY]:"):
return text
return "[CONTEXT SUMMARY]:" if not text else f"[CONTEXT SUMMARY]: {text}"
def _generate_summary(self, content: str, metrics: TrajectoryMetrics) -> str:
"""
@ -545,13 +560,8 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
max_tokens=self.config.summary_target_tokens * 2,
)
summary = response.choices[0].message.content.strip()
# Ensure it starts with the prefix
if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary
return summary
summary = self._coerce_summary_content(response.choices[0].message.content)
return self._ensure_summary_prefix(summary)
except Exception as e:
metrics.summarization_errors += 1
@ -612,13 +622,8 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
max_tokens=self.config.summary_target_tokens * 2,
)
summary = response.choices[0].message.content.strip()
# Ensure it starts with the prefix
if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary
return summary
summary = self._coerce_summary_content(response.choices[0].message.content)
return self._ensure_summary_prefix(summary)
except Exception as e:
metrics.summarization_errors += 1

View file

@ -0,0 +1,424 @@
---
sidebar_position: 5
title: "Adding Providers"
description: "How to add a new inference provider to Hermes Agent — auth, runtime resolution, CLI flows, adapters, tests, and docs"
---
# Adding Providers
Hermes can already talk to any OpenAI-compatible endpoint through the custom provider path. Do not add a built-in provider unless you want first-class UX for that service:
- provider-specific auth or token refresh
- a curated model catalog
- setup / `hermes model` menu entries
- provider aliases for `provider:model` syntax
- a non-OpenAI API shape that needs an adapter
If the provider is just "another OpenAI-compatible base URL and API key", a named custom provider may be enough.
## The mental model
A built-in provider has to line up across a few layers:
1. `hermes_cli/auth.py` decides how credentials are found.
2. `hermes_cli/runtime_provider.py` turns that into runtime data:
- `provider`
- `api_mode`
- `base_url`
- `api_key`
- `source`
3. `run_agent.py` uses `api_mode` to decide how requests are built and sent.
4. `hermes_cli/models.py`, `hermes_cli/main.py`, and `hermes_cli/setup.py` make the provider show up in the CLI.
5. `agent/auxiliary_client.py` and `agent/model_metadata.py` keep side tasks and token budgeting working.
The important abstraction is `api_mode`.
- Most providers use `chat_completions`.
- Codex uses `codex_responses`.
- Anthropic uses `anthropic_messages`.
- A new non-OpenAI protocol usually means adding a new adapter and a new `api_mode` branch.
## Choose the implementation path first
### Path A — OpenAI-compatible provider
Use this when the provider accepts standard chat-completions style requests.
Typical work:
- add auth metadata
- add model catalog / aliases
- add runtime resolution
- add CLI menu wiring
- add aux-model defaults
- add tests and user docs
You usually do not need a new adapter or a new `api_mode`.
### Path B — Native provider
Use this when the provider does not behave like OpenAI chat completions.
Examples in-tree today:
- `codex_responses`
- `anthropic_messages`
This path includes everything from Path A plus:
- a provider adapter in `agent/`
- `run_agent.py` branches for request building, dispatch, usage extraction, interrupt handling, and response normalization
- adapter tests
## File checklist
### Required for every built-in provider
1. `hermes_cli/auth.py`
2. `hermes_cli/models.py`
3. `hermes_cli/runtime_provider.py`
4. `hermes_cli/main.py`
5. `hermes_cli/setup.py`
6. `agent/auxiliary_client.py`
7. `agent/model_metadata.py`
8. tests
9. user-facing docs under `website/docs/`
### Additional for native / non-OpenAI providers
10. `agent/<provider>_adapter.py`
11. `run_agent.py`
12. `pyproject.toml` if a provider SDK is required
## Step 1: Pick one canonical provider id
Choose a single provider id and use it everywhere.
Examples from the repo:
- `openai-codex`
- `kimi-coding`
- `minimax-cn`
That same id should appear in:
- `PROVIDER_REGISTRY` in `hermes_cli/auth.py`
- `_PROVIDER_LABELS` in `hermes_cli/models.py`
- `_PROVIDER_ALIASES` in both `hermes_cli/auth.py` and `hermes_cli/models.py`
- CLI `--provider` choices in `hermes_cli/main.py`
- setup / model selection branches
- auxiliary-model defaults
- tests
If the id differs between those files, the provider will feel half-wired: auth may work while `/model`, setup, or runtime resolution silently misses it.
## Step 2: Add auth metadata in `hermes_cli/auth.py`
For API-key providers, add a `ProviderConfig` entry to `PROVIDER_REGISTRY` with:
- `id`
- `name`
- `auth_type="api_key"`
- `inference_base_url`
- `api_key_env_vars`
- optional `base_url_env_var`
Also add aliases to `_PROVIDER_ALIASES`.
Use the existing providers as templates:
- simple API-key path: Z.AI, MiniMax
- API-key path with endpoint detection: Kimi, Z.AI
- native token resolution: Anthropic
- OAuth / auth-store path: Nous, OpenAI Codex
Questions to answer here:
- What env vars should Hermes check, and in what priority order?
- Does the provider need base-URL overrides?
- Does it need endpoint probing or token refresh?
- What should the auth error say when credentials are missing?
If the provider needs something more than "look up an API key", add a dedicated credential resolver instead of shoving logic into unrelated branches.
## Step 3: Add model catalog and aliases in `hermes_cli/models.py`
Update the provider catalog so the provider works in menus and in `provider:model` syntax.
Typical edits:
- `_PROVIDER_MODELS`
- `_PROVIDER_LABELS`
- `_PROVIDER_ALIASES`
- provider display order inside `list_available_providers()`
- `provider_model_ids()` if the provider supports a live `/models` fetch
If the provider exposes a live model list, prefer that first and keep `_PROVIDER_MODELS` as the static fallback.
This file is also what makes inputs like these work:
```text
anthropic:claude-sonnet-4-6
kimi:model-name
```
If aliases are missing here, the provider may authenticate correctly but still fail in `/model` parsing.
## Step 4: Resolve runtime data in `hermes_cli/runtime_provider.py`
`resolve_runtime_provider()` is the shared path used by CLI, gateway, cron, ACP, and helper clients.
Add a branch that returns a dict with at least:
```python
{
"provider": "your-provider",
"api_mode": "chat_completions", # or your native mode
"base_url": "https://...",
"api_key": "...",
"source": "env|portal|auth-store|explicit",
"requested_provider": requested_provider,
}
```
If the provider is OpenAI-compatible, `api_mode` should usually stay `chat_completions`.
Be careful with API-key precedence. Hermes already contains logic to avoid leaking an OpenRouter key to unrelated endpoints. A new provider should be equally explicit about which key goes to which base URL.
## Step 5: Wire the CLI in `hermes_cli/main.py` and `hermes_cli/setup.py`
A provider is not discoverable until it shows up in the interactive flows.
Update:
### `hermes_cli/main.py`
- `provider_labels`
- provider dispatch inside the `model` command
- `--provider` argument choices
- login/logout choices if the provider supports those flows
- a `_model_flow_<provider>()` function, or reuse `_model_flow_api_key_provider()` if it fits
### `hermes_cli/setup.py`
- `provider_choices`
- auth branch for the provider
- model-selection branch
- any provider-specific explanatory text
- any place where a provider should be excluded from OpenRouter-only prompts or routing settings
If you only update one of these files, `hermes model` and `hermes setup` will drift.
## Step 6: Keep auxiliary calls working
Two files matter here:
### `agent/auxiliary_client.py`
Add a cheap / fast default aux model to `_API_KEY_PROVIDER_AUX_MODELS` if this is a direct API-key provider.
Auxiliary tasks include things like:
- vision summarization
- web extraction summarization
- context compression summaries
- session-search summaries
- memory flushes
If the provider has no sensible aux default, side tasks may fall back to an unsuitable model or unexpectedly run on the expensive main model.
### `agent/model_metadata.py`
Add context lengths for the provider's models so token budgeting, compression thresholds, and limits stay sane.
## Step 7: If the provider is native, add an adapter and `run_agent.py` support
If the provider is not plain chat completions, isolate the provider-specific logic in `agent/<provider>_adapter.py`.
Keep `run_agent.py` focused on orchestration. It should call adapter helpers, not hand-build provider payloads inline all over the file.
A native provider usually needs work in these places:
### New adapter file
Typical responsibilities:
- build the SDK / HTTP client
- resolve tokens
- convert OpenAI-style conversation messages to the provider's request format
- convert tool schemas if needed
- normalize provider responses back into what `run_agent.py` expects
- extract usage and finish-reason data
### `run_agent.py`
Search for `api_mode` and audit every switch point. At minimum, verify:
- `__init__` chooses the new `api_mode`
- client construction works for the provider
- `_build_api_kwargs()` knows how to format requests
- `_api_call_with_interrupt()` dispatches to the right client call
- interrupt / client rebuild paths work
- response validation accepts the provider's shape
- finish-reason extraction is correct
- token-usage extraction is correct
- fallback-model activation can switch into the new provider cleanly
- summary-generation and memory-flush paths still work
Also search `run_agent.py` for `self.client.`. Any code path that assumes the standard OpenAI client exists can break when a native provider uses a different client object or `self.client = None`.
### Prompt caching and provider-specific request fields
Prompt caching and provider-specific knobs are easy to regress.
Examples already in-tree:
- Anthropic has a native prompt-caching path
- OpenRouter gets provider-routing fields
- not every provider should receive every request-side option
When you add a native provider, double-check that Hermes is only sending fields that provider actually understands.
## Step 8: Tests
At minimum, touch the tests that guard provider wiring.
Common places:
- `tests/test_runtime_provider_resolution.py`
- `tests/test_cli_provider_resolution.py`
- `tests/test_cli_model_command.py`
- `tests/test_setup_model_selection.py`
- `tests/test_provider_parity.py`
- `tests/test_run_agent.py`
- `tests/test_<provider>_adapter.py` for a native provider
For docs-only examples, the exact file set may differ. The point is to cover:
- auth resolution
- CLI menu / provider selection
- runtime provider resolution
- agent execution path
- provider:model parsing
- any adapter-specific message conversion
Run tests with xdist disabled:
```bash
source .venv/bin/activate
python -m pytest tests/test_runtime_provider_resolution.py tests/test_cli_provider_resolution.py tests/test_cli_model_command.py tests/test_setup_model_selection.py -n0 -q
```
For deeper changes, run the full suite before pushing:
```bash
source .venv/bin/activate
python -m pytest tests/ -n0 -q
```
## Step 9: Live verification
After tests, run a real smoke test.
```bash
source .venv/bin/activate
python -m hermes_cli.main chat -q "Say hello" --provider your-provider --model your-model
```
Also test the interactive flows if you changed menus:
```bash
source .venv/bin/activate
python -m hermes_cli.main model
python -m hermes_cli.main setup
```
For native providers, verify at least one tool call too, not just a plain text response.
## Step 10: Update user-facing docs
If the provider is meant to ship as a first-class option, update the user docs too:
- `website/docs/getting-started/quickstart.md`
- `website/docs/user-guide/configuration.md`
- `website/docs/reference/environment-variables.md`
A developer can wire the provider perfectly and still leave users unable to discover the required env vars or setup flow.
## OpenAI-compatible provider checklist
Use this if the provider is standard chat completions.
- [ ] `ProviderConfig` added in `hermes_cli/auth.py`
- [ ] aliases added in `hermes_cli/auth.py` and `hermes_cli/models.py`
- [ ] model catalog added in `hermes_cli/models.py`
- [ ] runtime branch added in `hermes_cli/runtime_provider.py`
- [ ] CLI wiring added in `hermes_cli/main.py`
- [ ] setup wiring added in `hermes_cli/setup.py`
- [ ] aux model added in `agent/auxiliary_client.py`
- [ ] context lengths added in `agent/model_metadata.py`
- [ ] runtime / CLI tests updated
- [ ] user docs updated
## Native provider checklist
Use this when the provider needs a new protocol path.
- [ ] everything in the OpenAI-compatible checklist
- [ ] adapter added in `agent/<provider>_adapter.py`
- [ ] new `api_mode` supported in `run_agent.py`
- [ ] interrupt / rebuild path works
- [ ] usage and finish-reason extraction works
- [ ] fallback path works
- [ ] adapter tests added
- [ ] live smoke test passes
## Common pitfalls
### 1. Adding the provider to auth but not to model parsing
That makes credentials resolve correctly while `/model` and `provider:model` inputs fail.
### 2. Forgetting that `config["model"]` can be a string or a dict
A lot of provider-selection code has to normalize both forms.
### 3. Assuming a built-in provider is required
If the service is just OpenAI-compatible, a custom provider may already solve the user's problem with less maintenance.
### 4. Forgetting auxiliary paths
The main chat path can work while summarization, memory flushes, or vision helpers fail because aux routing was never updated.
### 5. Native-provider branches hiding in `run_agent.py`
Search for `api_mode` and `self.client.`. Do not assume the obvious request path is the only one.
### 6. Sending OpenRouter-only knobs to other providers
Fields like provider routing belong only on the providers that support them.
### 7. Updating `hermes model` but not `hermes setup`
Both flows need to know about the provider.
## Good search targets while implementing
If you are hunting for all the places a provider touches, search these symbols:
- `PROVIDER_REGISTRY`
- `_PROVIDER_ALIASES`
- `_PROVIDER_MODELS`
- `resolve_runtime_provider`
- `_model_flow_`
- `provider_choices`
- `api_mode`
- `_API_KEY_PROVIDER_AUX_MODELS`
- `self.client.`
## Related docs
- [Provider Runtime Resolution](./provider-runtime.md)
- [Architecture](./architecture.md)
- [Contributing](./contributing.md)

View file

@ -41,12 +41,13 @@ If you are new to the codebase, read in this order:
2. [Agent Loop Internals](./agent-loop.md)
3. [Prompt Assembly](./prompt-assembly.md)
4. [Provider Runtime Resolution](./provider-runtime.md)
5. [Tools Runtime](./tools-runtime.md)
6. [Session Storage](./session-storage.md)
7. [Gateway Internals](./gateway-internals.md)
8. [Context Compression & Prompt Caching](./context-compression-and-caching.md)
9. [ACP Internals](./acp-internals.md)
10. [Environments, Benchmarks & Data Generation](./environments.md)
5. [Adding Providers](./adding-providers.md)
6. [Tools Runtime](./tools-runtime.md)
7. [Session Storage](./session-storage.md)
8. [Gateway Internals](./gateway-internals.md)
9. [Context Compression & Prompt Caching](./context-compression-and-caching.md)
10. [ACP Internals](./acp-internals.md)
11. [Environments, Benchmarks & Data Generation](./environments.md)
## Major subsystems

View file

@ -20,6 +20,12 @@ We value contributions in this order:
6. **New tools** — rarely needed; most capabilities should be skills
7. **Documentation** — fixes, clarifications, new examples
## Common contribution paths
- Building a new tool? Start with [Adding Tools](./adding-tools.md)
- Building a new skill? Start with [Creating Skills](./creating-skills.md)
- Building a new inference provider? Start with [Adding Providers](./adding-providers.md)
## Development Setup
### Prerequisites

View file

@ -20,6 +20,8 @@ Primary implementation:
- `hermes_cli/auth.py`
- `agent/auxiliary_client.py`
If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page.
## Resolution precedence
At a high level, provider resolution uses:

View file

@ -119,6 +119,7 @@ uv pip install -e "."
| `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` |
| `modal` | Modal cloud execution backend | `uv pip install -e ".[modal]"` |
| `tts-premium` | ElevenLabs premium voices | `uv pip install -e ".[tts-premium]"` |
| `voice` | CLI microphone input + audio playback | `uv pip install -e ".[voice]"` |
| `pty` | PTY terminal support | `uv pip install -e ".[pty]"` |
| `honcho` | AI-native memory (Honcho integration) | `uv pip install -e ".[honcho]"` |
| `mcp` | Model Context Protocol support | `uv pip install -e ".[mcp]"` |

View file

@ -54,7 +54,9 @@ Deploy Hermes Agent as a bot on your favorite messaging platform.
3. [Messaging Overview](/docs/user-guide/messaging)
4. [Telegram Setup](/docs/user-guide/messaging/telegram)
5. [Discord Setup](/docs/user-guide/messaging/discord)
6. [Security](/docs/user-guide/security)
6. [Voice Mode](/docs/user-guide/features/voice-mode)
7. [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes)
8. [Security](/docs/user-guide/security)
For full project examples, see:
- [Daily Briefing Bot](/docs/guides/daily-briefing-bot)

View file

@ -129,6 +129,25 @@ Chat with Hermes from your phone or other surfaces via Telegram, Discord, Slack,
hermes gateway setup # Interactive platform configuration
```
### Add voice mode
Want microphone input in the CLI or spoken replies in messaging?
```bash
pip install "hermes-agent[voice]"
# Optional but recommended for free local speech-to-text
pip install faster-whisper
```
Then start Hermes and enable it inside the CLI:
```text
/voice on
```
Press `Ctrl+B` to record, or use `/voice tts` to have Hermes speak its replies. See [Voice Mode](../user-guide/features/voice-mode.md) for the full setup across CLI, Telegram, Discord, and Discord voice channels.
### Schedule automated tasks
```

View file

@ -0,0 +1,422 @@
---
sidebar_position: 7
title: "Use Voice Mode with Hermes"
description: "A practical guide to setting up and using Hermes voice mode across CLI, Telegram, Discord, and Discord voice channels"
---
# Use Voice Mode with Hermes
This guide is the practical companion to the [Voice Mode feature reference](/docs/user-guide/features/voice-mode).
If the feature page explains what voice mode can do, this guide shows how to actually use it well.
## What voice mode is good for
Voice mode is especially useful when:
- you want a hands-free CLI workflow
- you want spoken responses in Telegram or Discord
- you want Hermes sitting in a Discord voice channel for live conversation
- you want quick idea capture, debugging, or back-and-forth while walking around instead of typing
## Choose your voice mode setup
There are really three different voice experiences in Hermes.
| Mode | Best for | Platform |
|---|---|---|
| Interactive microphone loop | Personal hands-free use while coding or researching | CLI |
| Voice replies in chat | Spoken responses alongside normal messaging | Telegram, Discord |
| Live voice channel bot | Group or personal live conversation in a VC | Discord voice channels |
A good path is:
1. get text working first
2. enable voice replies second
3. move to Discord voice channels last if you want the full experience
## Step 1: make sure normal Hermes works first
Before touching voice mode, verify that:
- Hermes starts
- your provider is configured
- the agent can answer text prompts normally
```bash
hermes
```
Ask something simple:
```text
What tools do you have available?
```
If that is not solid yet, fix text mode first.
## Step 2: install the right extras
### CLI microphone + playback
```bash
pip install "hermes-agent[voice]"
```
### Messaging platforms
```bash
pip install "hermes-agent[messaging]"
```
### Premium ElevenLabs TTS
```bash
pip install "hermes-agent[tts-premium]"
```
### Everything
```bash
pip install "hermes-agent[all]"
```
## Step 3: install system dependencies
### macOS
```bash
brew install portaudio ffmpeg opus
```
### Ubuntu / Debian
```bash
sudo apt install portaudio19-dev ffmpeg libopus0
```
Why these matter:
- `portaudio` → microphone input / playback for CLI voice mode
- `ffmpeg` → audio conversion for TTS and messaging delivery
- `opus` → Discord voice codec support
## Step 4: choose STT and TTS providers
Hermes supports both local and cloud speech stacks.
### Easiest / cheapest setup
Use local STT and free Edge TTS:
- STT provider: `local`
- TTS provider: `edge`
This is usually the best place to start.
### Environment file example
Add to `~/.hermes/.env`:
```bash
# Cloud STT options (local needs no key)
GROQ_API_KEY=***
VOICE_TOOLS_OPENAI_KEY=***
# Premium TTS (optional)
ELEVENLABS_API_KEY=***
```
### Provider recommendations
#### Speech-to-text
- `local` → best default for privacy and zero-cost use
- `groq` → very fast cloud transcription
- `openai` → good paid fallback
#### Text-to-speech
- `edge` → free and good enough for most users
- `elevenlabs` → best quality
- `openai` → good middle ground
## Step 5: recommended config
```yaml
voice:
record_key: "ctrl+b"
max_recording_seconds: 120
auto_tts: false
silence_threshold: 200
silence_duration: 3.0
stt:
provider: "local"
local:
model: "base"
tts:
provider: "edge"
edge:
voice: "en-US-AriaNeural"
```
This is a good conservative default for most people.
## Use case 1: CLI voice mode
### Turn it on
Start Hermes:
```bash
hermes
```
Inside the CLI:
```text
/voice on
```
### Recording flow
Default key:
- `Ctrl+B`
Workflow:
1. press `Ctrl+B`
2. speak
3. wait for silence detection to stop recording automatically
4. Hermes transcribes and responds
5. if TTS is on, it speaks the answer
6. the loop can automatically restart for continuous use
### Useful commands
```text
/voice
/voice on
/voice off
/voice tts
/voice status
```
### Good CLI workflows
#### Walk-up debugging
Say:
```text
I keep getting a docker permission error. Help me debug it.
```
Then continue hands-free:
- "Read the last error again"
- "Explain the root cause in simpler terms"
- "Now give me the exact fix"
#### Research / brainstorming
Great for:
- walking around while thinking
- dictating half-formed ideas
- asking Hermes to structure your thoughts in real time
#### Accessibility / low-typing sessions
If typing is inconvenient, voice mode is one of the fastest ways to stay in the full Hermes loop.
## Tuning CLI behavior
### Silence threshold
If Hermes starts/stops too aggressively, tune:
```yaml
voice:
silence_threshold: 250
```
Higher threshold = less sensitive.
### Silence duration
If you pause a lot between sentences, increase:
```yaml
voice:
silence_duration: 4.0
```
### Record key
If `Ctrl+B` conflicts with your terminal or tmux habits:
```yaml
voice:
record_key: "ctrl+space"
```
## Use case 2: voice replies in Telegram or Discord
This mode is simpler than full voice channels.
Hermes stays a normal chat bot, but can speak replies.
### Start the gateway
```bash
hermes gateway
```
### Turn on voice replies
Inside Telegram or Discord:
```text
/voice on
```
or
```text
/voice tts
```
### Modes
| Mode | Meaning |
|---|---|
| `off` | text only |
| `voice_only` | speak only when the user sent voice |
| `all` | speak every reply |
### When to use which mode
- `/voice on` if you want spoken replies only for voice-originating messages
- `/voice tts` if you want a full spoken assistant all the time
### Good messaging workflows
#### Telegram assistant on your phone
Use when:
- you are away from your machine
- you want to send voice notes and get quick spoken replies
- you want Hermes to function like a portable research or ops assistant
#### Discord DMs with spoken output
Useful when you want private interaction without server-channel mention behavior.
## Use case 3: Discord voice channels
This is the most advanced mode.
Hermes joins a Discord VC, listens to user speech, transcribes it, runs the normal agent pipeline, and speaks replies back into the channel.
### Required Discord permissions
In addition to the normal text-bot setup, make sure the bot has:
- Connect
- Speak
- preferably Use Voice Activity
Also enable privileged intents in the Developer Portal:
- Presence Intent
- Server Members Intent
- Message Content Intent
### Join and leave
In a Discord text channel where the bot is present:
```text
/voice join
/voice leave
/voice status
```
### What happens when joined
- users speak in the VC
- Hermes detects speech boundaries
- transcripts are posted in the associated text channel
- Hermes responds in text and audio
- the text channel is the one where `/voice join` was issued
### Best practices for Discord VC use
- keep `DISCORD_ALLOWED_USERS` tight
- use a dedicated bot/testing channel at first
- verify STT and TTS work in ordinary text-chat voice mode before trying VC mode
## Voice quality recommendations
### Best quality setup
- STT: local `large-v3` or Groq `whisper-large-v3`
- TTS: ElevenLabs
### Best speed / convenience setup
- STT: local `base` or Groq
- TTS: Edge
### Best zero-cost setup
- STT: local
- TTS: Edge
## Common failure modes
### "No audio device found"
Install `portaudio`.
### "Bot joins but hears nothing"
Check:
- your Discord user ID is in `DISCORD_ALLOWED_USERS`
- you are not muted
- privileged intents are enabled
- the bot has Connect/Speak permissions
### "It transcribes but does not speak"
Check:
- TTS provider config
- API key / quota for ElevenLabs or OpenAI
- `ffmpeg` install for Edge conversion paths
### "Whisper outputs garbage"
Try:
- quieter environment
- higher `silence_threshold`
- different STT provider/model
- shorter, clearer utterances
### "It works in DMs but not in server channels"
That is often mention policy.
By default, the bot needs an `@mention` in Discord server text channels unless configured otherwise.
## Suggested first-week setup
If you want the shortest path to success:
1. get text Hermes working
2. install `hermes-agent[voice]`
3. use CLI voice mode with local STT + Edge TTS
4. then enable `/voice on` in Telegram or Discord
5. only after that, try Discord VC mode
That progression keeps the debugging surface small.
## Where to read next
- [Voice Mode feature reference](/docs/user-guide/features/voice-mode)
- [Messaging Gateway](/docs/user-guide/messaging)
- [Discord setup](/docs/user-guide/messaging/discord)
- [Telegram setup](/docs/user-guide/messaging/telegram)
- [Configuration](/docs/user-guide/configuration)

View file

@ -33,6 +33,8 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl
| 📚 **[Skills System](/docs/user-guide/features/skills)** | Procedural memory the agent creates and reuses |
| 🔌 **[MCP Integration](/docs/user-guide/features/mcp)** | Connect to MCP servers, filter their tools, and extend Hermes safely |
| 🧭 **[Use MCP with Hermes](/docs/guides/use-mcp-with-hermes)** | Practical MCP setup patterns, examples, and tutorials |
| 🎙️ **[Voice Mode](/docs/user-guide/features/voice-mode)** | Real-time voice interaction in CLI, Telegram, Discord, and Discord VC |
| 🗣️ **[Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes)** | Hands-on setup and usage patterns for Hermes voice workflows |
| 🎭 **[Personality & SOUL.md](/docs/user-guide/features/personality)** | Define Hermes' default voice with a global SOUL.md |
| 📄 **[Context Files](/docs/user-guide/features/context-files)** | Project context files that shape every conversation |
| 🔒 **[Security](/docs/user-guide/security)** | Command approval, authorization, container isolation |

View file

@ -31,7 +31,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `CLAUDE_CODE_OAUTH_TOKEN` | Claude Code setup-token (same as `ANTHROPIC_TOKEN`) |
| `HERMES_MODEL` | Preferred model name (checked before `LLM_MODEL`, used by gateway) |
| `LLM_MODEL` | Default model name (fallback when not set in config.yaml) |
| `VOICE_TOOLS_OPENAI_KEY` | OpenAI key for TTS and voice transcription (separate from custom endpoint) |
| `VOICE_TOOLS_OPENAI_KEY` | OpenAI key for OpenAI speech-to-text and text-to-speech providers |
| `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`) |
## Provider Auth (OAuth)
@ -57,7 +57,12 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
| `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds |
| `FAL_KEY` | Image generation ([fal.ai](https://fal.ai/)) |
| `ELEVENLABS_API_KEY` | Premium TTS voices ([elevenlabs.io](https://elevenlabs.io/)) |
| `GROQ_API_KEY` | Groq Whisper STT API key ([groq.com](https://groq.com/)) |
| `ELEVENLABS_API_KEY` | ElevenLabs premium TTS voices ([elevenlabs.io](https://elevenlabs.io/)) |
| `STT_GROQ_MODEL` | Override the Groq STT model (default: `whisper-large-v3-turbo`) |
| `GROQ_BASE_URL` | Override the Groq OpenAI-compatible STT endpoint |
| `STT_OPENAI_MODEL` | Override the OpenAI STT model (default: `whisper-1`) |
| `STT_OPENAI_BASE_URL` | Override the OpenAI-compatible STT endpoint |
| `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) |
| `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) |
| `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) |

View file

@ -45,6 +45,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
| `/verbose` | Cycle tool progress display: off → new → all → verbose |
| `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) |
| `/skin` | Show or change the display skin/theme |
| `/voice [on\|off\|tts\|status]` | Toggle CLI voice mode and spoken playback. Recording uses `voice.record_key` (default: `Ctrl+B`). |
### Tools & Skills
@ -105,6 +106,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
| `/usage` | Show token usage for the current session. |
| `/insights [days]` | Show usage analytics. |
| `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display. |
| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. |
| `/rollback [number]` | List or restore filesystem checkpoints. |
| `/background &lt;prompt&gt;` | Run a prompt in a separate background session. |
| `/reload-mcp` | Reload MCP servers from config. |
@ -116,4 +118,5 @@ The messaging gateway supports the following built-in commands inside Telegram,
- `/skin`, `/tools`, `/toolsets`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, and `/verbose` are **CLI-only** commands.
- `/status`, `/stop`, `/sethome`, `/resume`, `/background`, and `/update` are **messaging-only** commands.
- `/reload-mcp` and `/rollback` work in **both** the CLI and the messaging gateway.
- `/voice`, `/reload-mcp`, and `/rollback` work in **both** the CLI and the messaging gateway.
- `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord.

View file

@ -77,6 +77,7 @@ When resuming a previous session (`hermes -c` or `hermes --resume <id>`), a "Pre
| `Alt+Enter` or `Ctrl+J` | New line (multi-line input) |
| `Alt+V` | Paste an image from the clipboard when supported by the terminal |
| `Ctrl+V` | Paste text and opportunistically attach clipboard images |
| `Ctrl+B` | Start/stop voice recording when voice mode is enabled (`voice.record_key`, default: `ctrl+b`) |
| `Ctrl+C` | Interrupt agent (double-press within 2s to force exit) |
| `Ctrl+D` | Exit |
| `Tab` | Autocomplete slash commands |
@ -95,11 +96,15 @@ Common examples:
| `/skills browse` | Browse the skills hub and official optional skills |
| `/background <prompt>` | Run a prompt in a separate background session |
| `/skin` | Show or switch the active CLI skin |
| `/voice on` | Enable CLI voice mode (press `Ctrl+B` to record) |
| `/voice tts` | Toggle spoken playback for Hermes replies |
| `/reasoning high` | Increase reasoning effort |
| `/title My Session` | Name the current session |
For the full built-in CLI and messaging lists, see [Slash Commands Reference](../reference/slash-commands.md).
For setup, providers, silence tuning, and messaging/Discord voice usage, see [Voice Mode](features/voice-mode.md).
:::tip
Commands are case-insensitive — `/HELP` works the same as `/help`. Installed skills also become slash commands automatically.
:::

View file

@ -695,6 +695,8 @@ tts:
voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer
```
This controls both the `text_to_speech` tool and spoken replies in voice mode (`/voice tts` in the CLI or messaging gateway).
## Display Settings
```yaml
@ -719,10 +721,43 @@ display:
```yaml
stt:
provider: "openai" # STT provider
provider: "local" # "local" | "groq" | "openai"
local:
model: "base" # tiny, base, small, medium, large-v3
openai:
model: "whisper-1" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
# model: "whisper-1" # Legacy fallback key still respected
```
Requires `VOICE_TOOLS_OPENAI_KEY` in `.env` for OpenAI STT.
Provider behavior:
- `local` uses `faster-whisper` running on your machine. Install it separately with `pip install faster-whisper`.
- `groq` uses Groq's Whisper-compatible endpoint and reads `GROQ_API_KEY`.
- `openai` uses the OpenAI speech API and reads `VOICE_TOOLS_OPENAI_KEY`.
If the requested provider is unavailable, Hermes falls back automatically in this order: `local` → `groq` → `openai`.
Groq and OpenAI model overrides are environment-driven:
```bash
STT_GROQ_MODEL=whisper-large-v3-turbo
STT_OPENAI_MODEL=whisper-1
GROQ_BASE_URL=https://api.groq.com/openai/v1
STT_OPENAI_BASE_URL=https://api.openai.com/v1
```
## Voice Mode (CLI)
```yaml
voice:
record_key: "ctrl+b" # Push-to-talk key inside the CLI
max_recording_seconds: 120 # Hard stop for long recordings
auto_tts: false # Enable spoken replies automatically when /voice on
silence_threshold: 200 # RMS threshold for speech detection
silence_duration: 3.0 # Seconds of silence before auto-stop
```
Use `/voice on` in the CLI to enable microphone mode, `record_key` to start/stop recording, and `/voice tts` to toggle spoken replies. See [Voice Mode](/docs/user-guide/features/voice-mode) for end-to-end setup and platform-specific behavior.
## Quick Commands

View file

@ -194,6 +194,8 @@ The agent's final response is automatically delivered. You do not need to call `
## Schedule formats
The agent's final response is automatically delivered — you do **not** need to include `send_message` in the cron prompt for that same destination. If a cron run calls `send_message` to the exact target the scheduler will already deliver to, Hermes skips that duplicate send and tells the model to put the user-facing content in the final response instead. Use `send_message` only for additional or different targets.
### Relative delays (one-shot)
```text

View file

@ -8,12 +8,14 @@ description: "Real-time voice conversations with Hermes Agent — CLI, Telegram,
Hermes Agent supports full voice interaction across CLI and messaging platforms. Talk to the agent using your microphone, hear spoken replies, and have live voice conversations in Discord voice channels.
If you want a practical setup walkthrough with recommended configurations and real usage patterns, see [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes).
## Prerequisites
Before using voice features, make sure you have:
1. **Hermes Agent installed** — `pip install hermes-agent` (see [Getting Started](../../getting-started.md))
2. **An LLM provider configured** — set `OPENAI_API_KEY`, `OPENAI_BASE_URL`, and `LLM_MODEL` in `~/.hermes/.env`
1. **Hermes Agent installed** — `pip install hermes-agent` (see [Installation](/docs/getting-started/installation))
2. **An LLM provider configured** — run `hermes model` or set your preferred provider credentials in `~/.hermes/.env`
3. **A working base setup** — run `hermes` to verify the agent responds to text before enabling voice
:::tip

View file

@ -210,8 +210,13 @@ Replace the ID with the actual channel ID (right-click → Copy Channel ID with
Hermes Agent supports Discord voice messages:
- **Incoming voice messages** are automatically transcribed using Whisper (requires `GROQ_API_KEY` or `VOICE_TOOLS_OPENAI_KEY` to be set in your environment).
- **Incoming voice messages** are automatically transcribed using the configured STT provider: local `faster-whisper` (no key), Groq Whisper (`GROQ_API_KEY`), or OpenAI Whisper (`VOICE_TOOLS_OPENAI_KEY`).
- **Text-to-speech**: Use `/voice tts` to have the bot send spoken audio responses alongside text replies.
- **Discord voice channels**: Hermes can also join a voice channel, listen to users speaking, and talk back in the channel.
For the full setup and operational guide, see:
- [Voice Mode](/docs/user-guide/features/voice-mode)
- [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes)
## Troubleshooting

View file

@ -8,6 +8,8 @@ description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal,
Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, Home Assistant, or your browser. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages.
For the full voice feature set — including CLI microphone mode, spoken replies in messaging, and Discord voice-channel conversations — see [Voice Mode](/docs/user-guide/features/voice-mode) and [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes).
## Architecture
```text
@ -77,6 +79,7 @@ hermes gateway status # Check service status
| `/usage` | Show token usage for this session |
| `/insights [days]` | Show usage insights and analytics |
| `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display |
| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control messaging voice replies and Discord voice-channel behavior |
| `/rollback [number]` | List or restore filesystem checkpoints |
| `/background <prompt>` | Run a prompt in a separate background session |
| `/reload-mcp` | Reload MCP servers from config |

View file

@ -224,7 +224,7 @@ Make sure the bot has been **invited to the channel** (`/invite @Hermes Agent`).
Hermes supports voice on Slack:
- **Incoming:** Voice/audio messages are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`)
- **Incoming:** Voice/audio messages are automatically transcribed using the configured STT provider: local `faster-whisper`, Groq Whisper (`GROQ_API_KEY`), or OpenAI Whisper (`VOICE_TOOLS_OPENAI_KEY`)
- **Outgoing:** TTS responses are sent as audio file attachments
---

View file

@ -131,7 +131,11 @@ Group chat IDs are negative numbers (e.g., `-1001234567890`). Your personal DM c
### Incoming Voice (Speech-to-Text)
Voice messages you send on Telegram are automatically transcribed using OpenAI's Whisper API and injected as text into the conversation. This requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`.
Voice messages you send on Telegram are automatically transcribed by Hermes's configured STT provider and injected as text into the conversation.
- `local` uses `faster-whisper` on the machine running Hermes — no API key required
- `groq` uses Groq Whisper and requires `GROQ_API_KEY`
- `openai` uses OpenAI Whisper and requires `VOICE_TOOLS_OPENAI_KEY`
### Outgoing Voice (Text-to-Speech)
@ -173,7 +177,7 @@ Hermes Agent works in Telegram group chats with a few considerations:
| Bot not responding at all | Verify `TELEGRAM_BOT_TOKEN` is correct. Check `hermes gateway` logs for errors. |
| Bot responds with "unauthorized" | Your user ID is not in `TELEGRAM_ALLOWED_USERS`. Double-check with @userinfobot. |
| Bot ignores group messages | Privacy mode is likely on. Disable it (Step 3) or make the bot a group admin. **Remember to remove and re-add the bot after changing privacy.** |
| Voice messages not transcribed | Check that `VOICE_TOOLS_OPENAI_KEY` is set and valid in `~/.hermes/.env`. |
| Voice messages not transcribed | Verify STT is available: install `faster-whisper` for local transcription, or set `GROQ_API_KEY` / `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. |
| Voice replies are files, not bubbles | Install `ffmpeg` (needed for Edge TTS Opus conversion). |
| Bot token revoked/invalid | Generate a new token via `/revoke` then `/newbot` or `/token` in BotFather. Update your `.env` file. |

View file

@ -137,7 +137,7 @@ with reconnection logic.
Hermes supports voice on WhatsApp:
- **Incoming:** Voice messages (`.ogg` opus) are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`)
- **Incoming:** Voice messages (`.ogg` opus) are automatically transcribed using the configured STT provider: local `faster-whisper`, Groq Whisper (`GROQ_API_KEY`), or OpenAI Whisper (`VOICE_TOOLS_OPENAI_KEY`)
- **Outgoing:** TTS responses are sent as MP3 audio file attachments
- Agent responses are prefixed with "⚕ **Hermes Agent**" for easy identification

View file

@ -24,6 +24,7 @@ const sidebars: SidebarsConfig = {
'guides/python-library',
'guides/use-mcp-with-hermes',
'guides/use-soul-with-hermes',
'guides/use-voice-mode-with-hermes',
],
},
{
@ -75,6 +76,7 @@ const sidebars: SidebarsConfig = {
type: 'category',
label: 'Web & Media',
items: [
'user-guide/features/voice-mode',
'user-guide/features/browser',
'user-guide/features/vision',
'user-guide/features/image-generation',
@ -108,6 +110,7 @@ const sidebars: SidebarsConfig = {
'developer-guide/architecture',
'developer-guide/agent-loop',
'developer-guide/provider-runtime',
'developer-guide/adding-providers',
'developer-guide/prompt-assembly',
'developer-guide/context-compression-and-caching',
'developer-guide/gateway-internals',