Hermes Agent UX Improvements

This commit is contained in:
teknium1 2026-02-22 02:16:11 -08:00
parent b1f55e3ee5
commit ededaaa874
23 changed files with 945 additions and 1545 deletions

128
agent/auxiliary_client.py Normal file
View file

@ -0,0 +1,128 @@
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up
the best available backend without duplicating fallback logic.
Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
2. Nous Portal
3. None (custom endpoints can't substitute for Gemini multimodal)
"""
import json
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from openai import OpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"  # OpenRouter model slug for side tasks
_NOUS_MODEL = "gemini-3-flash"  # Nous Portal model slug for side tasks
# Default Nous inference endpoint; overridable via NOUS_INFERENCE_BASE_URL (see _nous_base_url).
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
# Auth state file consulted for the active Nous Portal provider.
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
def _read_nous_auth() -> Optional[dict]:
    """Load ~/.hermes/auth.json and return the Nous provider state, if usable.

    Returns the provider dict only when Nous is the active provider AND it
    carries at least one credential; returns None in every other case
    (missing file, parse error, different active provider, no tokens).
    """
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
        state = json.loads(_AUTH_JSON_PATH.read_text())
        if state.get("active_provider") != "nous":
            return None
        nous_state = state.get("providers", {}).get("nous", {})
        # A usable provider carries an agent_key or an access_token.
        if nous_state.get("agent_key") or nous_state.get("access_token"):
            return nous_state
        return None
    except Exception as exc:
        # Best-effort read: any failure simply means "no Nous auth".
        logger.debug("Could not read Nous auth: %s", exc)
        return None
def _nous_api_key(provider: dict) -> str:
"""Extract the best API key from a Nous provider state dict."""
return provider.get("agent_key") or provider.get("access_token", "")
def _nous_base_url() -> str:
    """Return the Nous inference base URL (env override or built-in default)."""
    override = os.environ.get("NOUS_INFERENCE_BASE_URL")
    return override if override is not None else _NOUS_DEFAULT_BASE_URL
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for text-only auxiliary tasks.

    Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
    """
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        # 1. OpenRouter wins whenever a key is configured.
        logger.debug("Auxiliary text client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    nous_state = _read_nous_auth()
    if nous_state:
        # 2. Nous Portal credentials from ~/.hermes/auth.json.
        logger.debug("Auxiliary text client: Nous Portal")
        client = OpenAI(api_key=_nous_api_key(nous_state), base_url=_nous_base_url())
        return client, _NOUS_MODEL

    custom_base_url = os.getenv("OPENAI_BASE_URL")
    custom_api_key = os.getenv("OPENAI_API_KEY")
    if custom_base_url and custom_api_key:
        # 3. Custom endpoint — requires BOTH the base URL and the key.
        model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
        logger.debug("Auxiliary text client: custom endpoint (%s)", model)
        return OpenAI(api_key=custom_api_key, base_url=custom_base_url), model

    # 4. No backend available.
    logger.debug("Auxiliary text client: none available")
    return None, None
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for vision/multimodal auxiliary tasks.

    Only OpenRouter and the Nous Portal qualify — custom endpoints cannot
    substitute for Gemini multimodal.
    """
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        # 1. OpenRouter
        logger.debug("Auxiliary vision client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    provider_state = _read_nous_auth()
    if provider_state:
        # 2. Nous Portal
        logger.debug("Auxiliary vision client: Nous Portal")
        client = OpenAI(
            api_key=_nous_api_key(provider_state),
            base_url=_nous_base_url(),
        )
        return client, _NOUS_MODEL

    # 3. Neither multimodal-capable backend is configured.
    logger.debug("Auxiliary vision client: none available")
    return None, None

View file

@ -9,13 +9,11 @@ import logging
import os import os
from typing import Any, Dict, List from typing import Any, Dict, List
from openai import OpenAI from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import ( from agent.model_metadata import (
get_model_context_length, get_model_context_length,
estimate_messages_tokens_rough, estimate_messages_tokens_rough,
) )
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -31,7 +29,6 @@ class ContextCompressor:
self, self,
model: str, model: str,
threshold_percent: float = 0.85, threshold_percent: float = 0.85,
summary_model: str = "google/gemini-3-flash-preview",
protect_first_n: int = 3, protect_first_n: int = 3,
protect_last_n: int = 4, protect_last_n: int = 4,
summary_target_tokens: int = 500, summary_target_tokens: int = 500,
@ -39,7 +36,6 @@ class ContextCompressor:
): ):
self.model = model self.model = model
self.threshold_percent = threshold_percent self.threshold_percent = threshold_percent
self.summary_model = summary_model
self.protect_first_n = protect_first_n self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens self.summary_target_tokens = summary_target_tokens
@ -53,8 +49,7 @@ class ContextCompressor:
self.last_completion_tokens = 0 self.last_completion_tokens = 0
self.last_total_tokens = 0 self.last_total_tokens = 0
api_key = os.getenv("OPENROUTER_API_KEY", "") self.client, self.summary_model = get_text_auxiliary_client()
self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None
def update_from_response(self, usage: Dict[str, Any]): def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response.""" """Update tracked token usage from API response."""
@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not self.quiet_mode: if not self.quiet_mode:
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)") print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})") print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
# Truncation fallback when no auxiliary model is available
if self.client is None:
print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
# Keep system message(s) at the front and the protected tail;
# simply drop the oldest non-system messages until under threshold.
kept = []
for msg in messages:
if msg.get("role") == "system":
kept.append(msg.copy())
else:
break
tail = messages[-self.protect_last_n:]
kept.extend(m.copy() for m in tail)
self.compression_count += 1
if not self.quiet_mode:
print(f" ✂️ Truncated: {len(messages)}{len(kept)} messages (dropped middle turns)")
return kept
if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)") print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
summary = self._generate_summary(turns_to_summarize) summary = self._generate_summary(turns_to_summarize)

View file

@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency.
Used by AIAgent._execute_tool_calls for CLI feedback. Used by AIAgent._execute_tool_calls for CLI feedback.
""" """
import json
import os import os
import random import random
import threading import threading
import time import time
# ANSI escape codes for coloring tool failure indicators
_RED = "\033[31m"
_RESET = "\033[0m"
# ========================================================================= # =========================================================================
# Tool preview (one-line summary of a tool call's primary argument) # Tool preview (one-line summary of a tool call's primary argument)
@ -242,12 +247,46 @@ KAWAII_GENERIC = [
# Cute tool message (completion line that replaces the spinner) # Cute tool message (completion line that replaces the spinner)
# ========================================================================= # =========================================================================
def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str: def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
"""Inspect a tool result string for signs of failure.
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
failures. On success, returns ``(False, "")``.
"""
if result is None:
return False, ""
if tool_name == "terminal":
try:
data = json.loads(result)
exit_code = data.get("exit_code")
if exit_code is not None and exit_code != 0:
return True, f" [exit {exit_code}]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
return False, ""
# Generic heuristic for non-terminal tools
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
return True, " [error]"
return False, ""
def get_cute_tool_message(
tool_name: str, args: dict, duration: float, result: str | None = None,
) -> str:
"""Generate a formatted tool completion line for CLI quiet mode. """Generate a formatted tool completion line for CLI quiet mode.
Format: ``| {emoji} {verb:9} {detail} {duration}`` Format: ``| {emoji} {verb:9} {detail} {duration}``
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
""" """
dur = f"{duration:.1f}s" dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
def _trunc(s, n=40): def _trunc(s, n=40):
s = str(s) s = str(s)
@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
p = str(p) p = str(p)
return ("..." + p[-(n-3):]) if len(p) > n else p return ("..." + p[-(n-3):]) if len(p) > n else p
def _wrap(line: str) -> str:
"""Apply red coloring and failure suffix when the tool failed."""
if not is_failure:
return line
return f"{_RED}{line}{failure_suffix}{_RESET}"
if tool_name == "web_search": if tool_name == "web_search":
return f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}" return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract": if tool_name == "web_extract":
urls = args.get("urls", []) urls = args.get("urls", [])
if urls: if urls:
url = urls[0] if isinstance(urls, list) else str(urls) url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0] domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else "" extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}" return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return f"┊ 📄 fetch pages {dur}" return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "web_crawl": if tool_name == "web_crawl":
url = args.get("url", "") url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0] domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}" return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
if tool_name == "terminal": if tool_name == "terminal":
return f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}" return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process": if tool_name == "process":
action = args.get("action", "?") action = args.get("action", "?")
sid = args.get("session_id", "")[:12] sid = args.get("session_id", "")[:12]
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}", labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"} "wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
return f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}" return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file": if tool_name == "read_file":
return f"┊ 📖 read {_path(args.get('path', ''))} {dur}" return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file": if tool_name == "write_file":
return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}" return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch": if tool_name == "patch":
return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}" return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
if tool_name == "search_files": if tool_name == "search_files":
pattern = _trunc(args.get("pattern", ""), 35) pattern = _trunc(args.get("pattern", ""), 35)
target = args.get("target", "content") target = args.get("target", "content")
verb = "find" if target == "files" else "grep" verb = "find" if target == "files" else "grep"
return f"┊ 🔎 {verb:9} {pattern} {dur}" return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
if tool_name == "browser_navigate": if tool_name == "browser_navigate":
url = args.get("url", "") url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0] domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}" return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
if tool_name == "browser_snapshot": if tool_name == "browser_snapshot":
mode = "full" if args.get("full") else "compact" mode = "full" if args.get("full") else "compact"
return f"┊ 📸 snapshot {mode} {dur}" return _wrap(f"┊ 📸 snapshot {mode} {dur}")
if tool_name == "browser_click": if tool_name == "browser_click":
return f"┊ 👆 click {args.get('ref', '?')} {dur}" return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
if tool_name == "browser_type": if tool_name == "browser_type":
return f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}" return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
if tool_name == "browser_scroll": if tool_name == "browser_scroll":
d = args.get("direction", "down") d = args.get("direction", "down")
arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "") arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "")
return f"{arrow} scroll {d} {dur}" return _wrap(f"{arrow} scroll {d} {dur}")
if tool_name == "browser_back": if tool_name == "browser_back":
return f"┊ ◀️ back {dur}" return _wrap(f"┊ ◀️ back {dur}")
if tool_name == "browser_press": if tool_name == "browser_press":
return f"┊ ⌨️ press {args.get('key', '?')} {dur}" return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
if tool_name == "browser_close": if tool_name == "browser_close":
return f"┊ 🚪 close browser {dur}" return _wrap(f"┊ 🚪 close browser {dur}")
if tool_name == "browser_get_images": if tool_name == "browser_get_images":
return f"┊ 🖼️ images extracting {dur}" return _wrap(f"┊ 🖼️ images extracting {dur}")
if tool_name == "browser_vision": if tool_name == "browser_vision":
return f"┊ 👁️ vision analyzing page {dur}" return _wrap(f"┊ 👁️ vision analyzing page {dur}")
if tool_name == "todo": if tool_name == "todo":
todos_arg = args.get("todos") todos_arg = args.get("todos")
merge = args.get("merge", False) merge = args.get("merge", False)
if todos_arg is None: if todos_arg is None:
return f"┊ 📋 plan reading tasks {dur}" return _wrap(f"┊ 📋 plan reading tasks {dur}")
elif merge: elif merge:
return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}" return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
else: else:
return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}" return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
if tool_name == "session_search": if tool_name == "session_search":
return f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}" return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
if tool_name == "memory": if tool_name == "memory":
action = args.get("action", "?") action = args.get("action", "?")
target = args.get("target", "") target = args.get("target", "")
if action == "add": if action == "add":
return f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}" return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace": elif action == "replace":
return f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}" return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove": elif action == "remove":
return f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}" return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return f"┊ 🧠 memory {action} {dur}" return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list": if tool_name == "skills_list":
return f"┊ 📚 skills list {args.get('category', 'all')} {dur}" return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
if tool_name == "skill_view": if tool_name == "skill_view":
return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}" return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
if tool_name == "image_generate": if tool_name == "image_generate":
return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}" return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
if tool_name == "text_to_speech": if tool_name == "text_to_speech":
return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}" return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze": if tool_name == "vision_analyze":
return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}" return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents": if tool_name == "mixture_of_agents":
return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}" return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message": if tool_name == "send_message":
return f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}" return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "schedule_cronjob": if tool_name == "schedule_cronjob":
return f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}" return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
if tool_name == "list_cronjobs": if tool_name == "list_cronjobs":
return f"┊ ⏰ jobs listing {dur}" return _wrap(f"┊ ⏰ jobs listing {dur}")
if tool_name == "remove_cronjob": if tool_name == "remove_cronjob":
return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}" return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
if tool_name.startswith("rl_"): if tool_name.startswith("rl_"):
rl = { rl = {
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}", "rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}", "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
"rl_list_runs": "list runs", "rl_test_inference": "test inference", "rl_list_runs": "list runs", "rl_test_inference": "test inference",
} }
return f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}" return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
if tool_name == "execute_code": if tool_name == "execute_code":
code = args.get("code", "") code = args.get("code", "")
first_line = code.strip().split("\n")[0] if code.strip() else "" first_line = code.strip().split("\n")[0] if code.strip() else ""
return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}" return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
if tool_name == "delegate_task": if tool_name == "delegate_task":
tasks = args.get("tasks") tasks = args.get("tasks")
if tasks and isinstance(tasks, list): if tasks and isinstance(tasks, list):
return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}" return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
return f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}" return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
preview = build_tool_preview(tool_name, args) or "" preview = build_tool_preview(tool_name, args) or ""
return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}" return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")

3
cli.py
View file

@ -339,9 +339,6 @@ def _cprint(text: str):
""" """
_pt_print(_PT_ANSI(text)) _pt_print(_PT_ANSI(text))
# Version string
VERSION = "v1.0.0"
# ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal) # ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal)
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/] HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/] [bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -8,6 +8,7 @@ Handles loading and validating configuration for:
- Delivery preferences - Delivery preferences
""" """
import logging
import os import os
import json import json
from pathlib import Path from pathlib import Path
@ -15,6 +16,8 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
from enum import Enum from enum import Enum
logger = logging.getLogger(__name__)
class Platform(Enum): class Platform(Enum):
"""Supported messaging platforms.""" """Supported messaging platforms."""
@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig:
# Override with environment variables # Override with environment variables
_apply_env_overrides(config) _apply_env_overrides(config)
# --- Validate loaded values ---
policy = config.default_reset_policy
if not (0 <= policy.at_hour <= 23):
logger.warning(
"Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
)
policy.at_hour = 4
if policy.idle_minutes is None or policy.idle_minutes <= 0:
logger.warning(
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
policy.idle_minutes,
)
policy.idle_minutes = 1440
# Warn about empty bot tokens — platforms that loaded an empty string
# won't connect and the cause can be confusing without a log line.
_token_env_names = {
Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
Platform.DISCORD: "DISCORD_BOT_TOKEN",
Platform.SLACK: "SLACK_BOT_TOKEN",
}
for platform, pconfig in config.platforms.items():
if not pconfig.enabled:
continue
env_name = _token_env_names.get(platform)
if env_name and pconfig.token is not None and not pconfig.token.strip():
logger.warning(
"%s is enabled but %s is empty. "
"The adapter will likely fail to connect.",
platform.value, env_name,
)
return config return config

View file

@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on:
- Local (always saved to files) - Local (always saved to files)
""" """
import logging
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Any, Union from typing import Dict, List, Optional, Any, Union
from enum import Enum from enum import Enum
logger = logging.getLogger(__name__)
MAX_PLATFORM_OUTPUT = 4000
TRUNCATED_VISIBLE = 3800
from .config import Platform, GatewayConfig from .config import Platform, GatewayConfig
from .session import SessionSource from .session import SessionSource
@ -245,6 +251,15 @@ class DeliveryRouter:
"timestamp": timestamp "timestamp": timestamp
} }
def _save_full_output(self, content: str, job_id: str) -> Path:
"""Save full cron output to disk and return the file path."""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = Path.home() / ".hermes" / "cron" / "output"
out_dir.mkdir(parents=True, exist_ok=True)
path = out_dir / f"{job_id}_{timestamp}.txt"
path.write_text(content)
return path
async def _deliver_to_platform( async def _deliver_to_platform(
self, self,
target: DeliveryTarget, target: DeliveryTarget,
@ -260,8 +275,16 @@ class DeliveryRouter:
if not target.chat_id: if not target.chat_id:
raise ValueError(f"No chat ID for {target.platform.value} delivery") raise ValueError(f"No chat ID for {target.platform.value} delivery")
# Call the adapter's send method # Guard: truncate oversized cron output to stay within platform limits
# Adapters should implement: async def send(chat_id: str, content: str) -> Dict if len(content) > MAX_PLATFORM_OUTPUT:
job_id = (metadata or {}).get("job_id", "unknown")
saved_path = self._save_full_output(content, job_id)
logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
content = (
content[:TRUNCATED_VISIBLE]
+ f"\n\n... [truncated, full output saved to {saved_path}]"
)
return await adapter.send(target.chat_id, content, metadata=metadata) return await adapter.send(target.chat_id, content, metadata=metadata)

View file

@ -659,7 +659,12 @@ class BasePlatformAdapter(ABC):
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]: def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
""" """
Split a long message into chunks. Split a long message into chunks, preserving code block boundaries.
When a split falls inside a triple-backtick code block, the fence is
closed at the end of the current chunk and reopened (with the original
language tag) at the start of the next chunk. Multi-chunk responses
receive indicators like ``(1/3)``.
Args: Args:
content: The full message content content: The full message content
@ -671,22 +676,73 @@ class BasePlatformAdapter(ABC):
if len(content) <= max_length: if len(content) <= max_length:
return [content] return [content]
chunks = [] INDICATOR_RESERVE = 10 # room for " (XX/XX)"
while content: FENCE_CLOSE = "\n```"
if len(content) <= max_length:
chunks.append(content) chunks: List[str] = []
remaining = content
# When the previous chunk ended mid-code-block, this holds the
# language tag (possibly "") so we can reopen the fence.
carry_lang: Optional[str] = None
while remaining:
# If we're continuing a code block from the previous chunk,
# prepend a new opening fence with the same language tag.
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
# How much body text we can fit after accounting for the prefix,
# a potential closing fence, and the chunk indicator.
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
if headroom < 1:
headroom = max_length // 2
# Everything remaining fits in one final chunk
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
chunks.append(prefix + remaining)
break break
# Try to split at a newline # Find a natural split point (prefer newlines, then spaces)
split_idx = content.rfind("\n", 0, max_length) region = remaining[:headroom]
if split_idx == -1: split_at = region.rfind("\n")
# No newline, split at space if split_at < headroom // 2:
split_idx = content.rfind(" ", 0, max_length) split_at = region.rfind(" ")
if split_idx == -1: if split_at < 1:
# No space either, hard split split_at = headroom
split_idx = max_length
chunks.append(content[:split_idx]) chunk_body = remaining[:split_at]
content = content[split_idx:].lstrip() remaining = remaining[split_at:].lstrip()
full_chunk = prefix + chunk_body
# Walk the chunk line-by-line to determine whether we end
# inside an open code block.
in_code = carry_lang is not None
lang = carry_lang or ""
for line in full_chunk.split("\n"):
stripped = line.strip()
if stripped.startswith("```"):
if in_code:
in_code = False
lang = ""
else:
in_code = True
tag = stripped[3:].strip()
lang = tag.split()[0] if tag else ""
if in_code:
# Close the orphaned fence so the chunk is valid on its own
full_chunk += FENCE_CLOSE
carry_lang = lang
else:
carry_lang = None
chunks.append(full_chunk)
# Append chunk indicators when the response spans multiple messages
if len(chunks) > 1:
total = len(chunks)
chunks = [
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
]
return chunks return chunks

View file

@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
""" """
import asyncio import asyncio
import re
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
try: try:
@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
return TELEGRAM_AVAILABLE return TELEGRAM_AVAILABLE
# Matches every character that MarkdownV2 requires to be backslash-escaped
# when it appears outside a code span or fenced code block.
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
def _escape_mdv2(text: str) -> str:
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
class TelegramAdapter(BasePlatformAdapter): class TelegramAdapter(BasePlatformAdapter):
""" """
Telegram bot adapter. Telegram bot adapter.
@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
msg = await self._bot.send_message( msg = await self._bot.send_message(
chat_id=int(chat_id), chat_id=int(chat_id),
text=chunk, text=chunk,
parse_mode=ParseMode.MARKDOWN, parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None, message_thread_id=int(thread_id) if thread_id else None,
) )
@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter):
def format_message(self, content: str) -> str: def format_message(self, content: str) -> str:
""" """
Format message for Telegram. Convert standard markdown to Telegram MarkdownV2 format.
Telegram uses a subset of markdown. We'll use the simpler Protected regions (code blocks, inline code) are extracted first so
Markdown mode (not MarkdownV2) for compatibility. their contents are never modified. Standard markdown constructs
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
and all remaining special characters are escaped.
""" """
# Basic escaping for Telegram Markdown if not content:
# In Markdown mode (not V2), only certain characters need escaping return content
return content
placeholders: dict = {}
counter = [0]
def _ph(value: str) -> str:
"""Stash *value* behind a placeholder token that survives escaping."""
key = f"\x00PH{counter[0]}\x00"
counter[0] += 1
placeholders[key] = value
return key
text = content
# 1) Protect fenced code blocks (``` ... ```)
text = re.sub(
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
lambda m: _ph(m.group(0)),
text,
)
# 2) Protect inline code (`...`)
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
# 3) Convert markdown links — escape the display text; inside the URL
# only ')' and '\' need escaping per the MarkdownV2 spec.
def _convert_link(m):
display = _escape_mdv2(m.group(1))
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
inner = m.group(1).strip()
# Strip redundant bold markers that may appear inside a header
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
return _ph(f'*{_escape_mdv2(inner)}*')
text = re.sub(
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
)
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
text = re.sub(
r'\*\*(.+?)\*\*',
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
text,
)
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
text = re.sub(
r'\*([^*]+)\*',
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
text,
)
# 7) Escape remaining special characters in plain text
text = _escape_mdv2(text)
# 8) Restore placeholders in reverse insertion order so that
# nested references (a placeholder inside another) resolve correctly.
for key in reversed(list(placeholders.keys())):
text = text.replace(key, placeholders[key])
return text
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages.""" """Handle incoming text messages."""

View file

@ -20,6 +20,7 @@ import re
import sys import sys
import signal import signal
import threading import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from typing import Dict, Optional, Any, List from typing import Dict, Optional, Any, List
@ -402,9 +403,27 @@ class GatewayRunner:
# Build the context prompt to inject # Build the context prompt to inject
context_prompt = build_session_context_prompt(context) context_prompt = build_session_context_prompt(context)
# If the previous session expired and was auto-reset, prepend a notice
# so the agent knows this is a fresh conversation (not an intentional /reset).
if getattr(session_entry, 'was_auto_reset', False):
context_prompt = (
"[System note: The user's previous session expired due to inactivity. "
"This is a fresh conversation with no prior context.]\n\n"
+ context_prompt
)
session_entry.was_auto_reset = False
# Load conversation history from transcript # Load conversation history from transcript
history = self.session_store.load_transcript(session_entry.session_id) history = self.session_store.load_transcript(session_entry.session_id)
# First-message onboarding for brand-new messaging platform users
if not history:
context_prompt += (
"\n\n[System note: This is the user's very first message in this session. "
"Briefly introduce yourself and mention that /help shows available commands. "
"Keep the introduction concise -- one or two sentences max.]"
)
# ----------------------------------------------------------------- # -----------------------------------------------------------------
# Auto-analyze images sent by the user # Auto-analyze images sent by the user
# #
@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60):
Runs inside the gateway process so cronjobs fire automatically without Runs inside the gateway process so cronjobs fire automatically without
needing a separate `hermes cron daemon` or system cron entry. needing a separate `hermes cron daemon` or system cron entry.
Every 60th tick (~once per hour) the image/audio cache is pruned so
stale temp files don't accumulate.
""" """
from cron.scheduler import tick as cron_tick from cron.scheduler import tick as cron_tick
from gateway.platforms.base import cleanup_image_cache
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
logger.info("Cron ticker started (interval=%ds)", interval) logger.info("Cron ticker started (interval=%ds)", interval)
tick_count = 0
while not stop_event.is_set(): while not stop_event.is_set():
try: try:
cron_tick(verbose=False) cron_tick(verbose=False)
except Exception as e: except Exception as e:
logger.debug("Cron tick error: %s", e) logger.debug("Cron tick error: %s", e)
tick_count += 1
if tick_count % IMAGE_CACHE_EVERY == 0:
try:
removed = cleanup_image_cache(max_age_hours=24)
if removed:
logger.info("Image cache cleanup: removed %d stale file(s)", removed)
except Exception as e:
logger.debug("Image cache cleanup error: %s", e)
stop_event.wait(timeout=interval) stop_event.wait(timeout=interval)
logger.info("Cron ticker stopped") logger.info("Cron ticker stopped")
@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
Returns True if the gateway ran successfully, False if it failed to start. Returns True if the gateway ran successfully, False if it failed to start.
A False return causes a non-zero exit code so systemd can auto-restart. A False return causes a non-zero exit code so systemd can auto-restart.
""" """
# Configure rotating file log so gateway output is persisted for debugging
log_dir = Path.home() / '.hermes' / 'logs'
log_dir.mkdir(parents=True, exist_ok=True)
file_handler = RotatingFileHandler(
log_dir / 'gateway.log',
maxBytes=5 * 1024 * 1024,
backupCount=3,
)
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(file_handler)
logging.getLogger().setLevel(logging.INFO)
runner = GatewayRunner(config) runner = GatewayRunner(config)
# Set up signal handlers # Set up signal handlers

View file

@ -219,6 +219,10 @@ class SessionEntry:
output_tokens: int = 0 output_tokens: int = 0
total_tokens: int = 0 total_tokens: int = 0
# Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
result = { result = {
"session_key": self.session_key, "session_key": self.session_key,
@ -388,11 +392,14 @@ class SessionStore:
return entry return entry
else: else:
# Session is being reset -- end the old one in SQLite # Session is being reset -- end the old one in SQLite
was_auto_reset = True
if self._db: if self._db:
try: try:
self._db.end_session(entry.session_id, "session_reset") self._db.end_session(entry.session_id, "session_reset")
except Exception as e: except Exception as e:
logger.debug("Session DB operation failed: %s", e) logger.debug("Session DB operation failed: %s", e)
else:
was_auto_reset = False
# Create new session # Create new session
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}" session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@ -406,6 +413,7 @@ class SessionStore:
display_name=source.chat_name, display_name=source.chat_name,
platform=source.platform, platform=source.platform,
chat_type=source.chat_type, chat_type=source.chat_type,
was_auto_reset=was_auto_reset,
) )
self._entries[session_key] = entry self._entries[session_key] = entry

View file

@ -11,4 +11,4 @@ Provides subcommands for:
- hermes cron - Manage cron jobs - hermes cron - Manage cron jobs
""" """
__version__ = "0.1.0" __version__ = "v1.0.0"

View file

@ -33,7 +33,7 @@ def cprint(text: str):
# ASCII Art & Branding # ASCII Art & Branding
# ========================================================================= # =========================================================================
VERSION = "v1.0.0" from hermes_cli import __version__ as VERSION
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/] HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/] [bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -44,6 +44,8 @@ def run_doctor(args):
should_fix = getattr(args, 'fix', False) should_fix = getattr(args, 'fix', False)
issues = [] issues = []
manual_issues = [] # issues that can't be auto-fixed
fixed_count = 0
print() print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN)) print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@ -135,8 +137,15 @@ def run_doctor(args):
check_ok(".env file exists (in project directory)") check_ok(".env file exists (in project directory)")
else: else:
check_fail("~/.hermes/.env file missing") check_fail("~/.hermes/.env file missing")
check_info("Run 'hermes setup' to create one") if should_fix:
issues.append("Run 'hermes setup' to create .env") env_path.parent.mkdir(parents=True, exist_ok=True)
env_path.touch()
check_ok("Created empty ~/.hermes/.env")
check_info("Run 'hermes setup' to configure API keys")
fixed_count += 1
else:
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
# Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback) # Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
config_path = HERMES_HOME / 'config.yaml' config_path = HERMES_HOME / 'config.yaml'
@ -147,7 +156,17 @@ def run_doctor(args):
if fallback_config.exists(): if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)") check_ok("cli-config.yaml exists (in project directory)")
else: else:
check_warn("config.yaml not found", "(using defaults)") example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if should_fix and example_config.exists():
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(example_config), str(config_path))
check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
fixed_count += 1
elif should_fix:
check_warn("config.yaml not found and no example to copy from")
manual_issues.append("Create ~/.hermes/config.yaml manually")
else:
check_warn("config.yaml not found", "(using defaults)")
# ========================================================================= # =========================================================================
# Check: Directory structure # Check: Directory structure
@ -159,7 +178,26 @@ def run_doctor(args):
if hermes_home.exists(): if hermes_home.exists():
check_ok("~/.hermes directory exists") check_ok("~/.hermes directory exists")
else: else:
check_warn("~/.hermes not found", "(will be created on first use)") if should_fix:
hermes_home.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes directory")
fixed_count += 1
else:
check_warn("~/.hermes not found", "(will be created on first use)")
# Check expected subdirectories
expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
for subdir_name in expected_subdirs:
subdir_path = hermes_home / subdir_name
if subdir_path.exists():
check_ok(f"~/.hermes/{subdir_name}/ exists")
else:
if should_fix:
subdir_path.mkdir(parents=True, exist_ok=True)
check_ok(f"Created ~/.hermes/{subdir_name}/")
fixed_count += 1
else:
check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
# Check for SOUL.md persona file # Check for SOUL.md persona file
soul_path = hermes_home / "SOUL.md" soul_path = hermes_home / "SOUL.md"
@ -175,14 +213,25 @@ def run_doctor(args):
check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)") check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
if should_fix: if should_fix:
soul_path.parent.mkdir(parents=True, exist_ok=True) soul_path.parent.mkdir(parents=True, exist_ok=True)
soul_path.write_text("# Hermes Agent Persona\n\n<!-- Edit this file to customize how Hermes communicates. -->\n", encoding="utf-8") soul_path.write_text(
check_ok("Created ~/.hermes/SOUL.md") "# Hermes Agent Persona\n\n"
"<!-- Edit this file to customize how Hermes communicates. -->\n\n"
"You are Hermes, a helpful AI assistant.\n",
encoding="utf-8",
)
check_ok("Created ~/.hermes/SOUL.md with basic template")
fixed_count += 1
logs_dir = PROJECT_ROOT / "logs" logs_dir = PROJECT_ROOT / "logs"
if logs_dir.exists(): if logs_dir.exists():
check_ok("logs/ directory exists") check_ok("logs/ directory exists (project root)")
else: else:
check_warn("logs/ not found", "(will be created on first use)") if should_fix:
logs_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created logs/ directory")
fixed_count += 1
else:
check_warn("logs/ not found", "(will be created on first use)")
# Check memory directory # Check memory directory
memories_dir = hermes_home / "memories" memories_dir = hermes_home / "memories"
@ -205,6 +254,7 @@ def run_doctor(args):
if should_fix: if should_fix:
memories_dir.mkdir(parents=True, exist_ok=True) memories_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes/memories/") check_ok("Created ~/.hermes/memories/")
fixed_count += 1
# Check SQLite session store # Check SQLite session store
state_db_path = hermes_home / "state.db" state_db_path = hermes_home / "state.db"
@ -299,6 +349,7 @@ def run_doctor(args):
openrouter_key = os.getenv("OPENROUTER_API_KEY") openrouter_key = os.getenv("OPENROUTER_API_KEY")
if openrouter_key: if openrouter_key:
print(" Checking OpenRouter API...", end="", flush=True)
try: try:
import httpx import httpx
response = httpx.get( response = httpx.get(
@ -307,20 +358,21 @@ def run_doctor(args):
timeout=10 timeout=10
) )
if response.status_code == 200: if response.status_code == 200:
check_ok("OpenRouter API") print(f"\r {color('', Colors.GREEN)} OpenRouter API ")
elif response.status_code == 401: elif response.status_code == 401:
check_fail("OpenRouter API", "(invalid API key)") print(f"\r {color('', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
issues.append("Check OPENROUTER_API_KEY in .env") issues.append("Check OPENROUTER_API_KEY in .env")
else: else:
check_fail("OpenRouter API", f"(HTTP {response.status_code})") print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
except Exception as e: except Exception as e:
check_fail("OpenRouter API", f"({e})") print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ")
issues.append("Check network connectivity") issues.append("Check network connectivity")
else: else:
check_warn("OpenRouter API", "(not configured)") check_warn("OpenRouter API", "(not configured)")
anthropic_key = os.getenv("ANTHROPIC_API_KEY") anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_key: if anthropic_key:
print(" Checking Anthropic API...", end="", flush=True)
try: try:
import httpx import httpx
response = httpx.get( response = httpx.get(
@ -332,14 +384,14 @@ def run_doctor(args):
timeout=10 timeout=10
) )
if response.status_code == 200: if response.status_code == 200:
check_ok("Anthropic API") print(f"\r {color('', Colors.GREEN)} Anthropic API ")
elif response.status_code == 401: elif response.status_code == 401:
check_fail("Anthropic API", "(invalid API key)") print(f"\r {color('', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ")
else: else:
# Note: Anthropic may not have /models endpoint msg = "(couldn't verify)"
check_warn("Anthropic API", "(couldn't verify)") print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ")
except Exception as e: except Exception as e:
check_warn("Anthropic API", f"({e})") print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ")
# ========================================================================= # =========================================================================
# Check: Submodules # Check: Submodules
@ -440,17 +492,28 @@ def run_doctor(args):
# Summary # Summary
# ========================================================================= # =========================================================================
print() print()
if issues: remaining_issues = issues + manual_issues
print(color("" * 60, Colors.YELLOW)) if should_fix and fixed_count > 0:
print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD)) print(color("" * 60, Colors.GREEN))
print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
if remaining_issues:
print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
else:
print()
print() print()
for i, issue in enumerate(issues, 1): if remaining_issues:
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
elif remaining_issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
print()
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}") print(f" {i}. {issue}")
print() print()
if not should_fix:
if should_fix: print(color(" Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
print(color(" Attempting auto-fix is not yet implemented.", Colors.DIM))
print(color(" Please resolve issues manually.", Colors.DIM))
else: else:
print(color("" * 60, Colors.GREEN)) print(color("" * 60, Colors.GREEN))
print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD)) print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD))

View file

@ -47,8 +47,66 @@ from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _has_any_provider_configured() -> bool:
"""Check if at least one inference provider is usable."""
from hermes_cli.config import get_env_path, get_hermes_home
# Check env vars (may be set by .env or shell)
if os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"):
return True
# Check .env file for keys
env_file = get_env_path()
if env_file.exists():
try:
for line in env_file.read_text().splitlines():
line = line.strip()
if line.startswith("#") or "=" not in line:
continue
key, _, val = line.partition("=")
val = val.strip().strip("'\"")
if key.strip() in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY") and val:
return True
except Exception:
pass
# Check for Nous Portal OAuth credentials
auth_file = get_hermes_home() / "auth.json"
if auth_file.exists():
try:
import json
auth = json.loads(auth_file.read_text())
active = auth.get("active_provider")
if active:
state = auth.get("providers", {}).get(active, {})
if state.get("access_token") or state.get("refresh_token"):
return True
except Exception:
pass
return False
def cmd_chat(args): def cmd_chat(args):
"""Run interactive chat CLI.""" """Run interactive chat CLI."""
# First-run guard: check if any provider is configured before launching
if not _has_any_provider_configured():
print()
print("It looks like Hermes isn't configured yet -- no API keys or providers found.")
print()
print(" Run: hermes setup")
print()
try:
reply = input("Run setup now? [Y/n] ").strip().lower()
except (EOFError, KeyboardInterrupt):
reply = "n"
if reply in ("", "y", "yes"):
cmd_setup(args)
return
print()
print("You can run 'hermes setup' at any time to configure.")
sys.exit(1)
# Import and run the CLI # Import and run the CLI
from cli import main as cli_main from cli import main as cli_main
@ -219,20 +277,10 @@ def _model_flow_openrouter(config, current_model=""):
print("API key saved.") print("API key saved.")
print() print()
OPENROUTER_MODELS = [ from hermes_cli.models import model_ids
"anthropic/claude-opus-4.6", openrouter_models = model_ids()
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.5",
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
]
selected = _prompt_model_selection(OPENROUTER_MODELS, current_model=current_model) selected = _prompt_model_selection(openrouter_models, current_model=current_model)
if selected: if selected:
# Clear any custom endpoint and set provider to openrouter # Clear any custom endpoint and set provider to openrouter
if get_env_value("OPENAI_BASE_URL"): if get_env_value("OPENAI_BASE_URL"):

33
hermes_cli/models.py Normal file
View file

@ -0,0 +1,33 @@
"""
Canonical list of OpenRouter models offered in CLI and setup wizards.
Add, remove, or reorder entries here both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("anthropic/claude-opus-4.6", "recommended"),
("anthropic/claude-sonnet-4.5", ""),
("anthropic/claude-opus-4.5", ""),
("openai/gpt-5.2", ""),
("openai/gpt-5.2-codex", ""),
("google/gemini-3-pro-preview", ""),
("google/gemini-3-flash-preview", ""),
("z-ai/glm-4.7", ""),
("moonshotai/kimi-k2.5", ""),
("minimax/minimax-m2.1", ""),
]
def model_ids() -> list[str]:
"""Return just the model-id strings (convenience helper)."""
return [mid for mid, _ in OPENROUTER_MODELS]
def menu_labels() -> list[str]:
"""Return display labels like 'anthropic/claude-opus-4.6 (recommended)'."""
labels = []
for mid, desc in OPENROUTER_MODELS:
labels.append(f"{mid} ({desc})" if desc else mid)
return labels

View file

@ -611,46 +611,27 @@ def run_setup_wizard(args):
save_env_value("LLM_MODEL", custom) save_env_value("LLM_MODEL", custom)
# else: keep current # else: keep current
else: else:
# Static list for OpenRouter / fallback # Static list for OpenRouter / fallback (from canonical list)
model_choices = [ from hermes_cli.models import model_ids, menu_labels
"anthropic/claude-opus-4.6 (recommended)",
"anthropic/claude-sonnet-4.5", ids = model_ids()
"anthropic/claude-opus-4.5", model_choices = menu_labels() + [
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
"Custom model", "Custom model",
f"Keep current ({current_model})" f"Keep current ({current_model})",
] ]
model_idx = prompt_choice("Select default model:", model_choices, 11) keep_idx = len(model_choices) - 1
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
model_map = { if model_idx < len(ids):
0: "anthropic/claude-opus-4.6", config['model'] = ids[model_idx]
1: "anthropic/claude-sonnet-4.5", save_env_value("LLM_MODEL", ids[model_idx])
2: "anthropic/claude-opus-4.5", elif model_idx == len(ids): # Custom
3: "openai/gpt-5.2",
4: "openai/gpt-5.2-codex",
5: "google/gemini-3-pro-preview",
6: "google/gemini-3-flash-preview",
7: "z-ai/glm-4.7",
8: "moonshotai/kimi-k2.5",
9: "minimax/minimax-m2.1",
}
if model_idx in model_map:
config['model'] = model_map[model_idx]
save_env_value("LLM_MODEL", model_map[model_idx])
elif model_idx == 10: # Custom
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)") custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
if custom: if custom:
config['model'] = custom config['model'] = custom
save_env_value("LLM_MODEL", custom) save_env_value("LLM_MODEL", custom)
# else: Keep current (model_idx == 11) # else: Keep current
# ========================================================================= # =========================================================================
# Step 4: Terminal Backend # Step 4: Terminal Backend

File diff suppressed because it is too large Load diff

View file

@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
approval_callback=approval_callback) approval_callback=approval_callback)
if choice == "deny": if choice == "deny":
return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."} return {
"approved": False,
"message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
"pattern_key": pattern_key,
"description": description,
}
if choice == "session": if choice == "session":
approve_session(session_key, pattern_key) approve_session(session_key, pattern_key)

View file

@ -51,25 +51,16 @@ import signal
import subprocess import subprocess
import shutil import shutil
import sys import sys
import asyncio
import tempfile import tempfile
import threading import threading
import time import time
import requests import requests
from typing import Dict, Any, Optional, List from typing import Dict, Any, Optional, List
from pathlib import Path from pathlib import Path
from hermes_constants import OPENROUTER_CHAT_URL from agent.auxiliary_client import get_vision_auxiliary_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Try to import httpx for async LLM calls
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
# ============================================================================ # ============================================================================
# Configuration # Configuration
# ============================================================================ # ============================================================================
@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization # Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Model for task-aware extraction # Resolve vision auxiliary client for extraction/vision tasks
EXTRACTION_MODEL = "google/gemini-3-flash-preview" _aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
# Track active sessions per task # Track active sessions per task
# Now stores tuple of (session_name, browserbase_session_id, cdp_url) # Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@ -782,87 +773,49 @@ def _run_browser_command(
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
async def _extract_relevant_content( def _extract_relevant_content(
snapshot_text: str, snapshot_text: str,
user_task: Optional[str] = None user_task: Optional[str] = None
) -> str: ) -> str:
"""Use LLM to extract relevant content from a snapshot based on the user's task.
Falls back to simple truncation when no auxiliary vision model is configured.
""" """
Use LLM to extract relevant content from a snapshot based on the user's task. if _aux_vision_client is None or EXTRACTION_MODEL is None:
This provides task-aware summarization that preserves meaningful text content
(paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
Args:
snapshot_text: The full snapshot text
user_task: The user's current task/goal (optional)
Returns:
Summarized/extracted content
"""
if not HTTPX_AVAILABLE:
# Fall back to simple truncation
return _truncate_snapshot(snapshot_text) return _truncate_snapshot(snapshot_text)
# Get API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
return _truncate_snapshot(snapshot_text)
# Build extraction prompt
if user_task: if user_task:
extraction_prompt = f"""You are a content extractor for a browser automation agent. extraction_prompt = (
f"You are a content extractor for a browser automation agent.\n\n"
The user's task is: {user_task} f"The user's task is: {user_task}\n\n"
f"Given the following page snapshot (accessibility tree representation), "
Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on: f"extract and summarize the most relevant information for completing this task. Focus on:\n"
1. Interactive elements (buttons, links, inputs) that might be needed f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
2. Text content relevant to the task (prices, descriptions, headings, important info) f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
3. Navigation structure if relevant f"3. Navigation structure if relevant\n\n"
f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them. f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary that preserves actionable information and relevant content."
Page Snapshot: )
{snapshot_text}
Provide a concise summary that preserves actionable information and relevant content."""
else: else:
extraction_prompt = f"""Summarize this page snapshot, preserving: extraction_prompt = (
1. All interactive elements with their ref IDs (like [ref=e5]) f"Summarize this page snapshot, preserving:\n"
2. Key text content and headings f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
3. Important information visible on the page f"2. Key text content and headings\n"
f"3. Important information visible on the page\n\n"
Page Snapshot: f"Page Snapshot:\n{snapshot_text}\n\n"
{snapshot_text} f"Provide a concise summary focused on interactive elements and key content."
)
Provide a concise summary focused on interactive elements and key content."""
try: try:
async with httpx.AsyncClient(timeout=30.0) as client: response = _aux_vision_client.chat.completions.create(
response = await client.post( model=EXTRACTION_MODEL,
OPENROUTER_CHAT_URL, messages=[{"role": "user", "content": extraction_prompt}],
headers={ max_tokens=4000,
"Authorization": f"Bearer {api_key}", temperature=0.1,
"Content-Type": "application/json" )
}, return response.choices[0].message.content
json={
"model": EXTRACTION_MODEL,
"messages": [
{"role": "user", "content": extraction_prompt}
],
"max_tokens": 4000,
"temperature": 0.1
}
)
if response.status_code == 200:
result = response.json()
return result["choices"][0]["message"]["content"]
else:
# Fall back to truncation on API error
return _truncate_snapshot(snapshot_text)
except Exception: except Exception:
# Fall back to truncation on any error
return _truncate_snapshot(snapshot_text) return _truncate_snapshot(snapshot_text)
@ -991,16 +944,7 @@ def browser_snapshot(
# Check if snapshot needs summarization # Check if snapshot needs summarization
if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task: if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
# Run async extraction snapshot_text = _extract_relevant_content(snapshot_text, user_task)
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
snapshot_text = loop.run_until_complete(
_extract_relevant_content(snapshot_text, user_task)
)
elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD: elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
snapshot_text = _truncate_snapshot(snapshot_text) snapshot_text = _truncate_snapshot(snapshot_text)
@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default" effective_task_id = task_id or "default"
# Check for OpenRouter API key # Check auxiliary vision client
api_key = os.environ.get("OPENROUTER_API_KEY") if _aux_vision_client is None or EXTRACTION_MODEL is None:
if not api_key:
return json.dumps({ return json.dumps({
"success": False, "success": False,
"error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key." "error": "Browser vision unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False) }, ensure_ascii=False)
# Create a temporary file for the screenshot # Create a temporary file for the screenshot
@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
image_base64 = base64.b64encode(image_data).decode("ascii") image_base64 = base64.b64encode(image_data).decode("ascii")
data_url = f"data:image/png;base64,{image_base64}" data_url = f"data:image/png;base64,{image_base64}"
# Prepare the vision prompt vision_prompt = (
vision_prompt = f"""You are analyzing a screenshot of a web browser. f"You are analyzing a screenshot of a web browser.\n\n"
f"User's question: {question}\n\n"
f"Provide a detailed and helpful answer based on what you see in the screenshot. "
f"If there are interactive elements, describe them. If there are verification challenges "
f"or CAPTCHAs, describe what type they are and what action might be needed. "
f"Focus on answering the user's specific question."
)
User's question: {question} # Use the sync auxiliary vision client directly
response = _aux_vision_client.chat.completions.create(
Provide a detailed and helpful answer based on what you see in the screenshot. model=EXTRACTION_MODEL,
If there are interactive elements, describe them. If there are verification challenges messages=[
or CAPTCHAs, describe what type they are and what action might be needed. {
Focus on answering the user's specific question.""" "role": "user",
"content": [
# Call OpenRouter/Gemini for vision analysis {"type": "text", "text": vision_prompt},
if HTTPX_AVAILABLE: {"type": "image_url", "image_url": {"url": data_url}},
import asyncio
async def analyze_screenshot():
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
],
"max_tokens": 2000,
"temperature": 0.1
}
)
if response.status_code != 200:
return {
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return {
"success": True,
"analysis": analysis
}
# Run the async function
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
vision_result = loop.run_until_complete(analyze_screenshot())
return json.dumps(vision_result, ensure_ascii=False)
else:
# Fallback: use synchronous requests
response = requests.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
], ],
"max_tokens": 2000, }
"temperature": 0.1 ],
}, max_tokens=2000,
timeout=60 temperature=0.1,
) )
if response.status_code != 200: analysis = response.choices[0].message.content
return json.dumps({ return json.dumps({
"success": False, "success": True,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}" "analysis": analysis,
}, ensure_ascii=False) }, ensure_ascii=False)
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return json.dumps({
"success": True,
"analysis": analysis
}, ensure_ascii=False)
except Exception as e: except Exception as e:
return json.dumps({ return json.dumps({

View file

@ -22,9 +22,19 @@ import os
import logging import logging
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from tools.openrouter_client import get_async_client as _get_client from openai import AsyncOpenAI, OpenAI
SUMMARIZER_MODEL = "google/gemini-3-flash-preview" from agent.auxiliary_client import get_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_aux_client = AsyncOpenAI(
api_key=_aux_client.api_key,
base_url=str(_aux_client.base_url),
)
MAX_SESSION_CHARS = 100_000 MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000 MAX_SUMMARY_TOKENS = 2000
@ -126,11 +136,15 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}" f"Summarize this conversation with focus on: {query}"
) )
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
logging.warning("No auxiliary model available for session summarization")
return None
max_retries = 3 max_retries = 3
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
response = await _get_client().chat.completions.create( response = await _async_aux_client.chat.completions.create(
model=SUMMARIZER_MODEL, model=_SUMMARIZER_MODEL,
messages=[ messages=[
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}, {"role": "user", "content": user_prompt},
@ -252,8 +266,8 @@ def session_search(
def check_session_search_requirements() -> bool: def check_session_search_requirements() -> bool:
"""Requires SQLite state database and OpenRouter API key.""" """Requires SQLite state database and an auxiliary text model."""
if not os.getenv("OPENROUTER_API_KEY"): if _async_aux_client is None:
return False return False
try: try:
from hermes_state import DEFAULT_DB_PATH from hermes_state import DEFAULT_DB_PATH
@ -316,5 +330,4 @@ registry.register(
limit=args.get("limit", 3), limit=args.get("limit", 3),
db=kw.get("db")), db=kw.get("db")),
check_fn=check_session_search_requirements, check_fn=check_session_search_requirements,
requires_env=["OPENROUTER_API_KEY"],
) )

View file

@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
# Global state for environment lifecycle management # Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {} _active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {} _last_activity: Dict[str, float] = {}
_env_lock = threading.Lock() _env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation _creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if current_time - last_time > lifetime_seconds: if current_time - last_time > lifetime_seconds:
env = _active_environments.pop(task_id, None) env = _active_environments.pop(task_id, None)
_last_activity.pop(task_id, None) _last_activity.pop(task_id, None)
_task_workdirs.pop(task_id, None)
if env is not None: if env is not None:
envs_to_stop.append((task_id, env)) envs_to_stop.append((task_id, env))
@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
info = { info = {
"count": len(_active_environments), "count": len(_active_environments),
"task_ids": list(_active_environments.keys()), "task_ids": list(_active_environments.keys()),
"workdirs": dict(_task_workdirs), "workdirs": {},
} }
# Calculate total disk usage # Calculate total disk usage
@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments(): def cleanup_all_environments():
"""Clean up ALL active environments. Use with caution.""" """Clean up ALL active environments. Use with caution."""
global _active_environments, _last_activity, _task_workdirs global _active_environments, _last_activity
task_ids = list(_active_environments.keys()) task_ids = list(_active_environments.keys())
cleaned = 0 cleaned = 0
@ -661,7 +659,7 @@ def cleanup_all_environments():
def cleanup_vm(task_id: str): def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id.""" """Manually clean up a specific environment by task_id."""
global _active_environments, _last_activity, _task_workdirs global _active_environments, _last_activity
# Remove from tracking dicts while holding the lock, but defer the # Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock # actual (potentially slow) env.cleanup() call to outside the lock
@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
env = None env = None
with _env_lock: with _env_lock:
env = _active_environments.pop(task_id, None) env = _active_environments.pop(task_id, None)
_task_workdirs.pop(task_id, None)
_last_activity.pop(task_id, None) _last_activity.pop(task_id, None)
# Clean up per-task creation lock # Clean up per-task creation lock
@ -782,17 +779,6 @@ def terminal_tool(
default_timeout = config["timeout"] default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout effective_timeout = timeout or default_timeout
# For local environment in batch mode, create a unique subdirectory per task
# This prevents parallel tasks from overwriting each other's files
# In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
if env_type == "local" and not os.getenv("HERMES_QUIET"):
with _env_lock:
if effective_task_id not in _task_workdirs:
task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
task_workdir.mkdir(parents=True, exist_ok=True)
_task_workdirs[effective_task_id] = str(task_workdir)
cwd = _task_workdirs[effective_task_id]
# Start cleanup thread # Start cleanup thread
_start_cleanup_thread() _start_cleanup_thread()
@ -874,11 +860,16 @@ def terminal_tool(
"description": approval.get("description", "dangerous command"), "description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""), "pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False) }, ensure_ascii=False)
# Command was blocked - return informative message # Command was blocked - include the pattern category so the caller knows why
desc = approval.get("description", "potentially dangerous operation")
fallback_msg = (
f"Command denied: matches '{desc}' pattern. "
"Use the approval prompt to allow it, or rephrase the command."
)
return json.dumps({ return json.dumps({
"output": "", "output": "",
"exit_code": -1, "exit_code": -1,
"error": approval.get("message", "Command denied - potentially dangerous operation"), "error": approval.get("message", fallback_msg),
"status": "blocked" "status": "blocked"
}, ensure_ascii=False) }, ensure_ascii=False)
@ -996,11 +987,17 @@ def terminal_tool(
# Add helpful message for sudo failures in messaging context # Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type) output = _handle_sudo_failure(output, env_type)
# Truncate output if too long # Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000 MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS: if len(output) > MAX_OUTPUT_CHARS:
truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..." head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
output = truncated_notice + output[-MAX_OUTPUT_CHARS:] tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
omitted = len(output) - head_chars - tail_chars
truncated_notice = (
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
f"out of {len(output)} total] ...\n\n"
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
return json.dumps({ return json.dumps({
"output": output.strip() if output else "", "output": output.strip() if output else "",

View file

@ -36,13 +36,20 @@ import base64
from pathlib import Path from pathlib import Path
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
import httpx import httpx
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key from openai import AsyncOpenAI
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Configuration for vision processing # Resolve vision auxiliary client at module level; build an async wrapper.
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview" _aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG") _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@ -230,9 +237,13 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60]) logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100]) logger.info("User prompt: %s", user_prompt[:100])
# Check API key availability # Check auxiliary vision client availability
if not os.getenv("OPENROUTER_API_KEY"): if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
raise ValueError("OPENROUTER_API_KEY environment variable not set") return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL # Determine if this is a local file path or a remote URL
local_path = Path(image_url) local_path = Path(image_url)
@ -291,18 +302,12 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model) logger.info("Processing image with %s...", model)
# Call the vision API with reasoning enabled # Call the vision API
response = await _get_openrouter_client().chat.completions.create( response = await _aux_async_client.chat.completions.create(
model=model, model=model,
messages=messages, messages=messages,
temperature=0.1, # Low temperature for consistent analysis temperature=0.1,
max_tokens=2000, # Generous limit for detailed analysis max_tokens=2000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
) )
# Extract the analysis # Extract the analysis
@ -353,13 +358,8 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool: def check_vision_requirements() -> bool:
""" """Check if an auxiliary vision model is available."""
Check if all requirements for vision tools are met. return _aux_async_client is not None
Returns:
bool: True if requirements are met, False otherwise
"""
return check_openrouter_api_key()
def get_debug_session_info() -> Dict[str, Any]: def get_debug_session_info() -> Dict[str, Any]:
@ -379,16 +379,15 @@ if __name__ == "__main__":
print("👁️ Vision Tools Module") print("👁️ Vision Tools Module")
print("=" * 40) print("=" * 40)
# Check if API key is available # Check if vision model is available
api_available = check_openrouter_api_key() api_available = check_vision_requirements()
if not api_available: if not api_available:
print("❌ OPENROUTER_API_KEY environment variable not set") print("❌ No auxiliary vision model available")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'") print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
print("Get API key at: https://openrouter.ai/")
exit(1) exit(1)
else: else:
print("✅ OpenRouter API key found") print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!") print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}") print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
image_url = args.get("image_url", "") image_url = args.get("image_url", "")
question = args.get("question", "") question = args.get("question", "")
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}" full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview") model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
return vision_analyze_tool(image_url, full_prompt, model)
registry.register( registry.register(
@ -464,6 +464,5 @@ registry.register(
schema=VISION_ANALYZE_SCHEMA, schema=VISION_ANALYZE_SCHEMA,
handler=_handle_vision_analyze, handler=_handle_vision_analyze,
check_fn=check_vision_requirements, check_fn=check_vision_requirements,
requires_env=["OPENROUTER_API_KEY"],
is_async=True, is_async=True,
) )

View file

@ -47,7 +47,8 @@ import re
import asyncio import asyncio
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl from firecrawl import Firecrawl
from tools.openrouter_client import get_async_client as _get_openrouter_client from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from tools.debug_helpers import DebugSession from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -64,9 +65,17 @@ def _get_firecrawl_client():
_firecrawl_client = Firecrawl(api_key=api_key) _firecrawl_client = Firecrawl(api_key=api_key)
return _firecrawl_client return _firecrawl_client
DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
response = await _get_openrouter_client().chat.completions.create( if _aux_async_client is None:
logger.warning("No auxiliary model available for web content processing")
return None
response = await _aux_async_client.chat.completions.create(
model=model, model=model,
messages=[ messages=[
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
], ],
temperature=0.1, temperature=0.1,
max_tokens=max_tokens, max_tokens=max_tokens,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
) )
return response.choices[0].message.content.strip() return response.choices[0].message.content.strip()
except Exception as api_error: except Exception as api_error:
@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary.""" Create a single, unified markdown summary."""
try: try:
response = await _get_openrouter_client().chat.completions.create( if _aux_async_client is None:
logger.warning("No auxiliary model for synthesis, concatenating summaries")
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
response = await _aux_async_client.chat.completions.create(
model=model, model=model,
messages=[ messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."}, {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
], ],
temperature=0.1, temperature=0.1,
max_tokens=4000, max_tokens=4000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
) )
final_summary = response.choices[0].message.content.strip() final_summary = response.choices[0].message.content.strip()
@ -677,8 +684,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response)) debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled # Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is not None:
logger.info("Processing extracted content with LLM (parallel)...") logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing") debug_call_data["processing_applied"].append("llm_processing")
@ -744,8 +751,8 @@ async def web_extract_tool(
else: else:
logger.warning("%s (no content to process)", url) logger.warning("%s (no content to process)", url)
else: else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content") logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable") debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior) # Print summary of extracted pages for debugging (original behavior)
@ -973,8 +980,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response)) debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled # Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is not None:
logger.info("Processing crawled content with LLM (parallel)...") logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing") debug_call_data["processing_applied"].append("llm_processing")
@ -1040,8 +1047,8 @@ async def web_crawl_tool(
else: else:
logger.warning("%s (no content to process)", page_url) logger.warning("%s (no content to process)", page_url)
else: else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content") logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable") debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior) # Print summary of crawled pages for debugging (original behavior)
@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
return bool(os.getenv("FIRECRAWL_API_KEY")) return bool(os.getenv("FIRECRAWL_API_KEY"))
def check_nous_api_key() -> bool: def check_auxiliary_model() -> bool:
""" """Check if an auxiliary text model is available for LLM content processing."""
Check if the Nous Research API key is available in environment variables. return _aux_async_client is not None
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("OPENROUTER_API_KEY"))
def get_debug_session_info() -> Dict[str, Any]: def get_debug_session_info() -> Dict[str, Any]:
@ -1120,7 +1122,7 @@ if __name__ == "__main__":
# Check if API keys are available # Check if API keys are available
firecrawl_available = check_firecrawl_api_key() firecrawl_available = check_firecrawl_api_key()
nous_available = check_nous_api_key() nous_available = check_auxiliary_model()
if not firecrawl_available: if not firecrawl_available:
print("❌ FIRECRAWL_API_KEY environment variable not set") print("❌ FIRECRAWL_API_KEY environment variable not set")
@ -1130,12 +1132,11 @@ if __name__ == "__main__":
print("✅ Firecrawl API key found") print("✅ Firecrawl API key found")
if not nous_available: if not nous_available:
print("❌ OPENROUTER_API_KEY environment variable not set") print("❌ No auxiliary model available for LLM content processing")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'") print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
print("Get API key at: https://inference-api.nousresearch.com/") print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
print("⚠️ Without Nous API key, LLM content processing will be disabled")
else: else:
print("✅ Nous Research API key found") print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
if not firecrawl_available: if not firecrawl_available:
exit(1) exit(1)
@ -1143,7 +1144,7 @@ if __name__ == "__main__":
print("🛠️ Web tools ready for use!") print("🛠️ Web tools ready for use!")
if nous_available: if nous_available:
print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter") print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars") print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
# Show debug mode status # Show debug mode status