Hermes Agent UX Improvements

This commit is contained in:
teknium1 2026-02-22 02:16:11 -08:00
parent b1f55e3ee5
commit ededaaa874
23 changed files with 945 additions and 1545 deletions

128
agent/auxiliary_client.py Normal file
View file

@ -0,0 +1,128 @@
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up
the best available backend without duplicating fallback logic.
Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
2. Nous Portal
3. None (custom endpoints can't substitute for Gemini multimodal)
"""
import json
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from openai import OpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"  # OpenRouter model slug for side tasks
_NOUS_MODEL = "gemini-3-flash"  # Nous Portal model slug for side tasks
# Default Nous inference endpoint; overridable via NOUS_INFERENCE_BASE_URL (see _nous_base_url).
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
# Auth state file consulted for the active Nous Portal provider.
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
def _read_nous_auth() -> Optional[dict]:
    """Load ~/.hermes/auth.json and return the Nous provider state, if usable.

    Returns the provider dict only when Nous is the active provider AND it
    carries at least one credential; returns None in every other case
    (missing file, parse error, different active provider, no tokens).
    """
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
        state = json.loads(_AUTH_JSON_PATH.read_text())
        if state.get("active_provider") != "nous":
            return None
        nous_state = state.get("providers", {}).get("nous", {})
        # A usable provider carries an agent_key or an access_token.
        if nous_state.get("agent_key") or nous_state.get("access_token"):
            return nous_state
        return None
    except Exception as exc:
        # Best-effort read: any failure simply means "no Nous auth".
        logger.debug("Could not read Nous auth: %s", exc)
        return None
def _nous_api_key(provider: dict) -> str:
"""Extract the best API key from a Nous provider state dict."""
return provider.get("agent_key") or provider.get("access_token", "")
def _nous_base_url() -> str:
    """Return the Nous inference base URL (env override or built-in default)."""
    override = os.environ.get("NOUS_INFERENCE_BASE_URL")
    return override if override is not None else _NOUS_DEFAULT_BASE_URL
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for text-only auxiliary tasks.

    Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
    """
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        # 1. OpenRouter wins whenever a key is configured.
        logger.debug("Auxiliary text client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    nous_state = _read_nous_auth()
    if nous_state:
        # 2. Nous Portal credentials from ~/.hermes/auth.json.
        logger.debug("Auxiliary text client: Nous Portal")
        client = OpenAI(api_key=_nous_api_key(nous_state), base_url=_nous_base_url())
        return client, _NOUS_MODEL

    custom_base_url = os.getenv("OPENAI_BASE_URL")
    custom_api_key = os.getenv("OPENAI_API_KEY")
    if custom_base_url and custom_api_key:
        # 3. Custom endpoint — requires BOTH the base URL and the key.
        model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
        logger.debug("Auxiliary text client: custom endpoint (%s)", model)
        return OpenAI(api_key=custom_api_key, base_url=custom_base_url), model

    # 4. No backend available.
    logger.debug("Auxiliary text client: none available")
    return None, None
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for vision/multimodal auxiliary tasks.

    Only OpenRouter and the Nous Portal qualify — custom endpoints cannot
    substitute for Gemini multimodal.
    """
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        # 1. OpenRouter
        logger.debug("Auxiliary vision client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    provider_state = _read_nous_auth()
    if provider_state:
        # 2. Nous Portal
        logger.debug("Auxiliary vision client: Nous Portal")
        client = OpenAI(
            api_key=_nous_api_key(provider_state),
            base_url=_nous_base_url(),
        )
        return client, _NOUS_MODEL

    # 3. Neither multimodal-capable backend is configured.
    logger.debug("Auxiliary vision client: none available")
    return None, None

View file

@ -9,13 +9,11 @@ import logging
import os import os
from typing import Any, Dict, List from typing import Any, Dict, List
from openai import OpenAI from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import ( from agent.model_metadata import (
get_model_context_length, get_model_context_length,
estimate_messages_tokens_rough, estimate_messages_tokens_rough,
) )
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -31,7 +29,6 @@ class ContextCompressor:
self, self,
model: str, model: str,
threshold_percent: float = 0.85, threshold_percent: float = 0.85,
summary_model: str = "google/gemini-3-flash-preview",
protect_first_n: int = 3, protect_first_n: int = 3,
protect_last_n: int = 4, protect_last_n: int = 4,
summary_target_tokens: int = 500, summary_target_tokens: int = 500,
@ -39,7 +36,6 @@ class ContextCompressor:
): ):
self.model = model self.model = model
self.threshold_percent = threshold_percent self.threshold_percent = threshold_percent
self.summary_model = summary_model
self.protect_first_n = protect_first_n self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens self.summary_target_tokens = summary_target_tokens
@ -53,8 +49,7 @@ class ContextCompressor:
self.last_completion_tokens = 0 self.last_completion_tokens = 0
self.last_total_tokens = 0 self.last_total_tokens = 0
api_key = os.getenv("OPENROUTER_API_KEY", "") self.client, self.summary_model = get_text_auxiliary_client()
self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None
def update_from_response(self, usage: Dict[str, Any]): def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response.""" """Update tracked token usage from API response."""
@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not self.quiet_mode: if not self.quiet_mode:
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)") print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})") print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
# Truncation fallback when no auxiliary model is available
if self.client is None:
print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
# Keep system message(s) at the front and the protected tail;
# simply drop the oldest non-system messages until under threshold.
kept = []
for msg in messages:
if msg.get("role") == "system":
kept.append(msg.copy())
else:
break
tail = messages[-self.protect_last_n:]
kept.extend(m.copy() for m in tail)
self.compression_count += 1
if not self.quiet_mode:
print(f" ✂️ Truncated: {len(messages)}{len(kept)} messages (dropped middle turns)")
return kept
if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)") print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
summary = self._generate_summary(turns_to_summarize) summary = self._generate_summary(turns_to_summarize)

View file

@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency.
Used by AIAgent._execute_tool_calls for CLI feedback. Used by AIAgent._execute_tool_calls for CLI feedback.
""" """
import json
import os import os
import random import random
import threading import threading
import time import time
# ANSI escape codes for coloring tool failure indicators
_RED = "\033[31m"
_RESET = "\033[0m"
# ========================================================================= # =========================================================================
# Tool preview (one-line summary of a tool call's primary argument) # Tool preview (one-line summary of a tool call's primary argument)
@ -242,12 +247,46 @@ KAWAII_GENERIC = [
# Cute tool message (completion line that replaces the spinner) # Cute tool message (completion line that replaces the spinner)
# ========================================================================= # =========================================================================
def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str: def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
"""Inspect a tool result string for signs of failure.
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
failures. On success, returns ``(False, "")``.
"""
if result is None:
return False, ""
if tool_name == "terminal":
try:
data = json.loads(result)
exit_code = data.get("exit_code")
if exit_code is not None and exit_code != 0:
return True, f" [exit {exit_code}]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
return False, ""
# Generic heuristic for non-terminal tools
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
return True, " [error]"
return False, ""
def get_cute_tool_message(
tool_name: str, args: dict, duration: float, result: str | None = None,
) -> str:
"""Generate a formatted tool completion line for CLI quiet mode. """Generate a formatted tool completion line for CLI quiet mode.
Format: ``| {emoji} {verb:9} {detail} {duration}`` Format: ``| {emoji} {verb:9} {detail} {duration}``
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
""" """
dur = f"{duration:.1f}s" dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
def _trunc(s, n=40): def _trunc(s, n=40):
s = str(s) s = str(s)
@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
p = str(p) p = str(p)
return ("..." + p[-(n-3):]) if len(p) > n else p return ("..." + p[-(n-3):]) if len(p) > n else p
def _wrap(line: str) -> str:
"""Apply red coloring and failure suffix when the tool failed."""
if not is_failure:
return line
return f"{_RED}{line}{failure_suffix}{_RESET}"
if tool_name == "web_search": if tool_name == "web_search":
return f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}" return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract": if tool_name == "web_extract":
urls = args.get("urls", []) urls = args.get("urls", [])
if urls: if urls:
url = urls[0] if isinstance(urls, list) else str(urls) url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0] domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else "" extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}" return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return f"┊ 📄 fetch pages {dur}" return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "web_crawl": if tool_name == "web_crawl":
url = args.get("url", "") url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0] domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}" return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
if tool_name == "terminal": if tool_name == "terminal":
return f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}" return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process": if tool_name == "process":
action = args.get("action", "?") action = args.get("action", "?")
sid = args.get("session_id", "")[:12] sid = args.get("session_id", "")[:12]
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}", labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"} "wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
return f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}" return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file": if tool_name == "read_file":
return f"┊ 📖 read {_path(args.get('path', ''))} {dur}" return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file": if tool_name == "write_file":
return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}" return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch": if tool_name == "patch":
return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}" return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
if tool_name == "search_files": if tool_name == "search_files":
pattern = _trunc(args.get("pattern", ""), 35) pattern = _trunc(args.get("pattern", ""), 35)
target = args.get("target", "content") target = args.get("target", "content")
verb = "find" if target == "files" else "grep" verb = "find" if target == "files" else "grep"
return f"┊ 🔎 {verb:9} {pattern} {dur}" return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
if tool_name == "browser_navigate": if tool_name == "browser_navigate":
url = args.get("url", "") url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0] domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}" return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
if tool_name == "browser_snapshot": if tool_name == "browser_snapshot":
mode = "full" if args.get("full") else "compact" mode = "full" if args.get("full") else "compact"
return f"┊ 📸 snapshot {mode} {dur}" return _wrap(f"┊ 📸 snapshot {mode} {dur}")
if tool_name == "browser_click": if tool_name == "browser_click":
return f"┊ 👆 click {args.get('ref', '?')} {dur}" return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
if tool_name == "browser_type": if tool_name == "browser_type":
return f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}" return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
if tool_name == "browser_scroll": if tool_name == "browser_scroll":
d = args.get("direction", "down") d = args.get("direction", "down")
arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "") arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "")
return f"{arrow} scroll {d} {dur}" return _wrap(f"{arrow} scroll {d} {dur}")
if tool_name == "browser_back": if tool_name == "browser_back":
return f"┊ ◀️ back {dur}" return _wrap(f"┊ ◀️ back {dur}")
if tool_name == "browser_press": if tool_name == "browser_press":
return f"┊ ⌨️ press {args.get('key', '?')} {dur}" return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
if tool_name == "browser_close": if tool_name == "browser_close":
return f"┊ 🚪 close browser {dur}" return _wrap(f"┊ 🚪 close browser {dur}")
if tool_name == "browser_get_images": if tool_name == "browser_get_images":
return f"┊ 🖼️ images extracting {dur}" return _wrap(f"┊ 🖼️ images extracting {dur}")
if tool_name == "browser_vision": if tool_name == "browser_vision":
return f"┊ 👁️ vision analyzing page {dur}" return _wrap(f"┊ 👁️ vision analyzing page {dur}")
if tool_name == "todo": if tool_name == "todo":
todos_arg = args.get("todos") todos_arg = args.get("todos")
merge = args.get("merge", False) merge = args.get("merge", False)
if todos_arg is None: if todos_arg is None:
return f"┊ 📋 plan reading tasks {dur}" return _wrap(f"┊ 📋 plan reading tasks {dur}")
elif merge: elif merge:
return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}" return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
else: else:
return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}" return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
if tool_name == "session_search": if tool_name == "session_search":
return f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}" return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
if tool_name == "memory": if tool_name == "memory":
action = args.get("action", "?") action = args.get("action", "?")
target = args.get("target", "") target = args.get("target", "")
if action == "add": if action == "add":
return f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}" return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace": elif action == "replace":
return f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}" return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove": elif action == "remove":
return f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}" return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return f"┊ 🧠 memory {action} {dur}" return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list": if tool_name == "skills_list":
return f"┊ 📚 skills list {args.get('category', 'all')} {dur}" return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
if tool_name == "skill_view": if tool_name == "skill_view":
return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}" return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
if tool_name == "image_generate": if tool_name == "image_generate":
return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}" return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
if tool_name == "text_to_speech": if tool_name == "text_to_speech":
return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}" return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze": if tool_name == "vision_analyze":
return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}" return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents": if tool_name == "mixture_of_agents":
return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}" return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message": if tool_name == "send_message":
return f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}" return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "schedule_cronjob": if tool_name == "schedule_cronjob":
return f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}" return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
if tool_name == "list_cronjobs": if tool_name == "list_cronjobs":
return f"┊ ⏰ jobs listing {dur}" return _wrap(f"┊ ⏰ jobs listing {dur}")
if tool_name == "remove_cronjob": if tool_name == "remove_cronjob":
return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}" return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
if tool_name.startswith("rl_"): if tool_name.startswith("rl_"):
rl = { rl = {
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}", "rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}", "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
"rl_list_runs": "list runs", "rl_test_inference": "test inference", "rl_list_runs": "list runs", "rl_test_inference": "test inference",
} }
return f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}" return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
if tool_name == "execute_code": if tool_name == "execute_code":
code = args.get("code", "") code = args.get("code", "")
first_line = code.strip().split("\n")[0] if code.strip() else "" first_line = code.strip().split("\n")[0] if code.strip() else ""
return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}" return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
if tool_name == "delegate_task": if tool_name == "delegate_task":
tasks = args.get("tasks") tasks = args.get("tasks")
if tasks and isinstance(tasks, list): if tasks and isinstance(tasks, list):
return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}" return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
return f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}" return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
preview = build_tool_preview(tool_name, args) or "" preview = build_tool_preview(tool_name, args) or ""
return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}" return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")

3
cli.py
View file

@ -339,9 +339,6 @@ def _cprint(text: str):
""" """
_pt_print(_PT_ANSI(text)) _pt_print(_PT_ANSI(text))
# Version string
VERSION = "v1.0.0"
# ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal) # ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal)
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/] HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/] [bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -8,6 +8,7 @@ Handles loading and validating configuration for:
- Delivery preferences - Delivery preferences
""" """
import logging
import os import os
import json import json
from pathlib import Path from pathlib import Path
@ -15,6 +16,8 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
from enum import Enum from enum import Enum
logger = logging.getLogger(__name__)
class Platform(Enum): class Platform(Enum):
"""Supported messaging platforms.""" """Supported messaging platforms."""
@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig:
# Override with environment variables # Override with environment variables
_apply_env_overrides(config) _apply_env_overrides(config)
# --- Validate loaded values ---
policy = config.default_reset_policy
if not (0 <= policy.at_hour <= 23):
logger.warning(
"Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
)
policy.at_hour = 4
if policy.idle_minutes is None or policy.idle_minutes <= 0:
logger.warning(
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
policy.idle_minutes,
)
policy.idle_minutes = 1440
# Warn about empty bot tokens — platforms that loaded an empty string
# won't connect and the cause can be confusing without a log line.
_token_env_names = {
Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
Platform.DISCORD: "DISCORD_BOT_TOKEN",
Platform.SLACK: "SLACK_BOT_TOKEN",
}
for platform, pconfig in config.platforms.items():
if not pconfig.enabled:
continue
env_name = _token_env_names.get(platform)
if env_name and pconfig.token is not None and not pconfig.token.strip():
logger.warning(
"%s is enabled but %s is empty. "
"The adapter will likely fail to connect.",
platform.value, env_name,
)
return config return config

View file

@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on:
- Local (always saved to files) - Local (always saved to files)
""" """
import logging
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Any, Union from typing import Dict, List, Optional, Any, Union
from enum import Enum from enum import Enum
logger = logging.getLogger(__name__)
MAX_PLATFORM_OUTPUT = 4000
TRUNCATED_VISIBLE = 3800
from .config import Platform, GatewayConfig from .config import Platform, GatewayConfig
from .session import SessionSource from .session import SessionSource
@ -245,6 +251,15 @@ class DeliveryRouter:
"timestamp": timestamp "timestamp": timestamp
} }
def _save_full_output(self, content: str, job_id: str) -> Path:
"""Save full cron output to disk and return the file path."""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = Path.home() / ".hermes" / "cron" / "output"
out_dir.mkdir(parents=True, exist_ok=True)
path = out_dir / f"{job_id}_{timestamp}.txt"
path.write_text(content)
return path
async def _deliver_to_platform( async def _deliver_to_platform(
self, self,
target: DeliveryTarget, target: DeliveryTarget,
@ -260,8 +275,16 @@ class DeliveryRouter:
if not target.chat_id: if not target.chat_id:
raise ValueError(f"No chat ID for {target.platform.value} delivery") raise ValueError(f"No chat ID for {target.platform.value} delivery")
# Call the adapter's send method # Guard: truncate oversized cron output to stay within platform limits
# Adapters should implement: async def send(chat_id: str, content: str) -> Dict if len(content) > MAX_PLATFORM_OUTPUT:
job_id = (metadata or {}).get("job_id", "unknown")
saved_path = self._save_full_output(content, job_id)
logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
content = (
content[:TRUNCATED_VISIBLE]
+ f"\n\n... [truncated, full output saved to {saved_path}]"
)
return await adapter.send(target.chat_id, content, metadata=metadata) return await adapter.send(target.chat_id, content, metadata=metadata)

View file

@ -659,7 +659,12 @@ class BasePlatformAdapter(ABC):
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]: def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
""" """
Split a long message into chunks. Split a long message into chunks, preserving code block boundaries.
When a split falls inside a triple-backtick code block, the fence is
closed at the end of the current chunk and reopened (with the original
language tag) at the start of the next chunk. Multi-chunk responses
receive indicators like ``(1/3)``.
Args: Args:
content: The full message content content: The full message content
@ -671,22 +676,73 @@ class BasePlatformAdapter(ABC):
if len(content) <= max_length: if len(content) <= max_length:
return [content] return [content]
chunks = [] INDICATOR_RESERVE = 10 # room for " (XX/XX)"
while content: FENCE_CLOSE = "\n```"
if len(content) <= max_length:
chunks.append(content) chunks: List[str] = []
remaining = content
# When the previous chunk ended mid-code-block, this holds the
# language tag (possibly "") so we can reopen the fence.
carry_lang: Optional[str] = None
while remaining:
# If we're continuing a code block from the previous chunk,
# prepend a new opening fence with the same language tag.
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
# How much body text we can fit after accounting for the prefix,
# a potential closing fence, and the chunk indicator.
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
if headroom < 1:
headroom = max_length // 2
# Everything remaining fits in one final chunk
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
chunks.append(prefix + remaining)
break break
# Try to split at a newline # Find a natural split point (prefer newlines, then spaces)
split_idx = content.rfind("\n", 0, max_length) region = remaining[:headroom]
if split_idx == -1: split_at = region.rfind("\n")
# No newline, split at space if split_at < headroom // 2:
split_idx = content.rfind(" ", 0, max_length) split_at = region.rfind(" ")
if split_idx == -1: if split_at < 1:
# No space either, hard split split_at = headroom
split_idx = max_length
chunks.append(content[:split_idx]) chunk_body = remaining[:split_at]
content = content[split_idx:].lstrip() remaining = remaining[split_at:].lstrip()
full_chunk = prefix + chunk_body
# Walk the chunk line-by-line to determine whether we end
# inside an open code block.
in_code = carry_lang is not None
lang = carry_lang or ""
for line in full_chunk.split("\n"):
stripped = line.strip()
if stripped.startswith("```"):
if in_code:
in_code = False
lang = ""
else:
in_code = True
tag = stripped[3:].strip()
lang = tag.split()[0] if tag else ""
if in_code:
# Close the orphaned fence so the chunk is valid on its own
full_chunk += FENCE_CLOSE
carry_lang = lang
else:
carry_lang = None
chunks.append(full_chunk)
# Append chunk indicators when the response spans multiple messages
if len(chunks) > 1:
total = len(chunks)
chunks = [
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
]
return chunks return chunks

View file

@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
""" """
import asyncio import asyncio
import re
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
try: try:
@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
return TELEGRAM_AVAILABLE return TELEGRAM_AVAILABLE
# Matches every character that MarkdownV2 requires to be backslash-escaped
# when it appears outside a code span or fenced code block.
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
def _escape_mdv2(text: str) -> str:
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
class TelegramAdapter(BasePlatformAdapter): class TelegramAdapter(BasePlatformAdapter):
""" """
Telegram bot adapter. Telegram bot adapter.
@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
msg = await self._bot.send_message( msg = await self._bot.send_message(
chat_id=int(chat_id), chat_id=int(chat_id),
text=chunk, text=chunk,
parse_mode=ParseMode.MARKDOWN, parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None, message_thread_id=int(thread_id) if thread_id else None,
) )
@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter):
def format_message(self, content: str) -> str: def format_message(self, content: str) -> str:
""" """
Format message for Telegram. Convert standard markdown to Telegram MarkdownV2 format.
Telegram uses a subset of markdown. We'll use the simpler Protected regions (code blocks, inline code) are extracted first so
Markdown mode (not MarkdownV2) for compatibility. their contents are never modified. Standard markdown constructs
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
and all remaining special characters are escaped.
""" """
# Basic escaping for Telegram Markdown if not content:
# In Markdown mode (not V2), only certain characters need escaping return content
return content
placeholders: dict = {}
counter = [0]
def _ph(value: str) -> str:
"""Stash *value* behind a placeholder token that survives escaping."""
key = f"\x00PH{counter[0]}\x00"
counter[0] += 1
placeholders[key] = value
return key
text = content
# 1) Protect fenced code blocks (``` ... ```)
text = re.sub(
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
lambda m: _ph(m.group(0)),
text,
)
# 2) Protect inline code (`...`)
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
# 3) Convert markdown links — escape the display text; inside the URL
# only ')' and '\' need escaping per the MarkdownV2 spec.
def _convert_link(m):
display = _escape_mdv2(m.group(1))
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
inner = m.group(1).strip()
# Strip redundant bold markers that may appear inside a header
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
return _ph(f'*{_escape_mdv2(inner)}*')
text = re.sub(
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
)
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
text = re.sub(
r'\*\*(.+?)\*\*',
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
text,
)
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
text = re.sub(
r'\*([^*]+)\*',
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
text,
)
# 7) Escape remaining special characters in plain text
text = _escape_mdv2(text)
# 8) Restore placeholders in reverse insertion order so that
# nested references (a placeholder inside another) resolve correctly.
for key in reversed(list(placeholders.keys())):
text = text.replace(key, placeholders[key])
return text
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages.""" """Handle incoming text messages."""

View file

@ -20,6 +20,7 @@ import re
import sys import sys
import signal import signal
import threading import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from typing import Dict, Optional, Any, List from typing import Dict, Optional, Any, List
@ -402,9 +403,27 @@ class GatewayRunner:
# Build the context prompt to inject # Build the context prompt to inject
context_prompt = build_session_context_prompt(context) context_prompt = build_session_context_prompt(context)
# If the previous session expired and was auto-reset, prepend a notice
# so the agent knows this is a fresh conversation (not an intentional /reset).
if getattr(session_entry, 'was_auto_reset', False):
context_prompt = (
"[System note: The user's previous session expired due to inactivity. "
"This is a fresh conversation with no prior context.]\n\n"
+ context_prompt
)
session_entry.was_auto_reset = False
# Load conversation history from transcript # Load conversation history from transcript
history = self.session_store.load_transcript(session_entry.session_id) history = self.session_store.load_transcript(session_entry.session_id)
# First-message onboarding for brand-new messaging platform users
if not history:
context_prompt += (
"\n\n[System note: This is the user's very first message in this session. "
"Briefly introduce yourself and mention that /help shows available commands. "
"Keep the introduction concise -- one or two sentences max.]"
)
# ----------------------------------------------------------------- # -----------------------------------------------------------------
# Auto-analyze images sent by the user # Auto-analyze images sent by the user
# #
@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60):
Runs inside the gateway process so cronjobs fire automatically without Runs inside the gateway process so cronjobs fire automatically without
needing a separate `hermes cron daemon` or system cron entry. needing a separate `hermes cron daemon` or system cron entry.
Every 60th tick (~once per hour) the image/audio cache is pruned so
stale temp files don't accumulate.
""" """
from cron.scheduler import tick as cron_tick from cron.scheduler import tick as cron_tick
from gateway.platforms.base import cleanup_image_cache
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
logger.info("Cron ticker started (interval=%ds)", interval) logger.info("Cron ticker started (interval=%ds)", interval)
tick_count = 0
while not stop_event.is_set(): while not stop_event.is_set():
try: try:
cron_tick(verbose=False) cron_tick(verbose=False)
except Exception as e: except Exception as e:
logger.debug("Cron tick error: %s", e) logger.debug("Cron tick error: %s", e)
tick_count += 1
if tick_count % IMAGE_CACHE_EVERY == 0:
try:
removed = cleanup_image_cache(max_age_hours=24)
if removed:
logger.info("Image cache cleanup: removed %d stale file(s)", removed)
except Exception as e:
logger.debug("Image cache cleanup error: %s", e)
stop_event.wait(timeout=interval) stop_event.wait(timeout=interval)
logger.info("Cron ticker stopped") logger.info("Cron ticker stopped")
@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
Returns True if the gateway ran successfully, False if it failed to start. Returns True if the gateway ran successfully, False if it failed to start.
A False return causes a non-zero exit code so systemd can auto-restart. A False return causes a non-zero exit code so systemd can auto-restart.
""" """
# Configure rotating file log so gateway output is persisted for debugging
log_dir = Path.home() / '.hermes' / 'logs'
log_dir.mkdir(parents=True, exist_ok=True)
file_handler = RotatingFileHandler(
log_dir / 'gateway.log',
maxBytes=5 * 1024 * 1024,
backupCount=3,
)
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(file_handler)
logging.getLogger().setLevel(logging.INFO)
runner = GatewayRunner(config) runner = GatewayRunner(config)
# Set up signal handlers # Set up signal handlers

View file

@ -219,6 +219,10 @@ class SessionEntry:
output_tokens: int = 0 output_tokens: int = 0
total_tokens: int = 0 total_tokens: int = 0
# Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
result = { result = {
"session_key": self.session_key, "session_key": self.session_key,
@ -388,11 +392,14 @@ class SessionStore:
return entry return entry
else: else:
# Session is being reset -- end the old one in SQLite # Session is being reset -- end the old one in SQLite
was_auto_reset = True
if self._db: if self._db:
try: try:
self._db.end_session(entry.session_id, "session_reset") self._db.end_session(entry.session_id, "session_reset")
except Exception as e: except Exception as e:
logger.debug("Session DB operation failed: %s", e) logger.debug("Session DB operation failed: %s", e)
else:
was_auto_reset = False
# Create new session # Create new session
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}" session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@ -406,6 +413,7 @@ class SessionStore:
display_name=source.chat_name, display_name=source.chat_name,
platform=source.platform, platform=source.platform,
chat_type=source.chat_type, chat_type=source.chat_type,
was_auto_reset=was_auto_reset,
) )
self._entries[session_key] = entry self._entries[session_key] = entry

View file

@ -11,4 +11,4 @@ Provides subcommands for:
- hermes cron - Manage cron jobs - hermes cron - Manage cron jobs
""" """
__version__ = "0.1.0" __version__ = "v1.0.0"

View file

@ -33,7 +33,7 @@ def cprint(text: str):
# ASCII Art & Branding # ASCII Art & Branding
# ========================================================================= # =========================================================================
VERSION = "v1.0.0" from hermes_cli import __version__ as VERSION
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/] HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/] [bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -44,6 +44,8 @@ def run_doctor(args):
should_fix = getattr(args, 'fix', False) should_fix = getattr(args, 'fix', False)
issues = [] issues = []
manual_issues = [] # issues that can't be auto-fixed
fixed_count = 0
print() print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN)) print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@ -135,8 +137,15 @@ def run_doctor(args):
check_ok(".env file exists (in project directory)") check_ok(".env file exists (in project directory)")
else: else:
check_fail("~/.hermes/.env file missing") check_fail("~/.hermes/.env file missing")
check_info("Run 'hermes setup' to create one") if should_fix:
issues.append("Run 'hermes setup' to create .env") env_path.parent.mkdir(parents=True, exist_ok=True)
env_path.touch()
check_ok("Created empty ~/.hermes/.env")
check_info("Run 'hermes setup' to configure API keys")
fixed_count += 1
else:
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
# Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback) # Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
config_path = HERMES_HOME / 'config.yaml' config_path = HERMES_HOME / 'config.yaml'
@ -147,7 +156,17 @@ def run_doctor(args):
if fallback_config.exists(): if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)") check_ok("cli-config.yaml exists (in project directory)")
else: else:
check_warn("config.yaml not found", "(using defaults)") example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if should_fix and example_config.exists():
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(example_config), str(config_path))
check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
fixed_count += 1
elif should_fix:
check_warn("config.yaml not found and no example to copy from")
manual_issues.append("Create ~/.hermes/config.yaml manually")
else:
check_warn("config.yaml not found", "(using defaults)")
# ========================================================================= # =========================================================================
# Check: Directory structure # Check: Directory structure
@ -159,7 +178,26 @@ def run_doctor(args):
if hermes_home.exists(): if hermes_home.exists():
check_ok("~/.hermes directory exists") check_ok("~/.hermes directory exists")
else: else:
check_warn("~/.hermes not found", "(will be created on first use)") if should_fix:
hermes_home.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes directory")
fixed_count += 1
else:
check_warn("~/.hermes not found", "(will be created on first use)")
# Check expected subdirectories
expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
for subdir_name in expected_subdirs:
subdir_path = hermes_home / subdir_name
if subdir_path.exists():
check_ok(f"~/.hermes/{subdir_name}/ exists")
else:
if should_fix:
subdir_path.mkdir(parents=True, exist_ok=True)
check_ok(f"Created ~/.hermes/{subdir_name}/")
fixed_count += 1
else:
check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
# Check for SOUL.md persona file # Check for SOUL.md persona file
soul_path = hermes_home / "SOUL.md" soul_path = hermes_home / "SOUL.md"
@ -175,14 +213,25 @@ def run_doctor(args):
check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)") check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
if should_fix: if should_fix:
soul_path.parent.mkdir(parents=True, exist_ok=True) soul_path.parent.mkdir(parents=True, exist_ok=True)
soul_path.write_text("# Hermes Agent Persona\n\n<!-- Edit this file to customize how Hermes communicates. -->\n", encoding="utf-8") soul_path.write_text(
check_ok("Created ~/.hermes/SOUL.md") "# Hermes Agent Persona\n\n"
"<!-- Edit this file to customize how Hermes communicates. -->\n\n"
"You are Hermes, a helpful AI assistant.\n",
encoding="utf-8",
)
check_ok("Created ~/.hermes/SOUL.md with basic template")
fixed_count += 1
logs_dir = PROJECT_ROOT / "logs" logs_dir = PROJECT_ROOT / "logs"
if logs_dir.exists(): if logs_dir.exists():
check_ok("logs/ directory exists") check_ok("logs/ directory exists (project root)")
else: else:
check_warn("logs/ not found", "(will be created on first use)") if should_fix:
logs_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created logs/ directory")
fixed_count += 1
else:
check_warn("logs/ not found", "(will be created on first use)")
# Check memory directory # Check memory directory
memories_dir = hermes_home / "memories" memories_dir = hermes_home / "memories"
@ -205,6 +254,7 @@ def run_doctor(args):
if should_fix: if should_fix:
memories_dir.mkdir(parents=True, exist_ok=True) memories_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes/memories/") check_ok("Created ~/.hermes/memories/")
fixed_count += 1
# Check SQLite session store # Check SQLite session store
state_db_path = hermes_home / "state.db" state_db_path = hermes_home / "state.db"
@ -299,6 +349,7 @@ def run_doctor(args):
openrouter_key = os.getenv("OPENROUTER_API_KEY") openrouter_key = os.getenv("OPENROUTER_API_KEY")
if openrouter_key: if openrouter_key:
print(" Checking OpenRouter API...", end="", flush=True)
try: try:
import httpx import httpx
response = httpx.get( response = httpx.get(
@ -307,20 +358,21 @@ def run_doctor(args):
timeout=10 timeout=10
) )
if response.status_code == 200: if response.status_code == 200:
check_ok("OpenRouter API") print(f"\r {color('', Colors.GREEN)} OpenRouter API ")
elif response.status_code == 401: elif response.status_code == 401:
check_fail("OpenRouter API", "(invalid API key)") print(f"\r {color('', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
issues.append("Check OPENROUTER_API_KEY in .env") issues.append("Check OPENROUTER_API_KEY in .env")
else: else:
check_fail("OpenRouter API", f"(HTTP {response.status_code})") print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
except Exception as e: except Exception as e:
check_fail("OpenRouter API", f"({e})") print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ")
issues.append("Check network connectivity") issues.append("Check network connectivity")
else: else:
check_warn("OpenRouter API", "(not configured)") check_warn("OpenRouter API", "(not configured)")
anthropic_key = os.getenv("ANTHROPIC_API_KEY") anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_key: if anthropic_key:
print(" Checking Anthropic API...", end="", flush=True)
try: try:
import httpx import httpx
response = httpx.get( response = httpx.get(
@ -332,14 +384,14 @@ def run_doctor(args):
timeout=10 timeout=10
) )
if response.status_code == 200: if response.status_code == 200:
check_ok("Anthropic API") print(f"\r {color('', Colors.GREEN)} Anthropic API ")
elif response.status_code == 401: elif response.status_code == 401:
check_fail("Anthropic API", "(invalid API key)") print(f"\r {color('', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ")
else: else:
# Note: Anthropic may not have /models endpoint msg = "(couldn't verify)"
check_warn("Anthropic API", "(couldn't verify)") print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ")
except Exception as e: except Exception as e:
check_warn("Anthropic API", f"({e})") print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ")
# ========================================================================= # =========================================================================
# Check: Submodules # Check: Submodules
@ -440,17 +492,28 @@ def run_doctor(args):
# Summary # Summary
# ========================================================================= # =========================================================================
print() print()
if issues: remaining_issues = issues + manual_issues
print(color("" * 60, Colors.YELLOW)) if should_fix and fixed_count > 0:
print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD)) print(color("" * 60, Colors.GREEN))
print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
if remaining_issues:
print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
else:
print()
print() print()
for i, issue in enumerate(issues, 1): if remaining_issues:
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
elif remaining_issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
print()
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}") print(f" {i}. {issue}")
print() print()
if not should_fix:
if should_fix: print(color(" Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
print(color(" Attempting auto-fix is not yet implemented.", Colors.DIM))
print(color(" Please resolve issues manually.", Colors.DIM))
else: else:
print(color("" * 60, Colors.GREEN)) print(color("" * 60, Colors.GREEN))
print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD)) print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD))

View file

@ -47,8 +47,66 @@ from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _has_any_provider_configured() -> bool:
"""Check if at least one inference provider is usable."""
from hermes_cli.config import get_env_path, get_hermes_home
# Check env vars (may be set by .env or shell)
if os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"):
return True
# Check .env file for keys
env_file = get_env_path()
if env_file.exists():
try:
for line in env_file.read_text().splitlines():
line = line.strip()
if line.startswith("#") or "=" not in line:
continue
key, _, val = line.partition("=")
val = val.strip().strip("'\"")
if key.strip() in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY") and val:
return True
except Exception:
pass
# Check for Nous Portal OAuth credentials
auth_file = get_hermes_home() / "auth.json"
if auth_file.exists():
try:
import json
auth = json.loads(auth_file.read_text())
active = auth.get("active_provider")
if active:
state = auth.get("providers", {}).get(active, {})
if state.get("access_token") or state.get("refresh_token"):
return True
except Exception:
pass
return False
def cmd_chat(args): def cmd_chat(args):
"""Run interactive chat CLI.""" """Run interactive chat CLI."""
# First-run guard: check if any provider is configured before launching
if not _has_any_provider_configured():
print()
print("It looks like Hermes isn't configured yet -- no API keys or providers found.")
print()
print(" Run: hermes setup")
print()
try:
reply = input("Run setup now? [Y/n] ").strip().lower()
except (EOFError, KeyboardInterrupt):
reply = "n"
if reply in ("", "y", "yes"):
cmd_setup(args)
return
print()
print("You can run 'hermes setup' at any time to configure.")
sys.exit(1)
# Import and run the CLI # Import and run the CLI
from cli import main as cli_main from cli import main as cli_main
@ -219,20 +277,10 @@ def _model_flow_openrouter(config, current_model=""):
print("API key saved.") print("API key saved.")
print() print()
OPENROUTER_MODELS = [ from hermes_cli.models import model_ids
"anthropic/claude-opus-4.6", openrouter_models = model_ids()
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.5",
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
]
selected = _prompt_model_selection(OPENROUTER_MODELS, current_model=current_model) selected = _prompt_model_selection(openrouter_models, current_model=current_model)
if selected: if selected:
# Clear any custom endpoint and set provider to openrouter # Clear any custom endpoint and set provider to openrouter
if get_env_value("OPENAI_BASE_URL"): if get_env_value("OPENAI_BASE_URL"):

33
hermes_cli/models.py Normal file
View file

@ -0,0 +1,33 @@
"""
Canonical list of OpenRouter models offered in CLI and setup wizards.
Add, remove, or reorder entries here both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("anthropic/claude-opus-4.6", "recommended"),
("anthropic/claude-sonnet-4.5", ""),
("anthropic/claude-opus-4.5", ""),
("openai/gpt-5.2", ""),
("openai/gpt-5.2-codex", ""),
("google/gemini-3-pro-preview", ""),
("google/gemini-3-flash-preview", ""),
("z-ai/glm-4.7", ""),
("moonshotai/kimi-k2.5", ""),
("minimax/minimax-m2.1", ""),
]
def model_ids() -> list[str]:
"""Return just the model-id strings (convenience helper)."""
return [mid for mid, _ in OPENROUTER_MODELS]
def menu_labels() -> list[str]:
"""Return display labels like 'anthropic/claude-opus-4.6 (recommended)'."""
labels = []
for mid, desc in OPENROUTER_MODELS:
labels.append(f"{mid} ({desc})" if desc else mid)
return labels

View file

@ -611,46 +611,27 @@ def run_setup_wizard(args):
save_env_value("LLM_MODEL", custom) save_env_value("LLM_MODEL", custom)
# else: keep current # else: keep current
else: else:
# Static list for OpenRouter / fallback # Static list for OpenRouter / fallback (from canonical list)
model_choices = [ from hermes_cli.models import model_ids, menu_labels
"anthropic/claude-opus-4.6 (recommended)",
"anthropic/claude-sonnet-4.5", ids = model_ids()
"anthropic/claude-opus-4.5", model_choices = menu_labels() + [
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
"Custom model", "Custom model",
f"Keep current ({current_model})" f"Keep current ({current_model})",
] ]
model_idx = prompt_choice("Select default model:", model_choices, 11) keep_idx = len(model_choices) - 1
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
model_map = { if model_idx < len(ids):
0: "anthropic/claude-opus-4.6", config['model'] = ids[model_idx]
1: "anthropic/claude-sonnet-4.5", save_env_value("LLM_MODEL", ids[model_idx])
2: "anthropic/claude-opus-4.5", elif model_idx == len(ids): # Custom
3: "openai/gpt-5.2",
4: "openai/gpt-5.2-codex",
5: "google/gemini-3-pro-preview",
6: "google/gemini-3-flash-preview",
7: "z-ai/glm-4.7",
8: "moonshotai/kimi-k2.5",
9: "minimax/minimax-m2.1",
}
if model_idx in model_map:
config['model'] = model_map[model_idx]
save_env_value("LLM_MODEL", model_map[model_idx])
elif model_idx == 10: # Custom
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)") custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
if custom: if custom:
config['model'] = custom config['model'] = custom
save_env_value("LLM_MODEL", custom) save_env_value("LLM_MODEL", custom)
# else: Keep current (model_idx == 11) # else: Keep current
# ========================================================================= # =========================================================================
# Step 4: Terminal Backend # Step 4: Terminal Backend

File diff suppressed because it is too large Load diff

View file

@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
approval_callback=approval_callback) approval_callback=approval_callback)
if choice == "deny": if choice == "deny":
return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."} return {
"approved": False,
"message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
"pattern_key": pattern_key,
"description": description,
}
if choice == "session": if choice == "session":
approve_session(session_key, pattern_key) approve_session(session_key, pattern_key)

View file

@ -51,25 +51,16 @@ import signal
import subprocess import subprocess
import shutil import shutil
import sys import sys
import asyncio
import tempfile import tempfile
import threading import threading
import time import time
import requests import requests
from typing import Dict, Any, Optional, List from typing import Dict, Any, Optional, List
from pathlib import Path from pathlib import Path
from hermes_constants import OPENROUTER_CHAT_URL from agent.auxiliary_client import get_vision_auxiliary_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Try to import httpx for async LLM calls
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
# ============================================================================ # ============================================================================
# Configuration # Configuration
# ============================================================================ # ============================================================================
@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization # Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Model for task-aware extraction # Resolve vision auxiliary client for extraction/vision tasks
EXTRACTION_MODEL = "google/gemini-3-flash-preview" _aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
# Track active sessions per task # Track active sessions per task
# Now stores tuple of (session_name, browserbase_session_id, cdp_url) # Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@ -782,87 +773,49 @@ def _run_browser_command(
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
async def _extract_relevant_content( def _extract_relevant_content(
snapshot_text: str, snapshot_text: str,
user_task: Optional[str] = None user_task: Optional[str] = None
) -> str: ) -> str:
"""Use LLM to extract relevant content from a snapshot based on the user's task.
Falls back to simple truncation when no auxiliary vision model is configured.
""" """
Use LLM to extract relevant content from a snapshot based on the user's task. if _aux_vision_client is None or EXTRACTION_MODEL is None:
This provides task-aware summarization that preserves meaningful text content
(paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
Args:
snapshot_text: The full snapshot text
user_task: The user's current task/goal (optional)
Returns:
Summarized/extracted content
"""
if not HTTPX_AVAILABLE:
# Fall back to simple truncation
return _truncate_snapshot(snapshot_text) return _truncate_snapshot(snapshot_text)
# Get API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
return _truncate_snapshot(snapshot_text)
# Build extraction prompt
if user_task: if user_task:
extraction_prompt = f"""You are a content extractor for a browser automation agent. extraction_prompt = (
f"You are a content extractor for a browser automation agent.\n\n"
The user's task is: {user_task} f"The user's task is: {user_task}\n\n"
f"Given the following page snapshot (accessibility tree representation), "
Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on: f"extract and summarize the most relevant information for completing this task. Focus on:\n"
1. Interactive elements (buttons, links, inputs) that might be needed f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
2. Text content relevant to the task (prices, descriptions, headings, important info) f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
3. Navigation structure if relevant f"3. Navigation structure if relevant\n\n"
f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them. f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary that preserves actionable information and relevant content."
Page Snapshot: )
{snapshot_text}
Provide a concise summary that preserves actionable information and relevant content."""
else: else:
extraction_prompt = f"""Summarize this page snapshot, preserving: extraction_prompt = (
1. All interactive elements with their ref IDs (like [ref=e5]) f"Summarize this page snapshot, preserving:\n"
2. Key text content and headings f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
3. Important information visible on the page f"2. Key text content and headings\n"
f"3. Important information visible on the page\n\n"
Page Snapshot: f"Page Snapshot:\n{snapshot_text}\n\n"
{snapshot_text} f"Provide a concise summary focused on interactive elements and key content."
)
Provide a concise summary focused on interactive elements and key content."""
try: try:
async with httpx.AsyncClient(timeout=30.0) as client: response = _aux_vision_client.chat.completions.create(
response = await client.post( model=EXTRACTION_MODEL,
OPENROUTER_CHAT_URL, messages=[{"role": "user", "content": extraction_prompt}],
headers={ max_tokens=4000,
"Authorization": f"Bearer {api_key}", temperature=0.1,
"Content-Type": "application/json" )
}, return response.choices[0].message.content
json={
"model": EXTRACTION_MODEL,
"messages": [
{"role": "user", "content": extraction_prompt}
],
"max_tokens": 4000,
"temperature": 0.1
}
)
if response.status_code == 200:
result = response.json()
return result["choices"][0]["message"]["content"]
else:
# Fall back to truncation on API error
return _truncate_snapshot(snapshot_text)
except Exception: except Exception:
# Fall back to truncation on any error
return _truncate_snapshot(snapshot_text) return _truncate_snapshot(snapshot_text)
@ -991,16 +944,7 @@ def browser_snapshot(
# Check if snapshot needs summarization # Check if snapshot needs summarization
if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task: if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
# Run async extraction snapshot_text = _extract_relevant_content(snapshot_text, user_task)
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
snapshot_text = loop.run_until_complete(
_extract_relevant_content(snapshot_text, user_task)
)
elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD: elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
snapshot_text = _truncate_snapshot(snapshot_text) snapshot_text = _truncate_snapshot(snapshot_text)
@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default" effective_task_id = task_id or "default"
# Check for OpenRouter API key # Check auxiliary vision client
api_key = os.environ.get("OPENROUTER_API_KEY") if _aux_vision_client is None or EXTRACTION_MODEL is None:
if not api_key:
return json.dumps({ return json.dumps({
"success": False, "success": False,
"error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key." "error": "Browser vision unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False) }, ensure_ascii=False)
# Create a temporary file for the screenshot # Create a temporary file for the screenshot
@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
image_base64 = base64.b64encode(image_data).decode("ascii") image_base64 = base64.b64encode(image_data).decode("ascii")
data_url = f"data:image/png;base64,{image_base64}" data_url = f"data:image/png;base64,{image_base64}"
# Prepare the vision prompt vision_prompt = (
vision_prompt = f"""You are analyzing a screenshot of a web browser. f"You are analyzing a screenshot of a web browser.\n\n"
f"User's question: {question}\n\n"
f"Provide a detailed and helpful answer based on what you see in the screenshot. "
f"If there are interactive elements, describe them. If there are verification challenges "
f"or CAPTCHAs, describe what type they are and what action might be needed. "
f"Focus on answering the user's specific question."
)
User's question: {question} # Use the sync auxiliary vision client directly
response = _aux_vision_client.chat.completions.create(
Provide a detailed and helpful answer based on what you see in the screenshot. model=EXTRACTION_MODEL,
If there are interactive elements, describe them. If there are verification challenges messages=[
or CAPTCHAs, describe what type they are and what action might be needed. {
Focus on answering the user's specific question.""" "role": "user",
"content": [
# Call OpenRouter/Gemini for vision analysis {"type": "text", "text": vision_prompt},
if HTTPX_AVAILABLE: {"type": "image_url", "image_url": {"url": data_url}},
import asyncio
async def analyze_screenshot():
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
],
"max_tokens": 2000,
"temperature": 0.1
}
)
if response.status_code != 200:
return {
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return {
"success": True,
"analysis": analysis
}
# Run the async function
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
vision_result = loop.run_until_complete(analyze_screenshot())
return json.dumps(vision_result, ensure_ascii=False)
else:
# Fallback: use synchronous requests
response = requests.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
], ],
"max_tokens": 2000, }
"temperature": 0.1 ],
}, max_tokens=2000,
timeout=60 temperature=0.1,
) )
if response.status_code != 200: analysis = response.choices[0].message.content
return json.dumps({ return json.dumps({
"success": False, "success": True,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}" "analysis": analysis,
}, ensure_ascii=False) }, ensure_ascii=False)
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return json.dumps({
"success": True,
"analysis": analysis
}, ensure_ascii=False)
except Exception as e: except Exception as e:
return json.dumps({ return json.dumps({

View file

@ -22,9 +22,19 @@ import os
import logging import logging
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from tools.openrouter_client import get_async_client as _get_client from openai import AsyncOpenAI, OpenAI
SUMMARIZER_MODEL = "google/gemini-3-flash-preview" from agent.auxiliary_client import get_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_aux_client = AsyncOpenAI(
api_key=_aux_client.api_key,
base_url=str(_aux_client.base_url),
)
MAX_SESSION_CHARS = 100_000 MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000 MAX_SUMMARY_TOKENS = 2000
@ -126,11 +136,15 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}" f"Summarize this conversation with focus on: {query}"
) )
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
logging.warning("No auxiliary model available for session summarization")
return None
max_retries = 3 max_retries = 3
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
response = await _get_client().chat.completions.create( response = await _async_aux_client.chat.completions.create(
model=SUMMARIZER_MODEL, model=_SUMMARIZER_MODEL,
messages=[ messages=[
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}, {"role": "user", "content": user_prompt},
@ -252,8 +266,8 @@ def session_search(
def check_session_search_requirements() -> bool: def check_session_search_requirements() -> bool:
"""Requires SQLite state database and OpenRouter API key.""" """Requires SQLite state database and an auxiliary text model."""
if not os.getenv("OPENROUTER_API_KEY"): if _async_aux_client is None:
return False return False
try: try:
from hermes_state import DEFAULT_DB_PATH from hermes_state import DEFAULT_DB_PATH
@ -316,5 +330,4 @@ registry.register(
limit=args.get("limit", 3), limit=args.get("limit", 3),
db=kw.get("db")), db=kw.get("db")),
check_fn=check_session_search_requirements, check_fn=check_session_search_requirements,
requires_env=["OPENROUTER_API_KEY"],
) )

View file

@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
# Global state for environment lifecycle management # Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {} _active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {} _last_activity: Dict[str, float] = {}
_env_lock = threading.Lock() _env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation _creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if current_time - last_time > lifetime_seconds: if current_time - last_time > lifetime_seconds:
env = _active_environments.pop(task_id, None) env = _active_environments.pop(task_id, None)
_last_activity.pop(task_id, None) _last_activity.pop(task_id, None)
_task_workdirs.pop(task_id, None)
if env is not None: if env is not None:
envs_to_stop.append((task_id, env)) envs_to_stop.append((task_id, env))
@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
info = { info = {
"count": len(_active_environments), "count": len(_active_environments),
"task_ids": list(_active_environments.keys()), "task_ids": list(_active_environments.keys()),
"workdirs": dict(_task_workdirs), "workdirs": {},
} }
# Calculate total disk usage # Calculate total disk usage
@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments(): def cleanup_all_environments():
"""Clean up ALL active environments. Use with caution.""" """Clean up ALL active environments. Use with caution."""
global _active_environments, _last_activity, _task_workdirs global _active_environments, _last_activity
task_ids = list(_active_environments.keys()) task_ids = list(_active_environments.keys())
cleaned = 0 cleaned = 0
@ -661,7 +659,7 @@ def cleanup_all_environments():
def cleanup_vm(task_id: str): def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id.""" """Manually clean up a specific environment by task_id."""
global _active_environments, _last_activity, _task_workdirs global _active_environments, _last_activity
# Remove from tracking dicts while holding the lock, but defer the # Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock # actual (potentially slow) env.cleanup() call to outside the lock
@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
env = None env = None
with _env_lock: with _env_lock:
env = _active_environments.pop(task_id, None) env = _active_environments.pop(task_id, None)
_task_workdirs.pop(task_id, None)
_last_activity.pop(task_id, None) _last_activity.pop(task_id, None)
# Clean up per-task creation lock # Clean up per-task creation lock
@ -782,17 +779,6 @@ def terminal_tool(
default_timeout = config["timeout"] default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout effective_timeout = timeout or default_timeout
# For local environment in batch mode, create a unique subdirectory per task
# This prevents parallel tasks from overwriting each other's files
# In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
if env_type == "local" and not os.getenv("HERMES_QUIET"):
with _env_lock:
if effective_task_id not in _task_workdirs:
task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
task_workdir.mkdir(parents=True, exist_ok=True)
_task_workdirs[effective_task_id] = str(task_workdir)
cwd = _task_workdirs[effective_task_id]
# Start cleanup thread # Start cleanup thread
_start_cleanup_thread() _start_cleanup_thread()
@ -874,11 +860,16 @@ def terminal_tool(
"description": approval.get("description", "dangerous command"), "description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""), "pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False) }, ensure_ascii=False)
# Command was blocked - return informative message # Command was blocked - include the pattern category so the caller knows why
desc = approval.get("description", "potentially dangerous operation")
fallback_msg = (
f"Command denied: matches '{desc}' pattern. "
"Use the approval prompt to allow it, or rephrase the command."
)
return json.dumps({ return json.dumps({
"output": "", "output": "",
"exit_code": -1, "exit_code": -1,
"error": approval.get("message", "Command denied - potentially dangerous operation"), "error": approval.get("message", fallback_msg),
"status": "blocked" "status": "blocked"
}, ensure_ascii=False) }, ensure_ascii=False)
@ -996,11 +987,17 @@ def terminal_tool(
# Add helpful message for sudo failures in messaging context # Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type) output = _handle_sudo_failure(output, env_type)
# Truncate output if too long # Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000 MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS: if len(output) > MAX_OUTPUT_CHARS:
truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..." head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
output = truncated_notice + output[-MAX_OUTPUT_CHARS:] tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
omitted = len(output) - head_chars - tail_chars
truncated_notice = (
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
f"out of {len(output)} total] ...\n\n"
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
return json.dumps({ return json.dumps({
"output": output.strip() if output else "", "output": output.strip() if output else "",

View file

@ -36,13 +36,20 @@ import base64
from pathlib import Path from pathlib import Path
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
import httpx import httpx
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key from openai import AsyncOpenAI
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Configuration for vision processing # Resolve vision auxiliary client at module level; build an async wrapper.
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview" _aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG") _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@ -230,9 +237,13 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60]) logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100]) logger.info("User prompt: %s", user_prompt[:100])
# Check API key availability # Check auxiliary vision client availability
if not os.getenv("OPENROUTER_API_KEY"): if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
raise ValueError("OPENROUTER_API_KEY environment variable not set") return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL # Determine if this is a local file path or a remote URL
local_path = Path(image_url) local_path = Path(image_url)
@ -291,18 +302,12 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model) logger.info("Processing image with %s...", model)
# Call the vision API with reasoning enabled # Call the vision API
response = await _get_openrouter_client().chat.completions.create( response = await _aux_async_client.chat.completions.create(
model=model, model=model,
messages=messages, messages=messages,
temperature=0.1, # Low temperature for consistent analysis temperature=0.1,
max_tokens=2000, # Generous limit for detailed analysis max_tokens=2000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
) )
# Extract the analysis # Extract the analysis
@ -353,13 +358,8 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool: def check_vision_requirements() -> bool:
""" """Check if an auxiliary vision model is available."""
Check if all requirements for vision tools are met. return _aux_async_client is not None
Returns:
bool: True if requirements are met, False otherwise
"""
return check_openrouter_api_key()
def get_debug_session_info() -> Dict[str, Any]: def get_debug_session_info() -> Dict[str, Any]:
@ -379,16 +379,15 @@ if __name__ == "__main__":
print("👁️ Vision Tools Module") print("👁️ Vision Tools Module")
print("=" * 40) print("=" * 40)
# Check if API key is available # Check if vision model is available
api_available = check_openrouter_api_key() api_available = check_vision_requirements()
if not api_available: if not api_available:
print("❌ OPENROUTER_API_KEY environment variable not set") print("❌ No auxiliary vision model available")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'") print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
print("Get API key at: https://openrouter.ai/")
exit(1) exit(1)
else: else:
print("✅ OpenRouter API key found") print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!") print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}") print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
image_url = args.get("image_url", "") image_url = args.get("image_url", "")
question = args.get("question", "") question = args.get("question", "")
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}" full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview") model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
return vision_analyze_tool(image_url, full_prompt, model)
registry.register( registry.register(
@ -464,6 +464,5 @@ registry.register(
schema=VISION_ANALYZE_SCHEMA, schema=VISION_ANALYZE_SCHEMA,
handler=_handle_vision_analyze, handler=_handle_vision_analyze,
check_fn=check_vision_requirements, check_fn=check_vision_requirements,
requires_env=["OPENROUTER_API_KEY"],
is_async=True, is_async=True,
) )

View file

@ -47,7 +47,8 @@ import re
import asyncio import asyncio
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl from firecrawl import Firecrawl
from tools.openrouter_client import get_async_client as _get_openrouter_client from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from tools.debug_helpers import DebugSession from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -64,9 +65,17 @@ def _get_firecrawl_client():
_firecrawl_client = Firecrawl(api_key=api_key) _firecrawl_client = Firecrawl(api_key=api_key)
return _firecrawl_client return _firecrawl_client
DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
response = await _get_openrouter_client().chat.completions.create( if _aux_async_client is None:
logger.warning("No auxiliary model available for web content processing")
return None
response = await _aux_async_client.chat.completions.create(
model=model, model=model,
messages=[ messages=[
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
], ],
temperature=0.1, temperature=0.1,
max_tokens=max_tokens, max_tokens=max_tokens,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
) )
return response.choices[0].message.content.strip() return response.choices[0].message.content.strip()
except Exception as api_error: except Exception as api_error:
@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary.""" Create a single, unified markdown summary."""
try: try:
response = await _get_openrouter_client().chat.completions.create( if _aux_async_client is None:
logger.warning("No auxiliary model for synthesis, concatenating summaries")
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
response = await _aux_async_client.chat.completions.create(
model=model, model=model,
messages=[ messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."}, {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
], ],
temperature=0.1, temperature=0.1,
max_tokens=4000, max_tokens=4000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
) )
final_summary = response.choices[0].message.content.strip() final_summary = response.choices[0].message.content.strip()
@ -677,8 +684,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response)) debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled # Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is not None:
logger.info("Processing extracted content with LLM (parallel)...") logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing") debug_call_data["processing_applied"].append("llm_processing")
@ -744,8 +751,8 @@ async def web_extract_tool(
else: else:
logger.warning("%s (no content to process)", url) logger.warning("%s (no content to process)", url)
else: else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content") logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable") debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior) # Print summary of extracted pages for debugging (original behavior)
@ -973,8 +980,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response)) debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled # Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is not None:
logger.info("Processing crawled content with LLM (parallel)...") logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing") debug_call_data["processing_applied"].append("llm_processing")
@ -1040,8 +1047,8 @@ async def web_crawl_tool(
else: else:
logger.warning("%s (no content to process)", page_url) logger.warning("%s (no content to process)", page_url)
else: else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"): if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content") logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable") debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior) # Print summary of crawled pages for debugging (original behavior)
@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
return bool(os.getenv("FIRECRAWL_API_KEY")) return bool(os.getenv("FIRECRAWL_API_KEY"))
def check_nous_api_key() -> bool: def check_auxiliary_model() -> bool:
""" """Check if an auxiliary text model is available for LLM content processing."""
Check if the Nous Research API key is available in environment variables. return _aux_async_client is not None
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("OPENROUTER_API_KEY"))
def get_debug_session_info() -> Dict[str, Any]: def get_debug_session_info() -> Dict[str, Any]:
@ -1120,7 +1122,7 @@ if __name__ == "__main__":
# Check if API keys are available # Check if API keys are available
firecrawl_available = check_firecrawl_api_key() firecrawl_available = check_firecrawl_api_key()
nous_available = check_nous_api_key() nous_available = check_auxiliary_model()
if not firecrawl_available: if not firecrawl_available:
print("❌ FIRECRAWL_API_KEY environment variable not set") print("❌ FIRECRAWL_API_KEY environment variable not set")
@ -1130,12 +1132,11 @@ if __name__ == "__main__":
print("✅ Firecrawl API key found") print("✅ Firecrawl API key found")
if not nous_available: if not nous_available:
print("❌ OPENROUTER_API_KEY environment variable not set") print("❌ No auxiliary model available for LLM content processing")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'") print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
print("Get API key at: https://inference-api.nousresearch.com/") print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
print("⚠️ Without Nous API key, LLM content processing will be disabled")
else: else:
print("✅ Nous Research API key found") print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
if not firecrawl_available: if not firecrawl_available:
exit(1) exit(1)
@ -1143,7 +1144,7 @@ if __name__ == "__main__":
print("🛠️ Web tools ready for use!") print("🛠️ Web tools ready for use!")
if nous_available: if nous_available:
print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter") print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars") print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
# Show debug mode status # Show debug mode status