Hermes Agent UX Improvements

2026-02-22 02:16:11 -08:00 · 2026-02-22 02:16:11 -08:00 · ededaaa874
commit ededaaa874
parent b1f55e3ee5
23 changed files with 945 additions and 1545 deletions
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
                                       approval_callback=approval_callback)

    if choice == "deny":
-        return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
+        return {
+            "approved": False,
+            "message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
+            "pattern_key": pattern_key,
+            "description": description,
+        }

    if choice == "session":
        approve_session(session_key, pattern_key)
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -51,25 +51,16 @@ import signal
 import subprocess
 import shutil
 import sys
-import asyncio
 import tempfile
 import threading
 import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
-from hermes_constants import OPENROUTER_CHAT_URL
+from agent.auxiliary_client import get_vision_auxiliary_client

 logger = logging.getLogger(__name__)

-# Try to import httpx for async LLM calls
-try:
-    import httpx
-    HTTPX_AVAILABLE = True
-except ImportError:
-    HTTPX_AVAILABLE = False
-
-
 # ============================================================================
 # Configuration
 # ============================================================================
@@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
 # Max tokens for snapshot content before summarization
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000

-# Model for task-aware extraction
-EXTRACTION_MODEL = "google/gemini-3-flash-preview"
+# Resolve vision auxiliary client for extraction/vision tasks
+_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()

 # Track active sessions per task
 # Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@@ -782,87 +773,49 @@ def _run_browser_command(
        return {"success": False, "error": str(e)}


-async def _extract_relevant_content(
+def _extract_relevant_content(
    snapshot_text: str,
    user_task: Optional[str] = None
 ) -> str:
+    """Use LLM to extract relevant content from a snapshot based on the user's task.
+
+    Falls back to simple truncation when no auxiliary vision model is configured.
    """
-    Use LLM to extract relevant content from a snapshot based on the user's task.
-    
-    This provides task-aware summarization that preserves meaningful text content
-    (paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
-    
-    Args:
-        snapshot_text: The full snapshot text
-        user_task: The user's current task/goal (optional)
-        
-    Returns:
-        Summarized/extracted content
-    """
-    if not HTTPX_AVAILABLE:
-        # Fall back to simple truncation
+    if _aux_vision_client is None or EXTRACTION_MODEL is None:
        return _truncate_snapshot(snapshot_text)
-    
-    # Get API key
-    api_key = os.environ.get("OPENROUTER_API_KEY")
-    if not api_key:
-        return _truncate_snapshot(snapshot_text)
-    
-    # Build extraction prompt
+
    if user_task:
-        extraction_prompt = f"""You are a content extractor for a browser automation agent.
-
-The user's task is: {user_task}
-
-Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on:
-1. Interactive elements (buttons, links, inputs) that might be needed
-2. Text content relevant to the task (prices, descriptions, headings, important info)
-3. Navigation structure if relevant
-
-Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.
-
-Page Snapshot:
-{snapshot_text}
-
-Provide a concise summary that preserves actionable information and relevant content."""
+        extraction_prompt = (
+            f"You are a content extractor for a browser automation agent.\n\n"
+            f"The user's task is: {user_task}\n\n"
+            f"Given the following page snapshot (accessibility tree representation), "
+            f"extract and summarize the most relevant information for completing this task. Focus on:\n"
+            f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
+            f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
+            f"3. Navigation structure if relevant\n\n"
+            f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
+            f"Page Snapshot:\n{snapshot_text}\n\n"
+            f"Provide a concise summary that preserves actionable information and relevant content."
+        )
    else:
-        extraction_prompt = f"""Summarize this page snapshot, preserving:
-1. All interactive elements with their ref IDs (like [ref=e5])
-2. Key text content and headings
-3. Important information visible on the page
-
-Page Snapshot:
-{snapshot_text}
-
-Provide a concise summary focused on interactive elements and key content."""
+        extraction_prompt = (
+            f"Summarize this page snapshot, preserving:\n"
+            f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
+            f"2. Key text content and headings\n"
+            f"3. Important information visible on the page\n\n"
+            f"Page Snapshot:\n{snapshot_text}\n\n"
+            f"Provide a concise summary focused on interactive elements and key content."
+        )

    try:
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            response = await client.post(
-                OPENROUTER_CHAT_URL,
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json"
-                },
-                json={
-                    "model": EXTRACTION_MODEL,
-                    "messages": [
-                        {"role": "user", "content": extraction_prompt}
-                    ],
-                    "max_tokens": 4000,
-                    "temperature": 0.1
-                }
-            )
-            
-            if response.status_code == 200:
-                result = response.json()
-                return result["choices"][0]["message"]["content"]
-            else:
-                # Fall back to truncation on API error
-                return _truncate_snapshot(snapshot_text)
-                
+        response = _aux_vision_client.chat.completions.create(
+            model=EXTRACTION_MODEL,
+            messages=[{"role": "user", "content": extraction_prompt}],
+            max_tokens=4000,
+            temperature=0.1,
+        )
+        return response.choices[0].message.content
    except Exception:
-        # Fall back to truncation on any error
        return _truncate_snapshot(snapshot_text)


@@ -991,16 +944,7 @@ def browser_snapshot(
        
        # Check if snapshot needs summarization
        if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
-            # Run async extraction
-            try:
-                loop = asyncio.get_event_loop()
-            except RuntimeError:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-            
-            snapshot_text = loop.run_until_complete(
-                _extract_relevant_content(snapshot_text, user_task)
-            )
+            snapshot_text = _extract_relevant_content(snapshot_text, user_task)
        elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
            snapshot_text = _truncate_snapshot(snapshot_text)
        
@@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
    
    effective_task_id = task_id or "default"
    
-    # Check for OpenRouter API key
-    api_key = os.environ.get("OPENROUTER_API_KEY")
-    if not api_key:
+    # Check auxiliary vision client
+    if _aux_vision_client is None or EXTRACTION_MODEL is None:
        return json.dumps({
            "success": False,
-            "error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key."
+            "error": "Browser vision unavailable: no auxiliary vision model configured. "
+                     "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
        }, ensure_ascii=False)
    
    # Create a temporary file for the screenshot
@@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
        image_base64 = base64.b64encode(image_data).decode("ascii")
        data_url = f"data:image/png;base64,{image_base64}"
        
-        # Prepare the vision prompt
-        vision_prompt = f"""You are analyzing a screenshot of a web browser.
+        vision_prompt = (
+            f"You are analyzing a screenshot of a web browser.\n\n"
+            f"User's question: {question}\n\n"
+            f"Provide a detailed and helpful answer based on what you see in the screenshot. "
+            f"If there are interactive elements, describe them. If there are verification challenges "
+            f"or CAPTCHAs, describe what type they are and what action might be needed. "
+            f"Focus on answering the user's specific question."
+        )

-User's question: {question}
-
-Provide a detailed and helpful answer based on what you see in the screenshot. 
-If there are interactive elements, describe them. If there are verification challenges 
-or CAPTCHAs, describe what type they are and what action might be needed.
-Focus on answering the user's specific question."""
-
-        # Call OpenRouter/Gemini for vision analysis
-        if HTTPX_AVAILABLE:
-            import asyncio
-            
-            async def analyze_screenshot():
-                async with httpx.AsyncClient(timeout=60.0) as client:
-                    response = await client.post(
-                        OPENROUTER_CHAT_URL,
-                        headers={
-                            "Authorization": f"Bearer {api_key}",
-                            "Content-Type": "application/json"
-                        },
-                        json={
-                            "model": "google/gemini-3-flash-preview",
-                            "messages": [
-                                {
-                                    "role": "user",
-                                    "content": [
-                                        {"type": "text", "text": vision_prompt},
-                                        {
-                                            "type": "image_url",
-                                            "image_url": {"url": data_url}
-                                        }
-                                    ]
-                                }
-                            ],
-                            "max_tokens": 2000,
-                            "temperature": 0.1
-                        }
-                    )
-                    
-                    if response.status_code != 200:
-                        return {
-                            "success": False,
-                            "error": f"Vision API error: {response.status_code} - {response.text[:200]}"
-                        }
-                    
-                    result_data = response.json()
-                    analysis = result_data["choices"][0]["message"]["content"]
-                    return {
-                        "success": True,
-                        "analysis": analysis
-                    }
-            
-            # Run the async function
-            try:
-                loop = asyncio.get_event_loop()
-            except RuntimeError:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-            
-            vision_result = loop.run_until_complete(analyze_screenshot())
-            return json.dumps(vision_result, ensure_ascii=False)
-        
-        else:
-            # Fallback: use synchronous requests
-            response = requests.post(
-                OPENROUTER_CHAT_URL,
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json"
-                },
-                json={
-                    "model": "google/gemini-3-flash-preview",
-                    "messages": [
-                        {
-                            "role": "user",
-                            "content": [
-                                {"type": "text", "text": vision_prompt},
-                                {
-                                    "type": "image_url",
-                                    "image_url": {"url": data_url}
-                                }
-                            ]
-                        }
+        # Use the sync auxiliary vision client directly
+        response = _aux_vision_client.chat.completions.create(
+            model=EXTRACTION_MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": vision_prompt},
+                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
-                    "max_tokens": 2000,
-                    "temperature": 0.1
-                },
-                timeout=60
-            )
-            
-            if response.status_code != 200:
-                return json.dumps({
-                    "success": False,
-                    "error": f"Vision API error: {response.status_code} - {response.text[:200]}"
-                }, ensure_ascii=False)
-            
-            result_data = response.json()
-            analysis = result_data["choices"][0]["message"]["content"]
-            return json.dumps({
-                "success": True,
-                "analysis": analysis
-            }, ensure_ascii=False)
+                }
+            ],
+            max_tokens=2000,
+            temperature=0.1,
+        )
+        
+        analysis = response.choices[0].message.content
+        return json.dumps({
+            "success": True,
+            "analysis": analysis,
+        }, ensure_ascii=False)
    
    except Exception as e:
        return json.dumps({
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@@ -22,9 +22,19 @@ import os
 import logging
 from typing import Dict, Any, List, Optional

-from tools.openrouter_client import get_async_client as _get_client
+from openai import AsyncOpenAI, OpenAI

-SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
+from agent.auxiliary_client import get_text_auxiliary_client
+
+# Resolve the auxiliary client at import time so we have the model slug.
+# We build an AsyncOpenAI from the same credentials for async summarization.
+_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
+_async_aux_client: AsyncOpenAI | None = None
+if _aux_client is not None:
+    _async_aux_client = AsyncOpenAI(
+        api_key=_aux_client.api_key,
+        base_url=str(_aux_client.base_url),
+    )
 MAX_SESSION_CHARS = 100_000
 MAX_SUMMARY_TOKENS = 2000

@@ -126,11 +136,15 @@ async def _summarize_session(
        f"Summarize this conversation with focus on: {query}"
    )

+    if _async_aux_client is None or _SUMMARIZER_MODEL is None:
+        logging.warning("No auxiliary model available for session summarization")
+        return None
+
    max_retries = 3
    for attempt in range(max_retries):
        try:
-            response = await _get_client().chat.completions.create(
-                model=SUMMARIZER_MODEL,
+            response = await _async_aux_client.chat.completions.create(
+                model=_SUMMARIZER_MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
@@ -252,8 +266,8 @@ def session_search(


 def check_session_search_requirements() -> bool:
-    """Requires SQLite state database and OpenRouter API key."""
-    if not os.getenv("OPENROUTER_API_KEY"):
+    """Requires SQLite state database and an auxiliary text model."""
+    if _async_aux_client is None:
        return False
    try:
        from hermes_state import DEFAULT_DB_PATH
@@ -316,5 +330,4 @@ registry.register(
        limit=args.get("limit", 3),
        db=kw.get("db")),
    check_fn=check_session_search_requirements,
-    requires_env=["OPENROUTER_API_KEY"],
 )
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p

 # Global state for environment lifecycle management
 _active_environments: Dict[str, Any] = {}
-_task_workdirs: Dict[str, str] = {}  # Maps task_id to working directory
 _last_activity: Dict[str, float] = {}
 _env_lock = threading.Lock()
 _creation_locks: Dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
@@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
            if current_time - last_time > lifetime_seconds:
                env = _active_environments.pop(task_id, None)
                _last_activity.pop(task_id, None)
-                _task_workdirs.pop(task_id, None)
                if env is not None:
                    envs_to_stop.append((task_id, env))

@@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
    info = {
        "count": len(_active_environments),
        "task_ids": list(_active_environments.keys()),
-        "workdirs": dict(_task_workdirs),
+        "workdirs": {},
    }
    
    # Calculate total disk usage
@@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:

 def cleanup_all_environments():
    """Clean up ALL active environments. Use with caution."""
-    global _active_environments, _last_activity, _task_workdirs
+    global _active_environments, _last_activity
    
    task_ids = list(_active_environments.keys())
    cleaned = 0
@@ -661,7 +659,7 @@ def cleanup_all_environments():

 def cleanup_vm(task_id: str):
    """Manually clean up a specific environment by task_id."""
-    global _active_environments, _last_activity, _task_workdirs
+    global _active_environments, _last_activity

    # Remove from tracking dicts while holding the lock, but defer the
    # actual (potentially slow) env.cleanup() call to outside the lock
@@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
    env = None
    with _env_lock:
        env = _active_environments.pop(task_id, None)
-        _task_workdirs.pop(task_id, None)
        _last_activity.pop(task_id, None)

    # Clean up per-task creation lock
@@ -782,17 +779,6 @@ def terminal_tool(
        default_timeout = config["timeout"]
        effective_timeout = timeout or default_timeout

-        # For local environment in batch mode, create a unique subdirectory per task
-        # This prevents parallel tasks from overwriting each other's files
-        # In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
-        if env_type == "local" and not os.getenv("HERMES_QUIET"):
-            with _env_lock:
-                if effective_task_id not in _task_workdirs:
-                    task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
-                    task_workdir.mkdir(parents=True, exist_ok=True)
-                    _task_workdirs[effective_task_id] = str(task_workdir)
-                cwd = _task_workdirs[effective_task_id]
-
        # Start cleanup thread
        _start_cleanup_thread()

@@ -874,11 +860,16 @@ def terminal_tool(
                        "description": approval.get("description", "dangerous command"),
                        "pattern_key": approval.get("pattern_key", ""),
                    }, ensure_ascii=False)
-                # Command was blocked - return informative message
+                # Command was blocked - include the pattern category so the caller knows why
+                desc = approval.get("description", "potentially dangerous operation")
+                fallback_msg = (
+                    f"Command denied: matches '{desc}' pattern. "
+                    "Use the approval prompt to allow it, or rephrase the command."
+                )
                return json.dumps({
                    "output": "",
                    "exit_code": -1,
-                    "error": approval.get("message", "Command denied - potentially dangerous operation"),
+                    "error": approval.get("message", fallback_msg),
                    "status": "blocked"
                }, ensure_ascii=False)

@@ -996,11 +987,17 @@ def terminal_tool(
            # Add helpful message for sudo failures in messaging context
            output = _handle_sudo_failure(output, env_type)
            
-            # Truncate output if too long
+            # Truncate output if too long, keeping both head and tail
            MAX_OUTPUT_CHARS = 50000
            if len(output) > MAX_OUTPUT_CHARS:
-                truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
-                output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
+                head_chars = int(MAX_OUTPUT_CHARS * 0.4)  # 40% head (error messages often appear early)
+                tail_chars = MAX_OUTPUT_CHARS - head_chars  # 60% tail (most recent/relevant output)
+                omitted = len(output) - head_chars - tail_chars
+                truncated_notice = (
+                    f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
+                    f"out of {len(output)} total] ...\n\n"
+                )
+                output = output[:head_chars] + truncated_notice + output[-tail_chars:]

            return json.dumps({
                "output": output.strip() if output else "",
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -36,13 +36,20 @@ import base64
 from pathlib import Path
 from typing import Dict, Any, Optional
 import httpx
-from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
+from openai import AsyncOpenAI
+from agent.auxiliary_client import get_vision_auxiliary_client
 from tools.debug_helpers import DebugSession

 logger = logging.getLogger(__name__)

-# Configuration for vision processing
-DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
+# Resolve vision auxiliary client at module level; build an async wrapper.
+_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
+_aux_async_client: AsyncOpenAI | None = None
+if _aux_sync_client is not None:
+    _aux_async_client = AsyncOpenAI(
+        api_key=_aux_sync_client.api_key,
+        base_url=str(_aux_sync_client.base_url),
+    )

 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")

@@ -230,9 +237,13 @@ async def vision_analyze_tool(
        logger.info("Analyzing image: %s", image_url[:60])
        logger.info("User prompt: %s", user_prompt[:100])
        
-        # Check API key availability
-        if not os.getenv("OPENROUTER_API_KEY"):
-            raise ValueError("OPENROUTER_API_KEY environment variable not set")
+        # Check auxiliary vision client availability
+        if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
+            return json.dumps({
+                "success": False,
+                "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
+                            "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
+            }, indent=2, ensure_ascii=False)
        
        # Determine if this is a local file path or a remote URL
        local_path = Path(image_url)
@@ -291,18 +302,12 @@ async def vision_analyze_tool(
        
        logger.info("Processing image with %s...", model)
        
-        # Call the vision API with reasoning enabled
-        response = await _get_openrouter_client().chat.completions.create(
+        # Call the vision API
+        response = await _aux_async_client.chat.completions.create(
            model=model,
            messages=messages,
-            temperature=0.1,  # Low temperature for consistent analysis
-            max_tokens=2000,  # Generous limit for detailed analysis
-            extra_body={
-                "reasoning": {
-                    "enabled": True,
-                    "effort": "xhigh"
-                }
-            }
+            temperature=0.1,
+            max_tokens=2000,
        )
        
        # Extract the analysis
@@ -353,13 +358,8 @@ async def vision_analyze_tool(


 def check_vision_requirements() -> bool:
-    """
-    Check if all requirements for vision tools are met.
-    
-    Returns:
-        bool: True if requirements are met, False otherwise
-    """
-    return check_openrouter_api_key()
+    """Check if an auxiliary vision model is available."""
+    return _aux_async_client is not None


 def get_debug_session_info() -> Dict[str, Any]:
@@ -379,16 +379,15 @@ if __name__ == "__main__":
    print("👁️ Vision Tools Module")
    print("=" * 40)
    
-    # Check if API key is available
-    api_available = check_openrouter_api_key()
+    # Check if vision model is available
+    api_available = check_vision_requirements()
    
    if not api_available:
-        print("❌ OPENROUTER_API_KEY environment variable not set")
-        print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
-        print("Get API key at: https://openrouter.ai/")
+        print("❌ No auxiliary vision model available")
+        print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
        exit(1)
    else:
-        print("✅ OpenRouter API key found")
+        print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
    
    print("🛠️ Vision tools ready for use!")
    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
    image_url = args.get("image_url", "")
    question = args.get("question", "")
    full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
-    return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview")
+    model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
+    return vision_analyze_tool(image_url, full_prompt, model)


 registry.register(
@@ -464,6 +464,5 @@ registry.register(
    schema=VISION_ANALYZE_SCHEMA,
    handler=_handle_vision_analyze,
    check_fn=check_vision_requirements,
-    requires_env=["OPENROUTER_API_KEY"],
    is_async=True,
 )
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -47,7 +47,8 @@ import re
 import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
-from tools.openrouter_client import get_async_client as _get_openrouter_client
+from openai import AsyncOpenAI
+from agent.auxiliary_client import get_text_auxiliary_client
 from tools.debug_helpers import DebugSession

 logger = logging.getLogger(__name__)
@@ -64,9 +65,17 @@ def _get_firecrawl_client():
        _firecrawl_client = Firecrawl(api_key=api_key)
    return _firecrawl_client

-DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000

+# Resolve auxiliary text client at module level; build an async wrapper.
+_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
+_aux_async_client: AsyncOpenAI | None = None
+if _aux_sync_client is not None:
+    _aux_async_client = AsyncOpenAI(
+        api_key=_aux_sync_client.api_key,
+        base_url=str(_aux_sync_client.base_url),
+    )
+
 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")


@@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,

    for attempt in range(max_retries):
        try:
-            response = await _get_openrouter_client().chat.completions.create(
+            if _aux_async_client is None:
+                logger.warning("No auxiliary model available for web content processing")
+                return None
+            response = await _aux_async_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
@@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
                ],
                temperature=0.1,
                max_tokens=max_tokens,
-                extra_body={
-                    "reasoning": {
-                        "enabled": True,
-                        "effort": "xhigh"
-                    }
-                }
            )
            return response.choices[0].message.content.strip()
        except Exception as api_error:
@@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
 Create a single, unified markdown summary."""

    try:
-        response = await _get_openrouter_client().chat.completions.create(
+        if _aux_async_client is None:
+            logger.warning("No auxiliary model for synthesis, concatenating summaries")
+            fallback = "\n\n".join(summaries)
+            if len(fallback) > max_output_size:
+                fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
+            return fallback
+
+        response = await _aux_async_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
            ],
            temperature=0.1,
            max_tokens=4000,
-            extra_body={
-                "reasoning": {
-                    "enabled": True,
-                    "effort": "xhigh"
-                }
-            }
        )
        final_summary = response.choices[0].message.content.strip()
        
@@ -677,8 +684,8 @@ async def web_extract_tool(
        debug_call_data["pages_extracted"] = pages_extracted
        debug_call_data["original_response_size"] = len(json.dumps(response))
        
-        # Process each result with LLM if enabled
-        if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
+        # Process each result with LLM if enabled and auxiliary client is available
+        if use_llm_processing and _aux_async_client is not None:
            logger.info("Processing extracted content with LLM (parallel)...")
            debug_call_data["processing_applied"].append("llm_processing")
            
@@ -744,8 +751,8 @@ async def web_extract_tool(
                else:
                    logger.warning("%s (no content to process)", url)
        else:
-            if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
-                logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
+            if use_llm_processing and _aux_async_client is None:
+                logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
                debug_call_data["processing_applied"].append("llm_processing_unavailable")
            
            # Print summary of extracted pages for debugging (original behavior)
@@ -973,8 +980,8 @@ async def web_crawl_tool(
        debug_call_data["pages_crawled"] = pages_crawled
        debug_call_data["original_response_size"] = len(json.dumps(response))
        
-        # Process each result with LLM if enabled
-        if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
+        # Process each result with LLM if enabled and auxiliary client is available
+        if use_llm_processing and _aux_async_client is not None:
            logger.info("Processing crawled content with LLM (parallel)...")
            debug_call_data["processing_applied"].append("llm_processing")
            
@@ -1040,8 +1047,8 @@ async def web_crawl_tool(
                else:
                    logger.warning("%s (no content to process)", page_url)
        else:
-            if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
-                logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
+            if use_llm_processing and _aux_async_client is None:
+                logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
                debug_call_data["processing_applied"].append("llm_processing_unavailable")
            
            # Print summary of crawled pages for debugging (original behavior)
@@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
    return bool(os.getenv("FIRECRAWL_API_KEY"))


-def check_nous_api_key() -> bool:
-    """
-    Check if the Nous Research API key is available in environment variables.
-    
-    Returns:
-        bool: True if API key is set, False otherwise
-    """
-    return bool(os.getenv("OPENROUTER_API_KEY"))
+def check_auxiliary_model() -> bool:
+    """Check if an auxiliary text model is available for LLM content processing."""
+    return _aux_async_client is not None


 def get_debug_session_info() -> Dict[str, Any]:
@@ -1120,7 +1122,7 @@ if __name__ == "__main__":
    
    # Check if API keys are available
    firecrawl_available = check_firecrawl_api_key()
-    nous_available = check_nous_api_key()
+    nous_available = check_auxiliary_model()
    
    if not firecrawl_available:
        print("❌ FIRECRAWL_API_KEY environment variable not set")
@@ -1130,12 +1132,11 @@ if __name__ == "__main__":
        print("✅ Firecrawl API key found")
    
    if not nous_available:
-        print("❌ OPENROUTER_API_KEY environment variable not set")
-        print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")  
-        print("Get API key at: https://inference-api.nousresearch.com/")
-        print("⚠️  Without Nous API key, LLM content processing will be disabled")
+        print("❌ No auxiliary model available for LLM content processing")
+        print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
+        print("⚠️  Without an auxiliary model, LLM content processing will be disabled")
    else:
-        print("✅ Nous Research API key found")
+        print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
    
    if not firecrawl_available:
        exit(1)
@@ -1143,7 +1144,7 @@ if __name__ == "__main__":
    print("🛠️  Web tools ready for use!")
    
    if nous_available:
-        print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter")
+        print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
        print(f"   Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
    
    # Show debug mode status