merge: resolve conflict with main in subagent interrupt test

Erosika 2026-03-12 16:28:57 -04:00
commit fefc709b2c
75 changed files with 8124 additions and 1376 deletions

View file

@@ -184,43 +184,52 @@ def prompt_dangerous_approval(command: str, description: str,
     os.environ["HERMES_SPINNER_PAUSE"] = "1"
     try:
-        print()
-        print(f" ⚠️ DANGEROUS COMMAND: {description}")
-        print(f" {command[:80]}{'...' if len(command) > 80 else ''}")
-        print()
-        print(f" [o]nce | [s]ession | [a]lways | [d]eny")
-        print()
-        sys.stdout.flush()
+        is_truncated = len(command) > 80
+        while True:
+            print()
+            print(f" ⚠️ DANGEROUS COMMAND: {description}")
+            print(f" {command[:80]}{'...' if is_truncated else ''}")
+            print()
+            view_hint = " | [v]iew full" if is_truncated else ""
+            print(f" [o]nce | [s]ession | [a]lways | [d]eny{view_hint}")
+            print()
+            sys.stdout.flush()
 
-        result = {"choice": ""}
+            result = {"choice": ""}
 
-        def get_input():
-            try:
-                result["choice"] = input(" Choice [o/s/a/D]: ").strip().lower()
-            except (EOFError, OSError):
-                result["choice"] = ""
+            def get_input():
+                try:
+                    result["choice"] = input(" Choice [o/s/a/D]: ").strip().lower()
+                except (EOFError, OSError):
+                    result["choice"] = ""
 
-        thread = threading.Thread(target=get_input, daemon=True)
-        thread.start()
-        thread.join(timeout=timeout_seconds)
+            thread = threading.Thread(target=get_input, daemon=True)
+            thread.start()
+            thread.join(timeout=timeout_seconds)
 
-        if thread.is_alive():
-            print("\n ⏱ Timeout - denying command")
-            return "deny"
+            if thread.is_alive():
+                print("\n ⏱ Timeout - denying command")
+                return "deny"
 
-        choice = result["choice"]
-        if choice in ('o', 'once'):
-            print(" ✓ Allowed once")
-            return "once"
-        elif choice in ('s', 'session'):
-            print(" ✓ Allowed for this session")
-            return "session"
-        elif choice in ('a', 'always'):
-            print(" ✓ Added to permanent allowlist")
-            return "always"
-        else:
-            print(" ✗ Denied")
-            return "deny"
+            choice = result["choice"]
+            if choice in ('v', 'view') and is_truncated:
+                print()
+                print(" Full command:")
+                print(f" {command}")
+                is_truncated = False  # show full on next loop iteration too
+                continue
+            if choice in ('o', 'once'):
+                print(" ✓ Allowed once")
+                return "once"
+            elif choice in ('s', 'session'):
+                print(" ✓ Allowed for this session")
+                return "session"
+            elif choice in ('a', 'always'):
+                print(" ✓ Added to permanent allowlist")
+                return "always"
+            else:
+                print(" ✗ Denied")
+                return "deny"
     except (EOFError, KeyboardInterrupt):
         print("\n ✗ Cancelled")

View file

@@ -63,7 +63,7 @@ import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
-from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client
+from agent.auxiliary_client import call_llm
 
 logger = logging.getLogger(__name__)
@@ -80,38 +80,15 @@ DEFAULT_SESSION_TIMEOUT = 300
 # Max tokens for snapshot content before summarization
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
 
-# Vision client — for browser_vision (screenshot analysis)
-# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
-# browser_tool module from importing (which would disable all 10 browser tools).
-try:
-    _aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-except Exception as _init_err:
-    logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
-    _aux_vision_client, _DEFAULT_VISION_MODEL = None, None
-
-# Text client — for page snapshot summarization (same config as web_extract)
-try:
-    _aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
-except Exception as _init_err:
-    logger.debug("Could not initialise text auxiliary client: %s", _init_err)
-    _aux_text_client, _DEFAULT_TEXT_MODEL = None, None
-
-# Module-level alias for availability checks
-EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
-
-def _get_vision_model() -> str:
+def _get_vision_model() -> Optional[str]:
     """Model for browser_vision (screenshot analysis — multimodal)."""
-    return (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-            or _DEFAULT_VISION_MODEL
-            or "google/gemini-3-flash-preview")
+    return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
 
-def _get_extraction_model() -> str:
+def _get_extraction_model() -> Optional[str]:
     """Model for page snapshot text summarization — same as web_extract."""
-    return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
-            or _DEFAULT_TEXT_MODEL
-            or "google/gemini-3-flash-preview")
+    return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
 
 def _is_local_mode() -> bool:
@@ -941,9 +918,6 @@ def _extract_relevant_content(
     Falls back to simple truncation when no auxiliary text model is configured.
     """
-    if _aux_text_client is None:
-        return _truncate_snapshot(snapshot_text)
-
     if user_task:
         extraction_prompt = (
             f"You are a content extractor for a browser automation agent.\n\n"
@@ -968,13 +942,16 @@ def _extract_relevant_content(
         )
 
     try:
-        from agent.auxiliary_client import auxiliary_max_tokens_param
-        response = _aux_text_client.chat.completions.create(
-            model=_get_extraction_model(),
-            messages=[{"role": "user", "content": extraction_prompt}],
-            **auxiliary_max_tokens_param(4000),
-            temperature=0.1,
-        )
+        call_kwargs = {
+            "task": "web_extract",
+            "messages": [{"role": "user", "content": extraction_prompt}],
+            "max_tokens": 4000,
+            "temperature": 0.1,
+        }
+        model = _get_extraction_model()
+        if model:
+            call_kwargs["model"] = model
+        response = call_llm(**call_kwargs)
         return response.choices[0].message.content
     except Exception:
         return _truncate_snapshot(snapshot_text)
@@ -1497,14 +1474,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
     effective_task_id = task_id or "default"
 
-    # Check auxiliary vision client
-    if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
-        return json.dumps({
-            "success": False,
-            "error": "Browser vision unavailable: no auxiliary vision model configured. "
-                     "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
-        }, ensure_ascii=False)
-
     # Save screenshot to persistent location so it can be shared with users
     hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
     screenshots_dir = hermes_home / "browser_screenshots"
@@ -1562,14 +1531,13 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
         f"Focus on answering the user's specific question."
     )
 
-    # Use the sync auxiliary vision client directly
-    from agent.auxiliary_client import auxiliary_max_tokens_param
+    # Use the centralized LLM router
     vision_model = _get_vision_model()
-    logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
-                 len(image_data), vision_model)
-    response = _aux_vision_client.chat.completions.create(
-        model=vision_model,
-        messages=[
+    logger.debug("browser_vision: analysing screenshot (%d bytes)",
+                 len(image_data))
+    call_kwargs = {
+        "task": "vision",
+        "messages": [
             {
                 "role": "user",
                 "content": [
@@ -1578,9 +1546,12 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
                 ],
             }
         ],
-        **auxiliary_max_tokens_param(2000),
-        temperature=0.1,
-    )
+        "max_tokens": 2000,
+        "temperature": 0.1,
+    }
+    if vision_model:
+        call_kwargs["model"] = vision_model
+    response = call_llm(**call_kwargs)
     analysis = response.choices[0].message.content
 
     response_data = {
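Note: the hunks above replace per-module OpenAI clients with a single router entry point. A minimal sketch of the calling convention, inferred only from the call sites in this commit (the router implementation itself is not part of this diff):

from agent.auxiliary_client import call_llm  # sync; async_call_llm is the awaitable variant

# "task" picks routing defaults (e.g. "vision", "web_extract", "mcp");
# "model" and "provider" are optional overrides. The return value is an
# OpenAI-style chat completion object.
response = call_llm(
    task="web_extract",
    messages=[{"role": "user", "content": "Summarize: ..."}],
    max_tokens=4000,
    temperature=0.1,
)
print(response.choices[0].message.content)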

View file

@@ -209,7 +209,7 @@ def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]:
             return None
     except Exception as e:
-        logger.error("Error upscaling image: %s", e)
+        logger.error("Error upscaling image: %s", e, exc_info=True)
         return None
@@ -377,7 +377,7 @@ def image_generate_tool(
     except Exception as e:
         generation_time = (datetime.datetime.now() - start_time).total_seconds()
         error_msg = f"Error generating image: {str(e)}"
-        logger.error("%s", error_msg)
+        logger.error("%s", error_msg, exc_info=True)
 
         # Prepare error response - minimal format
         response_data = {

View file

@@ -456,17 +456,13 @@ class SamplingHandler:
         # Resolve model
         model = self._resolve_model(getattr(params, "modelPreferences", None))
 
-        # Get auxiliary LLM client
-        from agent.auxiliary_client import get_text_auxiliary_client
-        client, default_model = get_text_auxiliary_client()
-        if client is None:
-            self.metrics["errors"] += 1
-            return self._error("No LLM provider available for sampling")
+        # Get auxiliary LLM client via centralized router
+        from agent.auxiliary_client import call_llm
 
-        resolved_model = model or default_model
-        # Model whitelist check (we need to resolve model before calling)
-        if self.allowed_models and resolved_model not in self.allowed_models:
+        resolved_model = model or self.model_override or ""
+        # Model whitelist check
+        if self.allowed_models and resolved_model and resolved_model not in self.allowed_models:
             logger.warning(
                 "MCP server '%s' requested model '%s' not in allowed_models",
                 self.server_name, resolved_model,
@@ -484,20 +480,15 @@ class SamplingHandler:
         # Build LLM call kwargs
         max_tokens = min(params.maxTokens, self.max_tokens_cap)
-        call_kwargs: dict = {
-            "model": resolved_model,
-            "messages": messages,
-            "max_tokens": max_tokens,
-        }
+        call_temperature = None
         if hasattr(params, "temperature") and params.temperature is not None:
-            call_kwargs["temperature"] = params.temperature
-        if stop := getattr(params, "stopSequences", None):
-            call_kwargs["stop"] = stop
+            call_temperature = params.temperature
 
         # Forward server-provided tools
+        call_tools = None
         server_tools = getattr(params, "tools", None)
         if server_tools:
-            call_kwargs["tools"] = [
+            call_tools = [
                 {
                     "type": "function",
                     "function": {
@@ -508,9 +499,6 @@ class SamplingHandler:
                     }
                 }
                 for t in server_tools
             ]
-
-        if tool_choice := getattr(params, "toolChoice", None):
-            mode = getattr(tool_choice, "mode", "auto")
-            call_kwargs["tool_choice"] = {"auto": "auto", "required": "required", "none": "none"}.get(mode, "auto")
 
         logger.log(
             self.audit_level,
@@ -520,7 +508,15 @@ class SamplingHandler:
         # Offload sync LLM call to thread (non-blocking)
         def _sync_call():
-            return client.chat.completions.create(**call_kwargs)
+            return call_llm(
+                task="mcp",
+                model=resolved_model or None,
+                messages=messages,
+                temperature=call_temperature,
+                max_tokens=max_tokens,
+                tools=call_tools,
+                timeout=self.timeout,
+            )
 
         try:
             response = await asyncio.wait_for(
View file

@@ -1,39 +1,30 @@
 """Shared OpenRouter API client for Hermes tools.
 
 Provides a single lazy-initialized AsyncOpenAI client that all tool modules
-can share, eliminating the duplicated _get_openrouter_client() /
-_get_summarizer_client() pattern previously copy-pasted across web_tools,
-vision_tools, mixture_of_agents_tool, and session_search_tool.
+can share. Routes through the centralized provider router in
+agent/auxiliary_client.py so auth, headers, and API format are handled
+consistently.
 """
 import os
-from openai import AsyncOpenAI
-from hermes_constants import OPENROUTER_BASE_URL
 
-_client: AsyncOpenAI | None = None
+_client = None
 
-def get_async_client() -> AsyncOpenAI:
-    """Return a shared AsyncOpenAI client pointed at OpenRouter.
+def get_async_client():
+    """Return a shared async OpenAI-compatible client for OpenRouter.
 
     The client is created lazily on first call and reused thereafter.
+    Uses the centralized provider router for auth and client construction.
     Raises ValueError if OPENROUTER_API_KEY is not set.
     """
    global _client
    if _client is None:
-        api_key = os.getenv("OPENROUTER_API_KEY")
-        if not api_key:
+        from agent.auxiliary_client import resolve_provider_client
+        client, _model = resolve_provider_client("openrouter", async_mode=True)
+        if client is None:
            raise ValueError("OPENROUTER_API_KEY environment variable not set")
-        _client = AsyncOpenAI(
-            api_key=api_key,
-            base_url=OPENROUTER_BASE_URL,
-            default_headers={
-                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-                "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
-            },
-        )
+        _client = client
    return _client
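Note: callers keep the OpenAI-compatible surface. A usage sketch (the importing module path is assumed; the model slug is illustrative, reusing one mentioned elsewhere in this commit):

import asyncio
from tools.shared_openrouter_client import get_async_client  # module path assumed

async def main():
    client = get_async_client()  # raises ValueError if OPENROUTER_API_KEY is unset
    resp = await client.chat.completions.create(
        model="google/gemini-3-flash-preview",  # illustrative slug
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)

asyncio.run(main())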

View file

@@ -22,13 +22,7 @@ import os
 import logging
 from typing import Dict, Any, List, Optional, Union
-from openai import AsyncOpenAI, OpenAI
-from agent.auxiliary_client import get_async_text_auxiliary_client
-
-# Resolve the async auxiliary client at import time so we have the model slug.
-# Handles Codex Responses API adapter transparently.
-_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
+from agent.auxiliary_client import async_call_llm
 
 MAX_SESSION_CHARS = 100_000
 MAX_SUMMARY_TOKENS = 10000
@@ -156,26 +150,22 @@ async def _summarize_session(
         f"Summarize this conversation with focus on: {query}"
     )
 
-    if _async_aux_client is None or _SUMMARIZER_MODEL is None:
-        logging.warning("No auxiliary model available for session summarization")
-        return None
-
     max_retries = 3
     for attempt in range(max_retries):
         try:
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-            _extra = get_auxiliary_extra_body()
-            response = await _async_aux_client.chat.completions.create(
-                model=_SUMMARIZER_MODEL,
+            response = await async_call_llm(
+                task="session_search",
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt},
                 ],
-                **({} if not _extra else {"extra_body": _extra}),
                 temperature=0.1,
-                **auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
+                max_tokens=MAX_SUMMARY_TOKENS,
             )
             return response.choices[0].message.content.strip()
+        except RuntimeError:
+            logging.warning("No auxiliary model available for session summarization")
+            return None
         except Exception as e:
             if attempt < max_retries - 1:
                 await asyncio.sleep(1 * (attempt + 1))
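Note: availability is now signaled by exception rather than a None client. Judging by the handlers added here and in web_tools, the router raises RuntimeError when no auxiliary provider is configured, so callers degrade along these lines (sketch; variable names illustrative):

try:
    response = await async_call_llm(task="session_search", messages=msgs, max_tokens=1000)
except RuntimeError:  # no auxiliary provider configured
    response = None  # caller falls back, e.g. returns None or truncates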
@@ -333,8 +323,6 @@ def session_search(
 def check_session_search_requirements() -> bool:
     """Requires SQLite state database and an auxiliary text model."""
-    if _async_aux_client is None:
-        return False
     try:
         from hermes_state import DEFAULT_DB_PATH
         return DEFAULT_DB_PATH.parent.exists()

View file

@@ -29,7 +29,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import List, Tuple
-from hermes_constants import OPENROUTER_BASE_URL
 
 # ---------------------------------------------------------------------------
@@ -934,25 +934,12 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
     if not model:
         return static_result
 
-    # Call the LLM via the OpenAI SDK (same pattern as run_agent.py)
+    # Call the LLM via the centralized provider router
     try:
-        from openai import OpenAI
-        import os
+        from agent.auxiliary_client import call_llm
 
-        api_key = os.getenv("OPENROUTER_API_KEY", "")
-        if not api_key:
-            return static_result
-
-        client = OpenAI(
-            base_url=OPENROUTER_BASE_URL,
-            api_key=api_key,
-            default_headers={
-                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-                "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
-            },
-        )
-        response = client.chat.completions.create(
+        response = call_llm(
+            provider="openrouter",
             model=model,
             messages=[{
                 "role": "user",

View file

@@ -572,14 +572,23 @@ class ClawHubSource(SkillSource):
             logger.warning("ClawHub fetch failed for %s: could not resolve latest version", slug)
             return None
 
-        version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
-        if not isinstance(version_data, dict):
-            return None
+        # Primary method: download the skill as a ZIP bundle from /download
+        files = self._download_zip(slug, latest_version)
+
+        # Fallback: try the version metadata endpoint for inline/raw content
+        if "SKILL.md" not in files:
+            version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
+            if isinstance(version_data, dict):
+                # Files may be nested under version_data["version"]["files"]
+                files = self._extract_files(version_data) or files
+                if "SKILL.md" not in files:
+                    nested = version_data.get("version", {})
+                    if isinstance(nested, dict):
+                        files = self._extract_files(nested) or files
 
-        files = self._extract_files(version_data)
         if "SKILL.md" not in files:
             logger.warning(
-                "ClawHub fetch for %s resolved version %s but no inline/raw file content was available",
+                "ClawHub fetch for %s resolved version %s but could not retrieve file content",
                 slug,
                 latest_version,
             )
@@ -674,6 +683,65 @@
         return files
 
+    def _download_zip(self, slug: str, version: str) -> Dict[str, str]:
+        """Download skill as a ZIP bundle from the /download endpoint and extract text files."""
+        import io
+        import zipfile
+
+        files: Dict[str, str] = {}
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                resp = httpx.get(
+                    f"{self.BASE_URL}/download",
+                    params={"slug": slug, "version": version},
+                    timeout=30,
+                    follow_redirects=True,
+                )
+                if resp.status_code == 429:
+                    retry_after = int(resp.headers.get("retry-after", "5"))
+                    retry_after = min(retry_after, 15)  # Cap wait time
+                    logger.debug(
+                        "ClawHub download rate-limited for %s, retrying in %ds (attempt %d/%d)",
+                        slug, retry_after, attempt + 1, max_retries,
+                    )
+                    time.sleep(retry_after)
+                    continue
+                if resp.status_code != 200:
+                    logger.debug("ClawHub ZIP download for %s v%s returned %s", slug, version, resp.status_code)
+                    return files
+                with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                    for info in zf.infolist():
+                        if info.is_dir():
+                            continue
+                        # Sanitize path — strip leading slashes and ..
+                        name = info.filename.lstrip("/")
+                        if ".." in name or name.startswith("/"):
+                            continue
+                        # Only extract text-sized files (skip large binaries)
+                        if info.file_size > 500_000:
+                            logger.debug("Skipping large file in ZIP: %s (%d bytes)", name, info.file_size)
+                            continue
+                        try:
+                            raw = zf.read(info.filename)
+                            files[name] = raw.decode("utf-8")
+                        except (UnicodeDecodeError, KeyError):
+                            logger.debug("Skipping non-text file in ZIP: %s", name)
+                            continue
+                return files
+            except zipfile.BadZipFile:
+                logger.warning("ClawHub returned invalid ZIP for %s v%s", slug, version)
+                return files
+            except httpx.HTTPError as exc:
+                logger.debug("ClawHub ZIP download failed for %s v%s: %s", slug, version, exc)
+                return files
+
+        logger.debug("ClawHub ZIP download exhausted retries for %s v%s", slug, version)
+        return files
+
     def _fetch_text(self, url: str) -> Optional[str]:
         try:
             resp = httpx.get(url, timeout=20)
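Note: fetch order is now ZIP bundle first, with the version-metadata endpoint as fallback. The new method returns a mapping of relative path to decoded text, for example (slug, version, and contents hypothetical):

files = source._download_zip("web-search", "1.2.0")
# {"SKILL.md": "# Web Search\n...", "scripts/run.py": "..."}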

View file

@@ -37,28 +37,11 @@ from pathlib import Path
 from typing import Any, Awaitable, Dict, Optional
 from urllib.parse import urlparse
 
 import httpx
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_vision_auxiliary_client
+from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Resolve vision auxiliary client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
-
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -197,7 +180,7 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
 async def vision_analyze_tool(
     image_url: str,
     user_prompt: str,
-    model: str = DEFAULT_VISION_MODEL,
+    model: str = None,
 ) -> str:
     """
     Analyze an image from a URL or local file path using vision AI.
@@ -257,15 +240,6 @@ async def vision_analyze_tool(
     logger.info("Analyzing image: %s", image_url[:60])
     logger.info("User prompt: %s", user_prompt[:100])
 
-    # Check auxiliary vision client availability
-    if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
-        logger.error("Vision analysis unavailable: no auxiliary vision model configured")
-        return json.dumps({
-            "success": False,
-            "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
-                        "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
-        }, indent=2, ensure_ascii=False)
-
     # Determine if this is a local file path or a remote URL
     local_path = Path(image_url)
     if local_path.is_file():
@@ -321,18 +295,18 @@ async def vision_analyze_tool(
             }
         ]
 
-        logger.info("Processing image with %s...", model)
+        logger.info("Processing image with vision model...")
 
-        # Call the vision API
-        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-        _extra = get_auxiliary_extra_body()
-        response = await _aux_async_client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=0.1,
-            **auxiliary_max_tokens_param(2000),
-            **({} if not _extra else {"extra_body": _extra}),
-        )
+        # Call the vision API via centralized router
+        call_kwargs = {
+            "task": "vision",
+            "messages": messages,
+            "temperature": 0.1,
+            "max_tokens": 2000,
+        }
+        if model:
+            call_kwargs["model"] = model
+        response = await async_call_llm(**call_kwargs)
 
         # Extract the analysis
         analysis = response.choices[0].message.content.strip()
@@ -359,10 +333,28 @@ async def vision_analyze_tool(
         error_msg = f"Error analyzing image: {str(e)}"
         logger.error("%s", error_msg, exc_info=True)
 
+        # Detect vision capability errors — give the model a clear message
+        # so it can inform the user instead of a cryptic API error.
+        err_str = str(e).lower()
+        if any(hint in err_str for hint in (
+            "does not support", "not support image", "invalid_request",
+            "content_policy", "image_url", "multimodal",
+            "unrecognized request argument", "image input",
+        )):
+            analysis = (
+                f"{model} does not support vision or our request was not "
+                f"accepted by the server. Error: {e}"
+            )
+        else:
+            analysis = (
+                "There was a problem with the request and the image could not "
+                f"be analyzed. Error: {e}"
+            )
+
         # Prepare error response
         result = {
             "success": False,
-            "analysis": "There was a problem with the request and the image could not be analyzed."
+            "analysis": analysis,
         }
 
         debug_call_data["error"] = error_msg
@@ -385,7 +377,18 @@
 def check_vision_requirements() -> bool:
     """Check if an auxiliary vision model is available."""
-    return _aux_async_client is not None
+    try:
+        from agent.auxiliary_client import resolve_provider_client
+        client, _ = resolve_provider_client("openrouter")
+        if client is not None:
+            return True
+        client, _ = resolve_provider_client("nous")
+        if client is not None:
+            return True
+        client, _ = resolve_provider_client("custom")
+        return client is not None
+    except Exception:
+        return False
 
 def get_debug_session_info() -> Dict[str, Any]:
@@ -413,10 +416,9 @@ if __name__ == "__main__":
         print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
         exit(1)
     else:
-        print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
+        print("✅ Vision model available")
         print("🛠️ Vision tools ready for use!")
-        print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
 
     # Show debug mode status
     if _debug.active:
@@ -483,9 +485,7 @@ def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
         "Fully describe and explain everything about this image, then answer the "
         f"following question:\n\n{question}"
     )
-    model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-             or DEFAULT_VISION_MODEL
-             or "google/gemini-3-flash-preview")
+    model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
     return vision_analyze_tool(image_url, full_prompt, model)
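Note: with the default changed to None, callers can omit the model and let the router's "vision" task resolve one. Sketch (path and question illustrative):

result_json = await vision_analyze_tool(
    "/tmp/screenshot.png",  # local file or URL
    "What error message is shown?",
)  # model=None -> async_call_llm(task="vision", ...) picks the model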

View file

@@ -47,8 +47,7 @@ import re
 import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_async_text_auxiliary_client
+from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
@@ -83,15 +82,8 @@ def _get_firecrawl_client():
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
 
-# Resolve async auxiliary client at module level.
-# Handles Codex Responses API adapter transparently.
-_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract")
-
-# Allow per-task override via config.yaml auxiliary.web_extract_model
-DEFAULT_SUMMARIZER_MODEL = (
-    os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
-    or _DEFAULT_SUMMARIZER_MODEL
-)
+# Allow per-task override via env var
+DEFAULT_SUMMARIZER_MODEL = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
 
 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@@ -249,22 +241,22 @@ Create a markdown summary that captures all key information in a well-organized,
     for attempt in range(max_retries):
         try:
-            if _aux_async_client is None:
-                logger.warning("No auxiliary model available for web content processing")
-                return None
-
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-            _extra = get_auxiliary_extra_body()
-            response = await _aux_async_client.chat.completions.create(
-                model=model,
-                messages=[
+            call_kwargs = {
+                "task": "web_extract",
+                "messages": [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt}
                 ],
-                temperature=0.1,
-                **auxiliary_max_tokens_param(max_tokens),
-                **({} if not _extra else {"extra_body": _extra}),
-            )
+                "temperature": 0.1,
+                "max_tokens": max_tokens,
+            }
+            if model:
+                call_kwargs["model"] = model
+            response = await async_call_llm(**call_kwargs)
             return response.choices[0].message.content.strip()
+        except RuntimeError:
+            logger.warning("No auxiliary model available for web content processing")
+            return None
         except Exception as api_error:
             last_error = api_error
             if attempt < max_retries - 1:
@@ -368,25 +360,18 @@ Synthesize these into ONE cohesive, comprehensive summary that:
 Create a single, unified markdown summary."""
 
     try:
-        if _aux_async_client is None:
-            logger.warning("No auxiliary model for synthesis, concatenating summaries")
-            fallback = "\n\n".join(summaries)
-            if len(fallback) > max_output_size:
-                fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
-            return fallback
-
-        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-        _extra = get_auxiliary_extra_body()
-        response = await _aux_async_client.chat.completions.create(
-            model=model,
-            messages=[
+        call_kwargs = {
+            "task": "web_extract",
+            "messages": [
                 {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
                 {"role": "user", "content": synthesis_prompt}
             ],
-            temperature=0.1,
-            **auxiliary_max_tokens_param(20000),
-            **({} if not _extra else {"extra_body": _extra}),
-        )
+            "temperature": 0.1,
+            "max_tokens": 20000,
+        }
+        if model:
+            call_kwargs["model"] = model
+        response = await async_call_llm(**call_kwargs)
 
         final_summary = response.choices[0].message.content.strip()
         # Enforce hard cap
@@ -713,8 +698,8 @@ async def web_extract_tool(
     debug_call_data["pages_extracted"] = pages_extracted
     debug_call_data["original_response_size"] = len(json.dumps(response))
 
-    # Process each result with LLM if enabled and auxiliary client is available
-    if use_llm_processing and _aux_async_client is not None:
+    # Process each result with LLM if enabled
+    if use_llm_processing:
         logger.info("Processing extracted content with LLM (parallel)...")
         debug_call_data["processing_applied"].append("llm_processing")
@@ -780,10 +765,6 @@
             else:
                 logger.warning("%s (no content to process)", url)
     else:
-        if use_llm_processing and _aux_async_client is None:
-            logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-            debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
         # Print summary of extracted pages for debugging (original behavior)
         for result in response.get('results', []):
             url = result.get('url', 'Unknown URL')
@@ -1013,8 +994,8 @@ async def web_crawl_tool(
     debug_call_data["pages_crawled"] = pages_crawled
     debug_call_data["original_response_size"] = len(json.dumps(response))
 
-    # Process each result with LLM if enabled and auxiliary client is available
-    if use_llm_processing and _aux_async_client is not None:
+    # Process each result with LLM if enabled
+    if use_llm_processing:
         logger.info("Processing crawled content with LLM (parallel)...")
         debug_call_data["processing_applied"].append("llm_processing")
@@ -1080,10 +1061,6 @@
             else:
                 logger.warning("%s (no content to process)", page_url)
     else:
-        if use_llm_processing and _aux_async_client is None:
-            logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-            debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
         # Print summary of crawled pages for debugging (original behavior)
         for result in response.get('results', []):
             page_url = result.get('url', 'Unknown URL')
@@ -1138,7 +1115,15 @@ def check_firecrawl_api_key() -> bool:
 def check_auxiliary_model() -> bool:
     """Check if an auxiliary text model is available for LLM content processing."""
-    return _aux_async_client is not None
+    try:
+        from agent.auxiliary_client import resolve_provider_client
+        for p in ("openrouter", "nous", "custom", "codex"):
+            client, _ = resolve_provider_client(p)
+            if client is not None:
+                return True
+        return False
+    except Exception:
+        return False
 
 def get_debug_session_info() -> Dict[str, Any]: