merge: resolve conflict with main in subagent interrupt test

Erosika 2026-03-12 16:28:57 -04:00
commit fefc709b2c
75 changed files with 8124 additions and 1376 deletions

View file

@@ -184,43 +184,52 @@ def prompt_dangerous_approval(command: str, description: str,
     os.environ["HERMES_SPINNER_PAUSE"] = "1"
     try:
-        print()
-        print(f" ⚠️ DANGEROUS COMMAND: {description}")
-        print(f" {command[:80]}{'...' if len(command) > 80 else ''}")
-        print()
-        print(f" [o]nce | [s]ession | [a]lways | [d]eny")
-        print()
-        sys.stdout.flush()
+        is_truncated = len(command) > 80
+        while True:
+            print()
+            print(f" ⚠️ DANGEROUS COMMAND: {description}")
+            print(f" {command[:80]}{'...' if is_truncated else ''}")
+            print()
+            view_hint = " | [v]iew full" if is_truncated else ""
+            print(f" [o]nce | [s]ession | [a]lways | [d]eny{view_hint}")
+            print()
+            sys.stdout.flush()
 
-        result = {"choice": ""}
+            result = {"choice": ""}
 
-        def get_input():
-            try:
-                result["choice"] = input(" Choice [o/s/a/D]: ").strip().lower()
-            except (EOFError, OSError):
-                result["choice"] = ""
+            def get_input():
+                try:
+                    result["choice"] = input(" Choice [o/s/a/D]: ").strip().lower()
+                except (EOFError, OSError):
+                    result["choice"] = ""
 
-        thread = threading.Thread(target=get_input, daemon=True)
-        thread.start()
-        thread.join(timeout=timeout_seconds)
+            thread = threading.Thread(target=get_input, daemon=True)
+            thread.start()
+            thread.join(timeout=timeout_seconds)
 
-        if thread.is_alive():
-            print("\n ⏱ Timeout - denying command")
-            return "deny"
+            if thread.is_alive():
+                print("\n ⏱ Timeout - denying command")
+                return "deny"
 
-        choice = result["choice"]
-        if choice in ('o', 'once'):
-            print(" ✓ Allowed once")
-            return "once"
-        elif choice in ('s', 'session'):
-            print(" ✓ Allowed for this session")
-            return "session"
-        elif choice in ('a', 'always'):
-            print(" ✓ Added to permanent allowlist")
-            return "always"
-        else:
-            print(" ✗ Denied")
-            return "deny"
+            choice = result["choice"]
+            if choice in ('v', 'view') and is_truncated:
+                print()
+                print(" Full command:")
+                print(f" {command}")
+                is_truncated = False  # show full on next loop iteration too
+                continue
+            if choice in ('o', 'once'):
+                print(" ✓ Allowed once")
+                return "once"
+            elif choice in ('s', 'session'):
+                print(" ✓ Allowed for this session")
+                return "session"
+            elif choice in ('a', 'always'):
+                print(" ✓ Added to permanent allowlist")
+                return "always"
+            else:
+                print(" ✗ Denied")
+                return "deny"
     except (EOFError, KeyboardInterrupt):
         print("\n ✗ Cancelled")

View file

@@ -63,7 +63,7 @@ import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
-from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client
+from agent.auxiliary_client import call_llm
 
 logger = logging.getLogger(__name__)
@@ -80,38 +80,15 @@ DEFAULT_SESSION_TIMEOUT = 300
 # Max tokens for snapshot content before summarization
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
 
-# Vision client — for browser_vision (screenshot analysis)
-# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
-# browser_tool module from importing (which would disable all 10 browser tools).
-try:
-    _aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-except Exception as _init_err:
-    logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
-    _aux_vision_client, _DEFAULT_VISION_MODEL = None, None
-
-# Text client — for page snapshot summarization (same config as web_extract)
-try:
-    _aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
-except Exception as _init_err:
-    logger.debug("Could not initialise text auxiliary client: %s", _init_err)
-    _aux_text_client, _DEFAULT_TEXT_MODEL = None, None
-
-# Module-level alias for availability checks
-EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
-
-def _get_vision_model() -> str:
+def _get_vision_model() -> Optional[str]:
     """Model for browser_vision (screenshot analysis — multimodal)."""
-    return (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-            or _DEFAULT_VISION_MODEL
-            or "google/gemini-3-flash-preview")
+    return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
 
-def _get_extraction_model() -> str:
+def _get_extraction_model() -> Optional[str]:
     """Model for page snapshot text summarization — same as web_extract."""
-    return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
-            or _DEFAULT_TEXT_MODEL
-            or "google/gemini-3-flash-preview")
+    return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
 
 def _is_local_mode() -> bool:
@@ -941,9 +918,6 @@ def _extract_relevant_content(
     Falls back to simple truncation when no auxiliary text model is configured.
     """
-    if _aux_text_client is None:
-        return _truncate_snapshot(snapshot_text)
-
     if user_task:
         extraction_prompt = (
             f"You are a content extractor for a browser automation agent.\n\n"
@@ -968,13 +942,16 @@ def _extract_relevant_content(
         )
 
     try:
-        from agent.auxiliary_client import auxiliary_max_tokens_param
-        response = _aux_text_client.chat.completions.create(
-            model=_get_extraction_model(),
-            messages=[{"role": "user", "content": extraction_prompt}],
-            **auxiliary_max_tokens_param(4000),
-            temperature=0.1,
-        )
+        call_kwargs = {
+            "task": "web_extract",
+            "messages": [{"role": "user", "content": extraction_prompt}],
+            "max_tokens": 4000,
+            "temperature": 0.1,
+        }
+        model = _get_extraction_model()
+        if model:
+            call_kwargs["model"] = model
+        response = call_llm(**call_kwargs)
         return response.choices[0].message.content
     except Exception:
         return _truncate_snapshot(snapshot_text)
@@ -1497,14 +1474,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
     effective_task_id = task_id or "default"
 
-    # Check auxiliary vision client
-    if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
-        return json.dumps({
-            "success": False,
-            "error": "Browser vision unavailable: no auxiliary vision model configured. "
-                     "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
-        }, ensure_ascii=False)
-
     # Save screenshot to persistent location so it can be shared with users
     hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
     screenshots_dir = hermes_home / "browser_screenshots"
@@ -1562,14 +1531,13 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
         f"Focus on answering the user's specific question."
     )
 
-    # Use the sync auxiliary vision client directly
-    from agent.auxiliary_client import auxiliary_max_tokens_param
+    # Use the centralized LLM router
     vision_model = _get_vision_model()
-    logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
-                 len(image_data), vision_model)
-    response = _aux_vision_client.chat.completions.create(
-        model=vision_model,
-        messages=[
+    logger.debug("browser_vision: analysing screenshot (%d bytes)",
+                 len(image_data))
+    call_kwargs = {
+        "task": "vision",
+        "messages": [
             {
                 "role": "user",
                 "content": [
@@ -1578,9 +1546,12 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
                 ],
             }
         ],
-        **auxiliary_max_tokens_param(2000),
-        temperature=0.1,
-    )
+        "max_tokens": 2000,
+        "temperature": 0.1,
+    }
+    if vision_model:
+        call_kwargs["model"] = vision_model
+    response = call_llm(**call_kwargs)
     analysis = response.choices[0].message.content
 
     response_data = {
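Note: the hunks above replace per-module OpenAI clients with a single router entry point. A minimal sketch of the calling convention, inferred only from the call sites in this commit (the router implementation itself is not part of this diff):

from agent.auxiliary_client import call_llm  # sync; async_call_llm is the awaitable variant

# "task" picks routing defaults (e.g. "vision", "web_extract", "mcp");
# "model" and "provider" are optional overrides. The return value is an
# OpenAI-style chat completion object.
response = call_llm(
    task="web_extract",
    messages=[{"role": "user", "content": "Summarize: ..."}],
    max_tokens=4000,
    temperature=0.1,
)
print(response.choices[0].message.content)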

View file

@@ -209,7 +209,7 @@ def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]:
             return None
     except Exception as e:
-        logger.error("Error upscaling image: %s", e)
+        logger.error("Error upscaling image: %s", e, exc_info=True)
         return None
@@ -377,7 +377,7 @@ def image_generate_tool(
     except Exception as e:
         generation_time = (datetime.datetime.now() - start_time).total_seconds()
         error_msg = f"Error generating image: {str(e)}"
-        logger.error("%s", error_msg)
+        logger.error("%s", error_msg, exc_info=True)
 
         # Prepare error response - minimal format
         response_data = {

View file

@@ -456,17 +456,13 @@ class SamplingHandler:
         # Resolve model
         model = self._resolve_model(getattr(params, "modelPreferences", None))
 
-        # Get auxiliary LLM client
-        from agent.auxiliary_client import get_text_auxiliary_client
-        client, default_model = get_text_auxiliary_client()
-        if client is None:
-            self.metrics["errors"] += 1
-            return self._error("No LLM provider available for sampling")
+        # Get auxiliary LLM client via centralized router
+        from agent.auxiliary_client import call_llm
 
-        resolved_model = model or default_model
-        # Model whitelist check (we need to resolve model before calling)
-        if self.allowed_models and resolved_model not in self.allowed_models:
+        resolved_model = model or self.model_override or ""
+        # Model whitelist check
+        if self.allowed_models and resolved_model and resolved_model not in self.allowed_models:
             logger.warning(
                 "MCP server '%s' requested model '%s' not in allowed_models",
                 self.server_name, resolved_model,
@@ -484,20 +480,15 @@ class SamplingHandler:
         # Build LLM call kwargs
         max_tokens = min(params.maxTokens, self.max_tokens_cap)
-        call_kwargs: dict = {
-            "model": resolved_model,
-            "messages": messages,
-            "max_tokens": max_tokens,
-        }
+        call_temperature = None
         if hasattr(params, "temperature") and params.temperature is not None:
-            call_kwargs["temperature"] = params.temperature
-        if stop := getattr(params, "stopSequences", None):
-            call_kwargs["stop"] = stop
+            call_temperature = params.temperature
 
         # Forward server-provided tools
+        call_tools = None
         server_tools = getattr(params, "tools", None)
         if server_tools:
-            call_kwargs["tools"] = [
+            call_tools = [
                 {
                     "type": "function",
                     "function": {
@@ -508,9 +499,6 @@ class SamplingHandler:
                     }
                 }
                 for t in server_tools
             ]
-
-        if tool_choice := getattr(params, "toolChoice", None):
-            mode = getattr(tool_choice, "mode", "auto")
-            call_kwargs["tool_choice"] = {"auto": "auto", "required": "required", "none": "none"}.get(mode, "auto")
 
         logger.log(
             self.audit_level,
@@ -520,7 +508,15 @@ class SamplingHandler:
         # Offload sync LLM call to thread (non-blocking)
         def _sync_call():
-            return client.chat.completions.create(**call_kwargs)
+            return call_llm(
+                task="mcp",
+                model=resolved_model or None,
+                messages=messages,
+                temperature=call_temperature,
+                max_tokens=max_tokens,
+                tools=call_tools,
+                timeout=self.timeout,
+            )
 
         try:
             response = await asyncio.wait_for(
View file

@@ -1,39 +1,30 @@
 """Shared OpenRouter API client for Hermes tools.
 
 Provides a single lazy-initialized AsyncOpenAI client that all tool modules
-can share, eliminating the duplicated _get_openrouter_client() /
-_get_summarizer_client() pattern previously copy-pasted across web_tools,
-vision_tools, mixture_of_agents_tool, and session_search_tool.
+can share. Routes through the centralized provider router in
+agent/auxiliary_client.py so auth, headers, and API format are handled
+consistently.
 """
 import os
-from openai import AsyncOpenAI
-from hermes_constants import OPENROUTER_BASE_URL
 
-_client: AsyncOpenAI | None = None
+_client = None
 
-def get_async_client() -> AsyncOpenAI:
-    """Return a shared AsyncOpenAI client pointed at OpenRouter.
+def get_async_client():
+    """Return a shared async OpenAI-compatible client for OpenRouter.
 
     The client is created lazily on first call and reused thereafter.
+    Uses the centralized provider router for auth and client construction.
     Raises ValueError if OPENROUTER_API_KEY is not set.
     """
    global _client
    if _client is None:
-        api_key = os.getenv("OPENROUTER_API_KEY")
-        if not api_key:
+        from agent.auxiliary_client import resolve_provider_client
+        client, _model = resolve_provider_client("openrouter", async_mode=True)
+        if client is None:
            raise ValueError("OPENROUTER_API_KEY environment variable not set")
-        _client = AsyncOpenAI(
-            api_key=api_key,
-            base_url=OPENROUTER_BASE_URL,
-            default_headers={
-                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-                "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
-            },
-        )
+        _client = client
    return _client
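Note: callers keep the OpenAI-compatible surface. A usage sketch (the importing module path is assumed; the model slug is illustrative, reusing one mentioned elsewhere in this commit):

import asyncio
from tools.shared_openrouter_client import get_async_client  # module path assumed

async def main():
    client = get_async_client()  # raises ValueError if OPENROUTER_API_KEY is unset
    resp = await client.chat.completions.create(
        model="google/gemini-3-flash-preview",  # illustrative slug
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)

asyncio.run(main())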

View file

@@ -22,13 +22,7 @@ import os
 import logging
 from typing import Dict, Any, List, Optional, Union
-from openai import AsyncOpenAI, OpenAI
-from agent.auxiliary_client import get_async_text_auxiliary_client
-
-# Resolve the async auxiliary client at import time so we have the model slug.
-# Handles Codex Responses API adapter transparently.
-_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
+from agent.auxiliary_client import async_call_llm
 
 MAX_SESSION_CHARS = 100_000
 MAX_SUMMARY_TOKENS = 10000
@@ -156,26 +150,22 @@ async def _summarize_session(
         f"Summarize this conversation with focus on: {query}"
     )
 
-    if _async_aux_client is None or _SUMMARIZER_MODEL is None:
-        logging.warning("No auxiliary model available for session summarization")
-        return None
-
     max_retries = 3
     for attempt in range(max_retries):
         try:
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-            _extra = get_auxiliary_extra_body()
-            response = await _async_aux_client.chat.completions.create(
-                model=_SUMMARIZER_MODEL,
+            response = await async_call_llm(
+                task="session_search",
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt},
                 ],
-                **({} if not _extra else {"extra_body": _extra}),
                 temperature=0.1,
-                **auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
+                max_tokens=MAX_SUMMARY_TOKENS,
             )
             return response.choices[0].message.content.strip()
+        except RuntimeError:
+            logging.warning("No auxiliary model available for session summarization")
+            return None
         except Exception as e:
             if attempt < max_retries - 1:
                 await asyncio.sleep(1 * (attempt + 1))
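Note: availability is now signaled by exception rather than a None client. Judging by the handlers added here and in web_tools, the router raises RuntimeError when no auxiliary provider is configured, so callers degrade along these lines (sketch; variable names illustrative):

try:
    response = await async_call_llm(task="session_search", messages=msgs, max_tokens=1000)
except RuntimeError:  # no auxiliary provider configured
    response = None  # caller falls back, e.g. returns None or truncates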
@@ -333,8 +323,6 @@ def session_search(
 def check_session_search_requirements() -> bool:
     """Requires SQLite state database and an auxiliary text model."""
-    if _async_aux_client is None:
-        return False
     try:
         from hermes_state import DEFAULT_DB_PATH
         return DEFAULT_DB_PATH.parent.exists()

View file

@@ -29,7 +29,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import List, Tuple
-from hermes_constants import OPENROUTER_BASE_URL
 
 # ---------------------------------------------------------------------------
@@ -934,25 +934,12 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
     if not model:
         return static_result
 
-    # Call the LLM via the OpenAI SDK (same pattern as run_agent.py)
+    # Call the LLM via the centralized provider router
     try:
-        from openai import OpenAI
-        import os
+        from agent.auxiliary_client import call_llm
 
-        api_key = os.getenv("OPENROUTER_API_KEY", "")
-        if not api_key:
-            return static_result
-
-        client = OpenAI(
-            base_url=OPENROUTER_BASE_URL,
-            api_key=api_key,
-            default_headers={
-                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-                "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
-            },
-        )
-        response = client.chat.completions.create(
+        response = call_llm(
+            provider="openrouter",
             model=model,
             messages=[{
                 "role": "user",

View file

@@ -572,14 +572,23 @@ class ClawHubSource(SkillSource):
             logger.warning("ClawHub fetch failed for %s: could not resolve latest version", slug)
             return None
 
-        version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
-        if not isinstance(version_data, dict):
-            return None
+        # Primary method: download the skill as a ZIP bundle from /download
+        files = self._download_zip(slug, latest_version)
+
+        # Fallback: try the version metadata endpoint for inline/raw content
+        if "SKILL.md" not in files:
+            version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
+            if isinstance(version_data, dict):
+                # Files may be nested under version_data["version"]["files"]
+                files = self._extract_files(version_data) or files
+                if "SKILL.md" not in files:
+                    nested = version_data.get("version", {})
+                    if isinstance(nested, dict):
+                        files = self._extract_files(nested) or files
 
-        files = self._extract_files(version_data)
         if "SKILL.md" not in files:
             logger.warning(
-                "ClawHub fetch for %s resolved version %s but no inline/raw file content was available",
+                "ClawHub fetch for %s resolved version %s but could not retrieve file content",
                 slug,
                 latest_version,
             )
@@ -674,6 +683,65 @@
         return files
 
+    def _download_zip(self, slug: str, version: str) -> Dict[str, str]:
+        """Download skill as a ZIP bundle from the /download endpoint and extract text files."""
+        import io
+        import zipfile
+
+        files: Dict[str, str] = {}
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                resp = httpx.get(
+                    f"{self.BASE_URL}/download",
+                    params={"slug": slug, "version": version},
+                    timeout=30,
+                    follow_redirects=True,
+                )
+                if resp.status_code == 429:
+                    retry_after = int(resp.headers.get("retry-after", "5"))
+                    retry_after = min(retry_after, 15)  # Cap wait time
+                    logger.debug(
+                        "ClawHub download rate-limited for %s, retrying in %ds (attempt %d/%d)",
+                        slug, retry_after, attempt + 1, max_retries,
+                    )
+                    time.sleep(retry_after)
+                    continue
+                if resp.status_code != 200:
+                    logger.debug("ClawHub ZIP download for %s v%s returned %s", slug, version, resp.status_code)
+                    return files
+                with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                    for info in zf.infolist():
+                        if info.is_dir():
+                            continue
+                        # Sanitize path — strip leading slashes and ..
+                        name = info.filename.lstrip("/")
+                        if ".." in name or name.startswith("/"):
+                            continue
+                        # Only extract text-sized files (skip large binaries)
+                        if info.file_size > 500_000:
+                            logger.debug("Skipping large file in ZIP: %s (%d bytes)", name, info.file_size)
+                            continue
+                        try:
+                            raw = zf.read(info.filename)
+                            files[name] = raw.decode("utf-8")
+                        except (UnicodeDecodeError, KeyError):
+                            logger.debug("Skipping non-text file in ZIP: %s", name)
+                            continue
+                return files
+            except zipfile.BadZipFile:
+                logger.warning("ClawHub returned invalid ZIP for %s v%s", slug, version)
+                return files
+            except httpx.HTTPError as exc:
+                logger.debug("ClawHub ZIP download failed for %s v%s: %s", slug, version, exc)
+                return files
+
+        logger.debug("ClawHub ZIP download exhausted retries for %s v%s", slug, version)
+        return files
+
     def _fetch_text(self, url: str) -> Optional[str]:
         try:
             resp = httpx.get(url, timeout=20)
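Note: fetch order is now ZIP bundle first, with the version-metadata endpoint as fallback. The new method returns a mapping of relative path to decoded text, for example (slug, version, and contents hypothetical):

files = source._download_zip("web-search", "1.2.0")
# {"SKILL.md": "# Web Search\n...", "scripts/run.py": "..."}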

View file

@@ -37,28 +37,11 @@ from pathlib import Path
 from typing import Any, Awaitable, Dict, Optional
 from urllib.parse import urlparse
 
 import httpx
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_vision_auxiliary_client
+from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Resolve vision auxiliary client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
-
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -197,7 +180,7 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
 async def vision_analyze_tool(
     image_url: str,
     user_prompt: str,
-    model: str = DEFAULT_VISION_MODEL,
+    model: str = None,
 ) -> str:
     """
     Analyze an image from a URL or local file path using vision AI.
@@ -257,15 +240,6 @@ async def vision_analyze_tool(
     logger.info("Analyzing image: %s", image_url[:60])
     logger.info("User prompt: %s", user_prompt[:100])
 
-    # Check auxiliary vision client availability
-    if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
-        logger.error("Vision analysis unavailable: no auxiliary vision model configured")
-        return json.dumps({
-            "success": False,
-            "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
-                        "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
-        }, indent=2, ensure_ascii=False)
-
     # Determine if this is a local file path or a remote URL
     local_path = Path(image_url)
     if local_path.is_file():
@@ -321,18 +295,18 @@ async def vision_analyze_tool(
             }
         ]
 
-        logger.info("Processing image with %s...", model)
+        logger.info("Processing image with vision model...")
 
-        # Call the vision API
-        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-        _extra = get_auxiliary_extra_body()
-        response = await _aux_async_client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=0.1,
-            **auxiliary_max_tokens_param(2000),
-            **({} if not _extra else {"extra_body": _extra}),
-        )
+        # Call the vision API via centralized router
+        call_kwargs = {
+            "task": "vision",
+            "messages": messages,
+            "temperature": 0.1,
+            "max_tokens": 2000,
+        }
+        if model:
+            call_kwargs["model"] = model
+        response = await async_call_llm(**call_kwargs)
 
         # Extract the analysis
         analysis = response.choices[0].message.content.strip()
@@ -359,10 +333,28 @@ async def vision_analyze_tool(
         error_msg = f"Error analyzing image: {str(e)}"
         logger.error("%s", error_msg, exc_info=True)
 
+        # Detect vision capability errors — give the model a clear message
+        # so it can inform the user instead of a cryptic API error.
+        err_str = str(e).lower()
+        if any(hint in err_str for hint in (
+            "does not support", "not support image", "invalid_request",
+            "content_policy", "image_url", "multimodal",
+            "unrecognized request argument", "image input",
+        )):
+            analysis = (
+                f"{model} does not support vision or our request was not "
+                f"accepted by the server. Error: {e}"
+            )
+        else:
+            analysis = (
+                "There was a problem with the request and the image could not "
+                f"be analyzed. Error: {e}"
+            )
+
         # Prepare error response
         result = {
             "success": False,
-            "analysis": "There was a problem with the request and the image could not be analyzed."
+            "analysis": analysis,
         }
 
         debug_call_data["error"] = error_msg
@@ -385,7 +377,18 @@
 def check_vision_requirements() -> bool:
     """Check if an auxiliary vision model is available."""
-    return _aux_async_client is not None
+    try:
+        from agent.auxiliary_client import resolve_provider_client
+        client, _ = resolve_provider_client("openrouter")
+        if client is not None:
+            return True
+        client, _ = resolve_provider_client("nous")
+        if client is not None:
+            return True
+        client, _ = resolve_provider_client("custom")
+        return client is not None
+    except Exception:
+        return False
 
 def get_debug_session_info() -> Dict[str, Any]:
@@ -413,10 +416,9 @@ if __name__ == "__main__":
         print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
         exit(1)
     else:
-        print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
+        print("✅ Vision model available")
         print("🛠️ Vision tools ready for use!")
-        print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
 
     # Show debug mode status
     if _debug.active:
@@ -483,9 +485,7 @@ def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
         "Fully describe and explain everything about this image, then answer the "
         f"following question:\n\n{question}"
     )
-    model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-             or DEFAULT_VISION_MODEL
-             or "google/gemini-3-flash-preview")
+    model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
     return vision_analyze_tool(image_url, full_prompt, model)
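Note: with the default changed to None, callers can omit the model and let the router's "vision" task resolve one. Sketch (path and question illustrative):

result_json = await vision_analyze_tool(
    "/tmp/screenshot.png",  # local file or URL
    "What error message is shown?",
)  # model=None -> async_call_llm(task="vision", ...) picks the model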

View file

@@ -47,8 +47,7 @@ import re
 import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_async_text_auxiliary_client
+from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
@@ -83,15 +82,8 @@ def _get_firecrawl_client():
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
 
-# Resolve async auxiliary client at module level.
-# Handles Codex Responses API adapter transparently.
-_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract")
-
-# Allow per-task override via config.yaml auxiliary.web_extract_model
-DEFAULT_SUMMARIZER_MODEL = (
-    os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
-    or _DEFAULT_SUMMARIZER_MODEL
-)
+# Allow per-task override via env var
+DEFAULT_SUMMARIZER_MODEL = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
 
 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@@ -249,22 +241,22 @@ Create a markdown summary that captures all key information in a well-organized,
     for attempt in range(max_retries):
         try:
-            if _aux_async_client is None:
-                logger.warning("No auxiliary model available for web content processing")
-                return None
-
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-            _extra = get_auxiliary_extra_body()
-            response = await _aux_async_client.chat.completions.create(
-                model=model,
-                messages=[
+            call_kwargs = {
+                "task": "web_extract",
+                "messages": [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt}
                 ],
-                temperature=0.1,
-                **auxiliary_max_tokens_param(max_tokens),
-                **({} if not _extra else {"extra_body": _extra}),
-            )
+                "temperature": 0.1,
+                "max_tokens": max_tokens,
+            }
+            if model:
+                call_kwargs["model"] = model
+            response = await async_call_llm(**call_kwargs)
             return response.choices[0].message.content.strip()
+        except RuntimeError:
+            logger.warning("No auxiliary model available for web content processing")
+            return None
         except Exception as api_error:
             last_error = api_error
             if attempt < max_retries - 1:
@@ -368,25 +360,18 @@ Synthesize these into ONE cohesive, comprehensive summary that:
 Create a single, unified markdown summary."""
 
     try:
-        if _aux_async_client is None:
-            logger.warning("No auxiliary model for synthesis, concatenating summaries")
-            fallback = "\n\n".join(summaries)
-            if len(fallback) > max_output_size:
-                fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
-            return fallback
-
-        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-        _extra = get_auxiliary_extra_body()
-        response = await _aux_async_client.chat.completions.create(
-            model=model,
-            messages=[
+        call_kwargs = {
+            "task": "web_extract",
+            "messages": [
                 {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
                 {"role": "user", "content": synthesis_prompt}
             ],
-            temperature=0.1,
-            **auxiliary_max_tokens_param(20000),
-            **({} if not _extra else {"extra_body": _extra}),
-        )
+            "temperature": 0.1,
+            "max_tokens": 20000,
+        }
+        if model:
+            call_kwargs["model"] = model
+        response = await async_call_llm(**call_kwargs)
 
         final_summary = response.choices[0].message.content.strip()
         # Enforce hard cap
@@ -713,8 +698,8 @@ async def web_extract_tool(
     debug_call_data["pages_extracted"] = pages_extracted
     debug_call_data["original_response_size"] = len(json.dumps(response))
 
-    # Process each result with LLM if enabled and auxiliary client is available
-    if use_llm_processing and _aux_async_client is not None:
+    # Process each result with LLM if enabled
+    if use_llm_processing:
         logger.info("Processing extracted content with LLM (parallel)...")
         debug_call_data["processing_applied"].append("llm_processing")
@@ -780,10 +765,6 @@
             else:
                 logger.warning("%s (no content to process)", url)
     else:
-        if use_llm_processing and _aux_async_client is None:
-            logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-            debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
         # Print summary of extracted pages for debugging (original behavior)
         for result in response.get('results', []):
             url = result.get('url', 'Unknown URL')
@@ -1013,8 +994,8 @@ async def web_crawl_tool(
     debug_call_data["pages_crawled"] = pages_crawled
     debug_call_data["original_response_size"] = len(json.dumps(response))
 
-    # Process each result with LLM if enabled and auxiliary client is available
-    if use_llm_processing and _aux_async_client is not None:
+    # Process each result with LLM if enabled
+    if use_llm_processing:
         logger.info("Processing crawled content with LLM (parallel)...")
         debug_call_data["processing_applied"].append("llm_processing")
@@ -1080,10 +1061,6 @@
             else:
                 logger.warning("%s (no content to process)", page_url)
     else:
-        if use_llm_processing and _aux_async_client is None:
-            logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-            debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
         # Print summary of crawled pages for debugging (original behavior)
         for result in response.get('results', []):
             page_url = result.get('url', 'Unknown URL')
@@ -1138,7 +1115,15 @@ def check_firecrawl_api_key() -> bool:
 def check_auxiliary_model() -> bool:
     """Check if an auxiliary text model is available for LLM content processing."""
-    return _aux_async_client is not None
+    try:
+        from agent.auxiliary_client import resolve_provider_client
+        for p in ("openrouter", "nous", "custom", "codex"):
+            client, _ = resolve_provider_client(p)
+            if client is not None:
+                return True
+        return False
+    except Exception:
+        return False
 
 def get_debug_session_info() -> Dict[str, Any]: