Hermes Agent UX Improvements

This commit is contained in:
teknium1 2026-02-22 02:16:11 -08:00
parent b1f55e3ee5
commit ededaaa874
23 changed files with 945 additions and 1545 deletions

View file

@@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
approval_callback=approval_callback)
if choice == "deny":
return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
return {
"approved": False,
"message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
"pattern_key": pattern_key,
"description": description,
}
if choice == "session":
approve_session(session_key, pattern_key)

View file

@@ -51,25 +51,16 @@ import signal
import subprocess
import shutil
import sys
import asyncio
import tempfile
import threading
import time
import requests
from typing import Dict, Any, Optional, List
from pathlib import Path
from hermes_constants import OPENROUTER_CHAT_URL
from agent.auxiliary_client import get_vision_auxiliary_client
logger = logging.getLogger(__name__)
# Try to import httpx for async LLM calls
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
# ============================================================================
# Configuration
# ============================================================================
@@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Model for task-aware extraction
EXTRACTION_MODEL = "google/gemini-3-flash-preview"
# Resolve vision auxiliary client for extraction/vision tasks
_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
# Track active sessions per task
# Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@@ -782,87 +773,49 @@ def _run_browser_command(
return {"success": False, "error": str(e)}
async def _extract_relevant_content(
def _extract_relevant_content(
snapshot_text: str,
user_task: Optional[str] = None
) -> str:
"""Use LLM to extract relevant content from a snapshot based on the user's task.
Falls back to simple truncation when no auxiliary vision model is configured.
"""
Use LLM to extract relevant content from a snapshot based on the user's task.
This provides task-aware summarization that preserves meaningful text content
(paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
Args:
snapshot_text: The full snapshot text
user_task: The user's current task/goal (optional)
Returns:
Summarized/extracted content
"""
if not HTTPX_AVAILABLE:
# Fall back to simple truncation
if _aux_vision_client is None or EXTRACTION_MODEL is None:
return _truncate_snapshot(snapshot_text)
# Get API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
return _truncate_snapshot(snapshot_text)
# Build extraction prompt
if user_task:
extraction_prompt = f"""You are a content extractor for a browser automation agent.
The user's task is: {user_task}
Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on:
1. Interactive elements (buttons, links, inputs) that might be needed
2. Text content relevant to the task (prices, descriptions, headings, important info)
3. Navigation structure if relevant
Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.
Page Snapshot:
{snapshot_text}
Provide a concise summary that preserves actionable information and relevant content."""
extraction_prompt = (
f"You are a content extractor for a browser automation agent.\n\n"
f"The user's task is: {user_task}\n\n"
f"Given the following page snapshot (accessibility tree representation), "
f"extract and summarize the most relevant information for completing this task. Focus on:\n"
f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
f"3. Navigation structure if relevant\n\n"
f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary that preserves actionable information and relevant content."
)
else:
extraction_prompt = f"""Summarize this page snapshot, preserving:
1. All interactive elements with their ref IDs (like [ref=e5])
2. Key text content and headings
3. Important information visible on the page
Page Snapshot:
{snapshot_text}
Provide a concise summary focused on interactive elements and key content."""
extraction_prompt = (
f"Summarize this page snapshot, preserving:\n"
f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
f"2. Key text content and headings\n"
f"3. Important information visible on the page\n\n"
f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary focused on interactive elements and key content."
)
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": EXTRACTION_MODEL,
"messages": [
{"role": "user", "content": extraction_prompt}
],
"max_tokens": 4000,
"temperature": 0.1
}
)
if response.status_code == 200:
result = response.json()
return result["choices"][0]["message"]["content"]
else:
# Fall back to truncation on API error
return _truncate_snapshot(snapshot_text)
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[{"role": "user", "content": extraction_prompt}],
max_tokens=4000,
temperature=0.1,
)
return response.choices[0].message.content
except Exception:
# Fall back to truncation on any error
return _truncate_snapshot(snapshot_text)
@@ -991,16 +944,7 @@ def browser_snapshot(
# Check if snapshot needs summarization
if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
# Run async extraction
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
snapshot_text = loop.run_until_complete(
_extract_relevant_content(snapshot_text, user_task)
)
snapshot_text = _extract_relevant_content(snapshot_text, user_task)
elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
snapshot_text = _truncate_snapshot(snapshot_text)
@@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default"
# Check for OpenRouter API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
# Check auxiliary vision client
if _aux_vision_client is None or EXTRACTION_MODEL is None:
return json.dumps({
"success": False,
"error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key."
"error": "Browser vision unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False)
# Create a temporary file for the screenshot
@@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
image_base64 = base64.b64encode(image_data).decode("ascii")
data_url = f"data:image/png;base64,{image_base64}"
# Prepare the vision prompt
vision_prompt = f"""You are analyzing a screenshot of a web browser.
vision_prompt = (
f"You are analyzing a screenshot of a web browser.\n\n"
f"User's question: {question}\n\n"
f"Provide a detailed and helpful answer based on what you see in the screenshot. "
f"If there are interactive elements, describe them. If there are verification challenges "
f"or CAPTCHAs, describe what type they are and what action might be needed. "
f"Focus on answering the user's specific question."
)
User's question: {question}
Provide a detailed and helpful answer based on what you see in the screenshot.
If there are interactive elements, describe them. If there are verification challenges
or CAPTCHAs, describe what type they are and what action might be needed.
Focus on answering the user's specific question."""
# Call OpenRouter/Gemini for vision analysis
if HTTPX_AVAILABLE:
import asyncio
async def analyze_screenshot():
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
],
"max_tokens": 2000,
"temperature": 0.1
}
)
if response.status_code != 200:
return {
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return {
"success": True,
"analysis": analysis
}
# Run the async function
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
vision_result = loop.run_until_complete(analyze_screenshot())
return json.dumps(vision_result, ensure_ascii=False)
else:
# Fallback: use synchronous requests
response = requests.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
# Use the sync auxiliary vision client directly
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{"type": "image_url", "image_url": {"url": data_url}},
],
"max_tokens": 2000,
"temperature": 0.1
},
timeout=60
)
if response.status_code != 200:
return json.dumps({
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}, ensure_ascii=False)
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return json.dumps({
"success": True,
"analysis": analysis
}, ensure_ascii=False)
}
],
max_tokens=2000,
temperature=0.1,
)
analysis = response.choices[0].message.content
return json.dumps({
"success": True,
"analysis": analysis,
}, ensure_ascii=False)
except Exception as e:
return json.dumps({

View file

@@ -22,9 +22,19 @@ import os
import logging
from typing import Dict, Any, List, Optional
from tools.openrouter_client import get_async_client as _get_client
from openai import AsyncOpenAI, OpenAI
SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
from agent.auxiliary_client import get_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_aux_client = AsyncOpenAI(
api_key=_aux_client.api_key,
base_url=str(_aux_client.base_url),
)
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000
@@ -126,11 +136,15 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}"
)
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
logging.warning("No auxiliary model available for session summarization")
return None
max_retries = 3
for attempt in range(max_retries):
try:
response = await _get_client().chat.completions.create(
model=SUMMARIZER_MODEL,
response = await _async_aux_client.chat.completions.create(
model=_SUMMARIZER_MODEL,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
@@ -252,8 +266,8 @@ def session_search(
def check_session_search_requirements() -> bool:
"""Requires SQLite state database and OpenRouter API key."""
if not os.getenv("OPENROUTER_API_KEY"):
"""Requires SQLite state database and an auxiliary text model."""
if _async_aux_client is None:
return False
try:
from hermes_state import DEFAULT_DB_PATH
@@ -316,5 +330,4 @@ registry.register(
limit=args.get("limit", 3),
db=kw.get("db")),
check_fn=check_session_search_requirements,
requires_env=["OPENROUTER_API_KEY"],
)

View file

@@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
# Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {}
_env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
@@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if current_time - last_time > lifetime_seconds:
env = _active_environments.pop(task_id, None)
_last_activity.pop(task_id, None)
_task_workdirs.pop(task_id, None)
if env is not None:
envs_to_stop.append((task_id, env))
@@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
info = {
"count": len(_active_environments),
"task_ids": list(_active_environments.keys()),
"workdirs": dict(_task_workdirs),
"workdirs": {},
}
# Calculate total disk usage
@@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments():
"""Clean up ALL active environments. Use with caution."""
global _active_environments, _last_activity, _task_workdirs
global _active_environments, _last_activity
task_ids = list(_active_environments.keys())
cleaned = 0
@@ -661,7 +659,7 @@ def cleanup_all_environments():
def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id."""
global _active_environments, _last_activity, _task_workdirs
global _active_environments, _last_activity
# Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock
@@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
env = None
with _env_lock:
env = _active_environments.pop(task_id, None)
_task_workdirs.pop(task_id, None)
_last_activity.pop(task_id, None)
# Clean up per-task creation lock
@@ -782,17 +779,6 @@ def terminal_tool(
default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout
# For local environment in batch mode, create a unique subdirectory per task
# This prevents parallel tasks from overwriting each other's files
# In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
if env_type == "local" and not os.getenv("HERMES_QUIET"):
with _env_lock:
if effective_task_id not in _task_workdirs:
task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
task_workdir.mkdir(parents=True, exist_ok=True)
_task_workdirs[effective_task_id] = str(task_workdir)
cwd = _task_workdirs[effective_task_id]
# Start cleanup thread
_start_cleanup_thread()
@@ -874,11 +860,16 @@ def terminal_tool(
"description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False)
# Command was blocked - return informative message
# Command was blocked - include the pattern category so the caller knows why
desc = approval.get("description", "potentially dangerous operation")
fallback_msg = (
f"Command denied: matches '{desc}' pattern. "
"Use the approval prompt to allow it, or rephrase the command."
)
return json.dumps({
"output": "",
"exit_code": -1,
"error": approval.get("message", "Command denied - potentially dangerous operation"),
"error": approval.get("message", fallback_msg),
"status": "blocked"
}, ensure_ascii=False)
@@ -996,11 +987,17 @@ def terminal_tool(
# Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type)
# Truncate output if too long
# Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS:
truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
omitted = len(output) - head_chars - tail_chars
truncated_notice = (
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
f"out of {len(output)} total] ...\n\n"
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
return json.dumps({
"output": output.strip() if output else "",

View file

@@ -36,13 +36,20 @@ import base64
from pathlib import Path
from typing import Dict, Any, Optional
import httpx
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
from openai import AsyncOpenAI
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
# Configuration for vision processing
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
# Resolve vision auxiliary client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -230,9 +237,13 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100])
# Check API key availability
if not os.getenv("OPENROUTER_API_KEY"):
raise ValueError("OPENROUTER_API_KEY environment variable not set")
# Check auxiliary vision client availability
if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL
local_path = Path(image_url)
@@ -291,18 +302,12 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model)
# Call the vision API with reasoning enabled
response = await _get_openrouter_client().chat.completions.create(
# Call the vision API
response = await _aux_async_client.chat.completions.create(
model=model,
messages=messages,
temperature=0.1, # Low temperature for consistent analysis
max_tokens=2000, # Generous limit for detailed analysis
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
temperature=0.1,
max_tokens=2000,
)
# Extract the analysis
@@ -353,13 +358,8 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool:
"""
Check if all requirements for vision tools are met.
Returns:
bool: True if requirements are met, False otherwise
"""
return check_openrouter_api_key()
"""Check if an auxiliary vision model is available."""
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@@ -379,16 +379,15 @@ if __name__ == "__main__":
print("👁️ Vision Tools Module")
print("=" * 40)
# Check if API key is available
api_available = check_openrouter_api_key()
# Check if vision model is available
api_available = check_vision_requirements()
if not api_available:
print("❌ OPENROUTER_API_KEY environment variable not set")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
print("Get API key at: https://openrouter.ai/")
print("❌ No auxiliary vision model available")
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
exit(1)
else:
print("✅ OpenRouter API key found")
print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
image_url = args.get("image_url", "")
question = args.get("question", "")
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview")
model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
return vision_analyze_tool(image_url, full_prompt, model)
registry.register(
@@ -464,6 +464,5 @@ registry.register(
schema=VISION_ANALYZE_SCHEMA,
handler=_handle_vision_analyze,
check_fn=check_vision_requirements,
requires_env=["OPENROUTER_API_KEY"],
is_async=True,
)

View file

@@ -47,7 +47,8 @@ import re
import asyncio
from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
from tools.openrouter_client import get_async_client as _get_openrouter_client
from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@@ -64,9 +65,17 @@ def _get_firecrawl_client():
_firecrawl_client = Firecrawl(api_key=api_key)
return _firecrawl_client
DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries):
try:
response = await _get_openrouter_client().chat.completions.create(
if _aux_async_client is None:
logger.warning("No auxiliary model available for web content processing")
return None
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
@@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
],
temperature=0.1,
max_tokens=max_tokens,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
)
return response.choices[0].message.content.strip()
except Exception as api_error:
@@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary."""
try:
response = await _get_openrouter_client().chat.completions.create(
if _aux_async_client is None:
logger.warning("No auxiliary model for synthesis, concatenating summaries")
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
],
temperature=0.1,
max_tokens=4000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
)
final_summary = response.choices[0].message.content.strip()
@@ -677,8 +684,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
# Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and _aux_async_client is not None:
logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@@ -744,8 +751,8 @@ async def web_extract_tool(
else:
logger.warning("%s (no content to process)", url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior)
@@ -973,8 +980,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
# Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and _aux_async_client is not None:
logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@@ -1040,8 +1047,8 @@ async def web_crawl_tool(
else:
logger.warning("%s (no content to process)", page_url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior)
@@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
return bool(os.getenv("FIRECRAWL_API_KEY"))
def check_nous_api_key() -> bool:
"""
Check if the Nous Research API key is available in environment variables.
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("OPENROUTER_API_KEY"))
def check_auxiliary_model() -> bool:
"""Check if an auxiliary text model is available for LLM content processing."""
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@@ -1120,7 +1122,7 @@ if __name__ == "__main__":
# Check if API keys are available
firecrawl_available = check_firecrawl_api_key()
nous_available = check_nous_api_key()
nous_available = check_auxiliary_model()
if not firecrawl_available:
print("❌ FIRECRAWL_API_KEY environment variable not set")
@@ -1130,12 +1132,11 @@ if __name__ == "__main__":
print("✅ Firecrawl API key found")
if not nous_available:
print("❌ OPENROUTER_API_KEY environment variable not set")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
print("Get API key at: https://inference-api.nousresearch.com/")
print("⚠️ Without Nous API key, LLM content processing will be disabled")
print("❌ No auxiliary model available for LLM content processing")
print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
else:
print("✅ Nous Research API key found")
print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
if not firecrawl_available:
exit(1)
@@ -1143,7 +1144,7 @@ if __name__ == "__main__":
print("🛠️ Web tools ready for use!")
if nous_available:
print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter")
print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
# Show debug mode status