Add a Claude Code-like CLI
- Introduced `cli-config.yaml.example` to provide a template for configuring CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.
parent 8e986584f4
commit bc76a032ba
10 changed files with 2251 additions and 118 deletions
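The change that threads through almost every hunk below is gating console output on a HERMES_QUIET environment variable, either inline or through the _verbose_print helper this commit adds to the web tools module. A minimal sketch of the pattern (the demo call at the end is illustrative, not part of the commit):

    import os

    def _verbose_print(*args, **kwargs):
        """Print only if not in quiet mode (HERMES_QUIET not set)."""
        if not os.getenv("HERMES_QUIET"):
            print(*args, **kwargs)

    _verbose_print("shown unless HERMES_QUIET is set")  # illustrative call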
@@ -1343,8 +1343,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
     if task_id is None:
         task_id = "default"
 
-    print(f"[browser_tool] cleanup_browser called for task_id: {task_id}", file=sys.stderr)
-    print(f"[browser_tool] Active sessions: {list(_active_sessions.keys())}", file=sys.stderr)
+    if not os.getenv("HERMES_QUIET"):
+        print(f"[browser_tool] cleanup_browser called for task_id: {task_id}", file=sys.stderr)
+        print(f"[browser_tool] Active sessions: {list(_active_sessions.keys())}", file=sys.stderr)
 
     if task_id in _active_sessions:
        session_info = _active_sessions[task_id]
@@ -1368,8 +1369,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
                 print(f"[browser_tool] Exception during BrowserBase session close: {e}", file=sys.stderr)
 
         del _active_sessions[task_id]
-        print(f"[browser_tool] Removed task {task_id} from active sessions", file=sys.stderr)
-    else:
+        if not os.getenv("HERMES_QUIET"):
+            print(f"[browser_tool] Removed task {task_id} from active sessions", file=sys.stderr)
+    elif not os.getenv("HERMES_QUIET"):
         print(f"[browser_tool] No active session found for task_id: {task_id}", file=sys.stderr)
@@ -64,11 +64,13 @@ def _get_scratch_dir() -> Path:
         # Create user-specific subdirectory
         user_scratch = scratch / os.getenv("USER", "hermes") / "hermes-agent"
         user_scratch.mkdir(parents=True, exist_ok=True)
-        print(f"[Terminal] Using /scratch for sandboxes: {user_scratch}")
+        if not os.getenv("HERMES_QUIET"):
+            print(f"[Terminal] Using /scratch for sandboxes: {user_scratch}")
        return user_scratch
 
     # Fall back to /tmp
-    print("[Terminal] Warning: /scratch not available, using /tmp (limited space)")
+    if not os.getenv("HERMES_QUIET"):
+        print("[Terminal] Warning: /scratch not available, using /tmp (limited space)")
     return Path(tempfile.gettempdir())
@@ -307,6 +309,144 @@ class _SingularityEnvironment:
         """Cleanup on destruction."""
         self.cleanup()
 
 
+class _SSHEnvironment:
+    """
+    SSH-based remote execution environment.
+
+    Runs commands on a remote machine over SSH, keeping the agent code
+    completely isolated from the execution environment. Uses SSH ControlMaster
+    for connection persistence (faster subsequent commands).
+
+    Security benefits:
+    - Agent cannot modify its own code
+    - Remote machine acts as a sandbox
+    - Clear separation between agent and execution environment
+    """
+
+    def __init__(self, host: str, user: str, cwd: str = "/tmp", timeout: int = 60,
+                 port: int = 22, key_path: str = ""):
+        self.host = host
+        self.user = user
+        self.cwd = cwd
+        self.timeout = timeout
+        self.port = port
+        self.key_path = key_path
+
+        # Create control socket directory for connection persistence
+        self.control_dir = Path(tempfile.gettempdir()) / "hermes-ssh"
+        self.control_dir.mkdir(parents=True, exist_ok=True)
+        self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
+
+        # Test connection and establish ControlMaster
+        self._establish_connection()
+
+    def _build_ssh_command(self, extra_args: list = None) -> list:
+        """Build base SSH command with connection options."""
+        cmd = ["ssh"]
+
+        # Connection multiplexing for performance
+        cmd.extend(["-o", f"ControlPath={self.control_socket}"])
+        cmd.extend(["-o", "ControlMaster=auto"])
+        cmd.extend(["-o", "ControlPersist=300"])  # Keep connection alive for 5 min
+
+        # Standard options
+        cmd.extend(["-o", "BatchMode=yes"])  # No password prompts
+        cmd.extend(["-o", "StrictHostKeyChecking=accept-new"])  # Accept new hosts
+        cmd.extend(["-o", "ConnectTimeout=10"])
+
+        # Port
+        if self.port != 22:
+            cmd.extend(["-p", str(self.port)])
+
+        # Private key
+        if self.key_path:
+            cmd.extend(["-i", self.key_path])
+
+        # Extra args (like -t for TTY)
+        if extra_args:
+            cmd.extend(extra_args)
+
+        # Target
+        cmd.append(f"{self.user}@{self.host}")
+
+        return cmd
+
+    def _establish_connection(self):
+        """Test SSH connection and establish ControlMaster."""
+        cmd = self._build_ssh_command()
+        cmd.append("echo 'SSH connection established'")
+
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=15
+            )
+            if result.returncode != 0:
+                error_msg = result.stderr.strip() or result.stdout.strip()
+                raise RuntimeError(f"SSH connection failed: {error_msg}")
+        except subprocess.TimeoutExpired:
+            raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")
+
+    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
+        """Execute a command on the remote host via SSH."""
+        work_dir = cwd or self.cwd
+        effective_timeout = timeout or self.timeout
+
+        # Wrap command to run in the correct directory
+        # Use bash -c to handle complex commands properly
+        wrapped_command = f'cd {work_dir} && {command}'
+
+        cmd = self._build_ssh_command()
+        cmd.extend(["bash", "-c", wrapped_command])
+
+        try:
+            result = subprocess.run(
+                cmd,
+                text=True,
+                timeout=effective_timeout,
+                encoding="utf-8",
+                errors="replace",
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+            )
+            return {"output": result.stdout, "returncode": result.returncode}
+        except subprocess.TimeoutExpired:
+            return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
+        except Exception as e:
+            return {"output": f"SSH execution error: {str(e)}", "returncode": 1}
+
+    def cleanup(self):
+        """Close the SSH ControlMaster connection."""
+        if self.control_socket.exists():
+            try:
+                # Send exit command to ControlMaster
+                cmd = ["ssh", "-o", f"ControlPath={self.control_socket}", "-O", "exit",
+                       f"{self.user}@{self.host}"]
+                subprocess.run(cmd, capture_output=True, timeout=5)
+            except:
+                pass
+
+        # Remove socket file
+        try:
+            self.control_socket.unlink()
+        except:
+            pass
+
+    def stop(self):
+        """Alias for cleanup."""
+        self.cleanup()
+
+    def __del__(self):
+        """Cleanup on destruction."""
+        try:
+            self.cleanup()
+        except:
+            pass
+
+
 # Tool description for LLM
 TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux environment.
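For orientation, a hedged usage sketch of the new _SSHEnvironment; the host and user values are placeholders, and only the constructor and methods shown in the hunk above are assumed:

    # Illustrative only: connect, run one command, tear down the ControlMaster.
    env = _SSHEnvironment(host="sandbox.example.com", user="hermes", cwd="/tmp", timeout=60)
    result = env.execute("uname -a")
    print(result["returncode"], result["output"])
    env.cleanup()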
@@ -348,25 +488,31 @@ _cleanup_running = False
 def _get_env_config() -> Dict[str, Any]:
     """Get terminal environment configuration from environment variables."""
     return {
-        "env_type": os.getenv("TERMINAL_ENV", "local"),  # local, docker, singularity, or modal
+        "env_type": os.getenv("TERMINAL_ENV", "local"),  # local, docker, singularity, modal, or ssh
         "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", "python:3.11"),
         "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", "docker://python:3.11"),
         "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", "python:3.11"),
         "cwd": os.getenv("TERMINAL_CWD", "/tmp"),
         "timeout": int(os.getenv("TERMINAL_TIMEOUT", "60")),
         "lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
+        # SSH-specific config
+        "ssh_host": os.getenv("TERMINAL_SSH_HOST", ""),
+        "ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
+        "ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
+        "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),  # Path to private key (optional, uses ssh-agent if empty)
     }
 
 
-def _create_environment(env_type: str, image: str, cwd: str, timeout: int):
+def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: dict = None):
     """
     Create an execution environment from mini-swe-agent.
 
     Args:
-        env_type: One of "local", "docker", "singularity", "modal"
-        image: Docker/Singularity/Modal image name (ignored for local)
+        env_type: One of "local", "docker", "singularity", "modal", "ssh"
+        image: Docker/Singularity/Modal image name (ignored for local/ssh)
         cwd: Working directory
         timeout: Default command timeout
+        ssh_config: SSH connection config (for env_type="ssh")
 
     Returns:
         Environment instance with execute() method
@@ -387,8 +533,20 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int):
         from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
         return SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout)
 
+    elif env_type == "ssh":
+        if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
+            raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
+        return _SSHEnvironment(
+            host=ssh_config["host"],
+            user=ssh_config["user"],
+            port=ssh_config.get("port", 22),
+            key_path=ssh_config.get("key", ""),
+            cwd=cwd,
+            timeout=timeout
+        )
+
     else:
-        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', or 'modal'")
+        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', or 'ssh'")
 
 
 def _cleanup_inactive_envs(lifetime_seconds: int = 300):
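Putting the two pieces together, a sketch of how the SSH backend might be selected before the terminal tool runs (the variable names come from _get_env_config above; the host and user values are placeholders):

    import os

    os.environ["TERMINAL_ENV"] = "ssh"
    os.environ["TERMINAL_SSH_HOST"] = "sandbox.example.com"  # placeholder
    os.environ["TERMINAL_SSH_USER"] = "hermes"               # placeholder
    # TERMINAL_SSH_PORT defaults to "22"; leave TERMINAL_SSH_KEY empty to use ssh-agent.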
@@ -416,7 +574,8 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
                     env.terminate()
 
                 del _active_environments[task_id]
-                print(f"[Terminal Cleanup] Cleaned up inactive environment for task: {task_id}")
+                if not os.getenv("HERMES_QUIET"):
+                    print(f"[Terminal Cleanup] Cleaned up inactive environment for task: {task_id}")
 
                 if task_id in _last_activity:
                     del _last_activity[task_id]
@@ -425,10 +584,11 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
 
             except Exception as e:
                 error_str = str(e)
-                if "404" in error_str or "not found" in error_str.lower():
-                    print(f"[Terminal Cleanup] Environment for task {task_id} already cleaned up")
-                else:
-                    print(f"[Terminal Cleanup] Error cleaning up environment for task {task_id}: {e}")
+                if not os.getenv("HERMES_QUIET"):
+                    if "404" in error_str or "not found" in error_str.lower():
+                        print(f"[Terminal Cleanup] Environment for task {task_id} already cleaned up")
+                    else:
+                        print(f"[Terminal Cleanup] Error cleaning up environment for task {task_id}: {e}")
 
                 # Always remove from tracking dicts
                 if task_id in _active_environments:
@@ -448,7 +608,8 @@ def _cleanup_thread_worker():
             config = _get_env_config()
             _cleanup_inactive_envs(config["lifetime_seconds"])
         except Exception as e:
-            print(f"[Terminal Cleanup] Error in cleanup thread: {e}")
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[Terminal Cleanup] Error in cleanup thread: {e}")
 
         for _ in range(60):
             if not _cleanup_running:
@@ -545,7 +706,8 @@ def cleanup_vm(task_id: str):
             env.terminate()
 
         del _active_environments[task_id]
-        print(f"[Terminal Cleanup] Manually cleaned up environment for task: {task_id}")
+        if not os.getenv("HERMES_QUIET"):
+            print(f"[Terminal Cleanup] Manually cleaned up environment for task: {task_id}")
 
         if task_id in _task_workdirs:
             del _task_workdirs[task_id]
@@ -554,11 +716,12 @@ def cleanup_vm(task_id: str):
             del _last_activity[task_id]
 
     except Exception as e:
-        error_str = str(e)
-        if "404" in error_str or "not found" in error_str.lower():
-            print(f"[Terminal Cleanup] Environment for task {task_id} already cleaned up")
-        else:
-            print(f"[Terminal Cleanup] Error cleaning up environment for task {task_id}: {e}")
+        if not os.getenv("HERMES_QUIET"):
+            error_str = str(e)
+            if "404" in error_str or "not found" in error_str.lower():
+                print(f"[Terminal Cleanup] Environment for task {task_id} already cleaned up")
+            else:
+                print(f"[Terminal Cleanup] Error cleaning up environment for task {task_id}: {e}")
 
 
 atexit.register(_stop_cleanup_thread)
@@ -616,9 +779,10 @@ def terminal_tool(
     # Use task_id for environment isolation
     effective_task_id = task_id or "default"
 
-    # For local environment, create a unique subdirectory per task
+    # For local environment in batch mode, create a unique subdirectory per task
     # This prevents parallel tasks from overwriting each other's files
-    if env_type == "local":
+    # In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
+    if env_type == "local" and not os.getenv("HERMES_QUIET"):
         import uuid
         with _env_lock:
             if effective_task_id not in _task_workdirs:
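The branch above keeps batch tasks isolated by giving each its own working directory; a sketch of the idea (the exact naming scheme is an assumption, not shown in this hunk):

    import uuid
    from pathlib import Path

    def _make_task_workdir(base: Path, task_id: str) -> Path:
        # Assumed naming: a unique per-task subdirectory so parallel tasks
        # cannot overwrite each other's files.
        workdir = base / f"{task_id}-{uuid.uuid4().hex[:8]}"
        workdir.mkdir(parents=True, exist_ok=True)
        return workdir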
@@ -637,11 +801,22 @@ def terminal_tool(
             _check_disk_usage_warning()
 
             try:
+                # Build SSH config if using SSH environment
+                ssh_config = None
+                if env_type == "ssh":
+                    ssh_config = {
+                        "host": config.get("ssh_host", ""),
+                        "user": config.get("ssh_user", ""),
+                        "port": config.get("ssh_port", 22),
+                        "key": config.get("ssh_key", ""),
+                    }
+
                 _active_environments[effective_task_id] = _create_environment(
                     env_type=env_type,
                     image=image,
                     cwd=cwd,
-                    timeout=effective_timeout
+                    timeout=effective_timeout,
+                    ssh_config=ssh_config
                 )
             except ImportError as e:
                 return json.dumps({
@@ -99,7 +99,13 @@ DEBUG_DATA = {
 # Create logs directory if debug mode is enabled
 if DEBUG_MODE:
     DEBUG_LOG_PATH.mkdir(exist_ok=True)
-    print(f"🐛 Debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
+    _verbose_print(f"🐛 Debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
+
+
+def _verbose_print(*args, **kwargs):
+    """Print only if not in quiet mode (HERMES_QUIET not set)."""
+    if not os.getenv("HERMES_QUIET"):
+        print(*args, **kwargs)
 
 
 def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
@@ -140,7 +146,7 @@ def _save_debug_log() -> None:
         with open(debug_filepath, 'w', encoding='utf-8') as f:
             json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
 
-        print(f"🐛 Debug log saved: {debug_filepath}")
+        _verbose_print(f"🐛 Debug log saved: {debug_filepath}")
 
     except Exception as e:
         print(f"❌ Error saving debug log: {str(e)}")
@@ -185,12 +191,12 @@ async def process_content_with_llm(
     # Refuse if content is absurdly large
     if content_len > MAX_CONTENT_SIZE:
         size_mb = content_len / 1_000_000
-        print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
+        _verbose_print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
         return f"[Content too large to process: {size_mb:.1f}MB. Try using web_crawl with specific extraction instructions, or search for a more focused source.]"
 
     # Skip processing if content is too short
     if content_len < min_length:
-        print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
+        _verbose_print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
         return None
 
     # Create context information
@@ -203,13 +209,13 @@ async def process_content_with_llm(
 
     # Check if we need chunked processing
     if content_len > CHUNK_THRESHOLD:
-        print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
+        _verbose_print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
         return await _process_large_content_chunked(
             content, context_str, model, CHUNK_SIZE, MAX_OUTPUT_SIZE
         )
 
     # Standard single-pass processing for normal content
-    print(f"🧠 Processing content with LLM ({content_len} characters)")
+    _verbose_print(f"🧠 Processing content with LLM ({content_len} characters)")
 
     processed_content = await _call_summarizer_llm(content, context_str, model)
 
@@ -221,7 +227,7 @@ async def process_content_with_llm(
     # Log compression metrics
     processed_length = len(processed_content)
     compression_ratio = processed_length / content_len if content_len > 0 else 1.0
-    print(f"✅ Content processed: {content_len} → {processed_length} chars ({compression_ratio:.1%})")
+    _verbose_print(f"✅ Content processed: {content_len} → {processed_length} chars ({compression_ratio:.1%})")
 
     return processed_content
 
@@ -318,8 +324,8 @@ Create a markdown summary that captures all key information in a well-organized,
         except Exception as api_error:
             last_error = api_error
             if attempt < max_retries - 1:
-                print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
-                print(f"   Retrying in {retry_delay}s...")
+                _verbose_print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
+                _verbose_print(f"   Retrying in {retry_delay}s...")
                 await asyncio.sleep(retry_delay)
                 retry_delay = min(retry_delay * 2, 60)
             else:
@@ -355,7 +361,7 @@ async def _process_large_content_chunked(
         chunk = content[i:i + chunk_size]
         chunks.append(chunk)
 
-    print(f"  📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")
+    _verbose_print(f"  📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")
 
     # Summarize each chunk in parallel
     async def summarize_chunk(chunk_idx: int, chunk_content: str) -> tuple[int, Optional[str]]:
@@ -371,10 +377,10 @@ async def _process_large_content_chunked(
                 chunk_info=chunk_info
             )
             if summary:
-                print(f"  ✅ Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,} → {len(summary):,} chars")
+                _verbose_print(f"  ✅ Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,} → {len(summary):,} chars")
             return chunk_idx, summary
         except Exception as e:
-            print(f"  ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
+            _verbose_print(f"  ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
             return chunk_idx, None
 
     # Run all chunk summarizations in parallel
@@ -391,7 +397,7 @@ async def _process_large_content_chunked(
         print(f"  ❌ All chunk summarizations failed")
         return "[Failed to process large content: all chunk summarizations failed]"
 
-    print(f"  📊 Got {len(summaries)}/{len(chunks)} chunk summaries")
+    _verbose_print(f"  📊 Got {len(summaries)}/{len(chunks)} chunk summaries")
 
     # If only one chunk succeeded, just return it (with cap)
     if len(summaries) == 1:
@@ -401,7 +407,7 @@ async def _process_large_content_chunked(
         return result
 
     # Synthesize the summaries into a final summary
-    print(f"  🔗 Synthesizing {len(summaries)} summaries...")
+    _verbose_print(f"  🔗 Synthesizing {len(summaries)} summaries...")
 
     combined_summaries = "\n\n---\n\n".join(summaries)
 
@@ -443,11 +449,11 @@ Create a single, unified markdown summary."""
         final_len = len(final_summary)
         compression = final_len / original_len if original_len > 0 else 1.0
 
-        print(f"  ✅ Synthesis complete: {original_len:,} → {final_len:,} chars ({compression:.2%})")
+        _verbose_print(f"  ✅ Synthesis complete: {original_len:,} → {final_len:,} chars ({compression:.2%})")
         return final_summary
 
     except Exception as e:
-        print(f"  ⚠️ Synthesis failed: {str(e)[:100]}")
+        _verbose_print(f"  ⚠️ Synthesis failed: {str(e)[:100]}")
         # Fall back to concatenated summaries with truncation
         fallback = "\n\n".join(summaries)
         if len(fallback) > max_output_size:
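Taken together, the _process_large_content_chunked hunks implement a map-reduce summarization: split the content, summarize chunks in parallel, then synthesize (falling back to concatenation if synthesis fails). A condensed sketch of that control flow, with a generic summarize coroutine standing in for the real _call_summarizer_llm:

    import asyncio

    async def summarize_large(content: str, chunk_size: int, summarize) -> str:
        # Map: summarize fixed-size slices concurrently.
        chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
        results = await asyncio.gather(*(summarize(c) for c in chunks), return_exceptions=True)
        summaries = [r for r in results if isinstance(r, str)]
        if not summaries:
            return "[all chunk summarizations failed]"
        if len(summaries) == 1:
            return summaries[0]
        # Reduce: one more pass over the joined chunk summaries.
        return await summarize("\n\n---\n\n".join(summaries))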
@@ -534,7 +540,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
     }
 
     try:
-        print(f"🔍 Searching the web for: '{query}' (limit: {limit})")
+        if not os.getenv("HERMES_QUIET"):
+            _verbose_print(f"🔍 Searching the web for: '{query}' (limit: {limit})")
 
         # Use Firecrawl's v2 search functionality WITHOUT scraping
         # We only want search result metadata, not scraped content
@@ -574,7 +581,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
             web_results = response['web']
 
         results_count = len(web_results)
-        print(f"✅ Found {results_count} search results")
+        if not os.getenv("HERMES_QUIET"):
+            _verbose_print(f"✅ Found {results_count} search results")
 
         # Build response with just search metadata (URLs, titles, descriptions)
         response_data = {
@@ -654,7 +662,7 @@ async def web_extract_tool(
     }
 
     try:
-        print(f"📄 Extracting content from {len(urls)} URL(s)")
+        _verbose_print(f"📄 Extracting content from {len(urls)} URL(s)")
 
         # Determine requested formats for Firecrawl v2
         formats: List[str] = []
@@ -672,7 +680,7 @@ async def web_extract_tool(
 
         for url in urls:
             try:
-                print(f"  📄 Scraping: {url}")
+                _verbose_print(f"  📄 Scraping: {url}")
                 scrape_result = _get_firecrawl_client().scrape(
                     url=url,
                     formats=formats
@@ -748,14 +756,14 @@ async def web_extract_tool(
         response = {"results": results}
 
         pages_extracted = len(response.get('results', []))
-        print(f"✅ Extracted content from {pages_extracted} pages")
+        _verbose_print(f"✅ Extracted content from {pages_extracted} pages")
 
         debug_call_data["pages_extracted"] = pages_extracted
         debug_call_data["original_response_size"] = len(json.dumps(response))
 
         # Process each result with LLM if enabled
         if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
-            print("🧠 Processing extracted content with LLM (parallel)...")
+            _verbose_print("🧠 Processing extracted content with LLM (parallel)...")
             debug_call_data["processing_applied"].append("llm_processing")
 
             # Prepare tasks for parallel processing
@@ -813,12 +821,12 @@ async def web_extract_tool(
                 if status == "processed":
                     debug_call_data["compression_metrics"].append(metrics)
                     debug_call_data["pages_processed_with_llm"] += 1
-                    print(f"  📝 {url} (processed)")
+                    _verbose_print(f"  📝 {url} (processed)")
                 elif status == "too_short":
                     debug_call_data["compression_metrics"].append(metrics)
-                    print(f"  📝 {url} (no processing - content too short)")
+                    _verbose_print(f"  📝 {url} (no processing - content too short)")
                 else:
-                    print(f"  ⚠️ {url} (no content to process)")
+                    _verbose_print(f"  ⚠️ {url} (no content to process)")
         else:
             if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
                 print("⚠️ LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
@@ -828,7 +836,7 @@ async def web_extract_tool(
             for result in response.get('results', []):
                 url = result.get('url', 'Unknown URL')
                 content_length = len(result.get('raw_content', ''))
-                print(f"  📝 {url} ({content_length} characters)")
+                _verbose_print(f"  📝 {url} ({content_length} characters)")
 
         # Trim output to minimal fields per entry: title, content, error
         trimmed_results = [
@@ -923,10 +931,10 @@ async def web_crawl_tool(
     # Ensure URL has protocol
     if not url.startswith(('http://', 'https://')):
         url = f'https://{url}'
-        print(f"  📝 Added https:// prefix to URL: {url}")
+        _verbose_print(f"  📝 Added https:// prefix to URL: {url}")
 
     instructions_text = f" with instructions: '{instructions}'" if instructions else ""
-    print(f"🕷️ Crawling {url}{instructions_text}")
+    _verbose_print(f"🕷️ Crawling {url}{instructions_text}")
 
     # Use Firecrawl's v2 crawl functionality
     # Docs: https://docs.firecrawl.dev/features/crawl
@@ -943,7 +951,7 @@ async def web_crawl_tool(
     # Note: The 'prompt' parameter is not documented for crawl
     # Instructions are typically used with the Extract endpoint, not Crawl
     if instructions:
-        print(f"  ℹ️ Note: Instructions parameter ignored (not supported in crawl API)")
+        _verbose_print(f"  ℹ️ Note: Instructions parameter ignored (not supported in crawl API)")
 
     # Use the crawl method which waits for completion automatically
     try:
@@ -963,23 +971,23 @@ async def web_crawl_tool(
         # The crawl_result is a CrawlJob object with a 'data' attribute containing list of Document objects
         if hasattr(crawl_result, 'data'):
             data_list = crawl_result.data if crawl_result.data else []
-            print(f"  📊 Status: {getattr(crawl_result, 'status', 'unknown')}")
-            print(f"  📄 Retrieved {len(data_list)} pages")
+            _verbose_print(f"  📊 Status: {getattr(crawl_result, 'status', 'unknown')}")
+            _verbose_print(f"  📄 Retrieved {len(data_list)} pages")
 
             # Debug: Check other attributes if no data
             if not data_list:
-                print(f"  🔍 Debug - CrawlJob attributes: {[attr for attr in dir(crawl_result) if not attr.startswith('_')]}")
-                print(f"  🔍 Debug - Status: {getattr(crawl_result, 'status', 'N/A')}")
-                print(f"  🔍 Debug - Total: {getattr(crawl_result, 'total', 'N/A')}")
-                print(f"  🔍 Debug - Completed: {getattr(crawl_result, 'completed', 'N/A')}")
+                _verbose_print(f"  🔍 Debug - CrawlJob attributes: {[attr for attr in dir(crawl_result) if not attr.startswith('_')]}")
+                _verbose_print(f"  🔍 Debug - Status: {getattr(crawl_result, 'status', 'N/A')}")
+                _verbose_print(f"  🔍 Debug - Total: {getattr(crawl_result, 'total', 'N/A')}")
+                _verbose_print(f"  🔍 Debug - Completed: {getattr(crawl_result, 'completed', 'N/A')}")
 
         elif isinstance(crawl_result, dict) and 'data' in crawl_result:
             data_list = crawl_result.get("data", [])
         else:
             print("  ⚠️ Unexpected crawl result type")
-            print(f"  🔍 Debug - Result type: {type(crawl_result)}")
+            _verbose_print(f"  🔍 Debug - Result type: {type(crawl_result)}")
             if hasattr(crawl_result, '__dict__'):
-                print(f"  🔍 Debug - Result attributes: {list(crawl_result.__dict__.keys())}")
+                _verbose_print(f"  🔍 Debug - Result attributes: {list(crawl_result.__dict__.keys())}")
 
         for item in data_list:
             # Process each crawled page - properly handle object serialization
@@ -1044,14 +1052,14 @@ async def web_crawl_tool(
         response = {"results": pages}
 
         pages_crawled = len(response.get('results', []))
-        print(f"✅ Crawled {pages_crawled} pages")
+        _verbose_print(f"✅ Crawled {pages_crawled} pages")
 
         debug_call_data["pages_crawled"] = pages_crawled
         debug_call_data["original_response_size"] = len(json.dumps(response))
 
         # Process each result with LLM if enabled
         if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
-            print("🧠 Processing crawled content with LLM (parallel)...")
+            _verbose_print("🧠 Processing crawled content with LLM (parallel)...")
             debug_call_data["processing_applied"].append("llm_processing")
 
             # Prepare tasks for parallel processing
@@ -1109,12 +1117,12 @@ async def web_crawl_tool(
                 if status == "processed":
                     debug_call_data["compression_metrics"].append(metrics)
                     debug_call_data["pages_processed_with_llm"] += 1
-                    print(f"  🌐 {page_url} (processed)")
+                    _verbose_print(f"  🌐 {page_url} (processed)")
                 elif status == "too_short":
                     debug_call_data["compression_metrics"].append(metrics)
-                    print(f"  🌐 {page_url} (no processing - content too short)")
+                    _verbose_print(f"  🌐 {page_url} (no processing - content too short)")
                 else:
-                    print(f"  ⚠️ {page_url} (no content to process)")
+                    _verbose_print(f"  ⚠️ {page_url} (no content to process)")
         else:
             if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
                 print("⚠️ LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
@@ -1124,7 +1132,7 @@ async def web_crawl_tool(
             for result in response.get('results', []):
                 page_url = result.get('url', 'Unknown URL')
                 content_length = len(result.get('content', ''))
-                print(f"  🌐 {page_url} ({content_length} characters)")
+                _verbose_print(f"  🌐 {page_url} ({content_length} characters)")
 
         # Trim output to minimal fields per entry: title, content, error
         trimmed_results = [
@@ -1246,7 +1254,7 @@ if __name__ == "__main__":
 
     # Show debug mode status
     if DEBUG_MODE:
-        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
+        _verbose_print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
         print(f"   Debug logs will be saved to: ./logs/web_tools_debug_{DEBUG_SESSION_ID}.json")
     else:
         print("🐛 Debug mode disabled (set WEB_TOOLS_DEBUG=true to enable)")