Enhance async tool execution and error handling in Hermes agent for Atropos integration
- Updated `.gitignore` to exclude `testlogs` directory. - Refactored `handle_web_function_call` in `model_tools.py` to support running async functions in existing event loops, improving compatibility with Atropos. - Introduced a thread pool executor in `agent_loop.py` for running synchronous tool calls that internally use `asyncio.run()`, preventing deadlocks. - Added `ToolError` class to track tool execution errors, enhancing error reporting during agent loops. - Updated `wandb_log` method in `hermes_base_env.py` to log tool error statistics for better monitoring. - Implemented patches in `patches.py` to ensure async-safe operation of tools within Atropos's event loop. - Enhanced `ToolContext` and `terminal_tool.py` to utilize the new async handling, improving overall tool execution reliability.
This commit is contained in:
parent
a8809bbd3e
commit
d999d9876d
9 changed files with 540 additions and 64 deletions
|
|
@ -2,6 +2,7 @@
|
|||
"""File Tools Module - LLM agent file manipulation tools."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
from typing import Optional
|
||||
from tools.file_operations import ShellFileOperations
|
||||
|
|
@ -11,23 +12,85 @@ _file_ops_cache: dict = {}
|
|||
|
||||
|
||||
def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
"""Get or create ShellFileOperations for a terminal environment."""
|
||||
from tools.terminal_tool import _active_environments, _env_lock, _LocalEnvironment
|
||||
"""Get or create ShellFileOperations for a terminal environment.
|
||||
|
||||
Respects the TERMINAL_ENV setting -- if the task_id doesn't have an
|
||||
environment yet, creates one using the configured backend (local, docker,
|
||||
modal, etc.) rather than always defaulting to local.
|
||||
"""
|
||||
from tools.terminal_tool import (
|
||||
_active_environments, _env_lock, _create_environment,
|
||||
_get_env_config, _last_activity, _start_cleanup_thread,
|
||||
_check_disk_usage_warning,
|
||||
)
|
||||
import time
|
||||
|
||||
# Fast path: check cache without heavy locks
|
||||
with _file_ops_lock:
|
||||
if task_id in _file_ops_cache:
|
||||
return _file_ops_cache[task_id]
|
||||
|
||||
# Check if we need to create a new environment
|
||||
needs_creation = False
|
||||
with _env_lock:
|
||||
if task_id not in _active_environments:
|
||||
needs_creation = True
|
||||
|
||||
# Create environment OUTSIDE locks so we don't block other rollouts
|
||||
# during slow Modal/Docker startup (~10s)
|
||||
if needs_creation:
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
if env_type == "docker":
|
||||
image = config["docker_image"]
|
||||
elif env_type == "singularity":
|
||||
image = config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = config["modal_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
cwd = config["cwd"]
|
||||
_check_disk_usage_warning()
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
|
||||
|
||||
new_env = _create_environment(
|
||||
env_type=env_type,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=config["timeout"],
|
||||
)
|
||||
|
||||
# Store under lock (brief) -- do NOT call _start_cleanup_thread inside
|
||||
# the lock because it also acquires _env_lock (non-reentrant = deadlock)
|
||||
created = False
|
||||
with _env_lock:
|
||||
if task_id not in _active_environments:
|
||||
import os
|
||||
env = _LocalEnvironment(cwd=os.getcwd(), timeout=60)
|
||||
_active_environments[task_id] = env
|
||||
terminal_env = _active_environments[task_id]
|
||||
_active_environments[task_id] = new_env
|
||||
created = True
|
||||
else:
|
||||
try:
|
||||
if hasattr(new_env, 'stop'):
|
||||
new_env.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
file_ops = ShellFileOperations(terminal_env)
|
||||
if created:
|
||||
_start_cleanup_thread()
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
|
||||
|
||||
# Now get the environment and build file_ops
|
||||
with _env_lock:
|
||||
_last_activity[task_id] = time.time()
|
||||
terminal_env = _active_environments[task_id]
|
||||
|
||||
file_ops = ShellFileOperations(terminal_env)
|
||||
with _file_ops_lock:
|
||||
_file_ops_cache[task_id] = file_ops
|
||||
return file_ops
|
||||
return file_ops
|
||||
|
||||
|
||||
def clear_file_ops_cache(task_id: str = None):
|
||||
|
|
@ -56,6 +119,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
|
|||
result = file_ops.write_file(path, content)
|
||||
return json.dumps(result.to_dict(), ensure_ascii=False)
|
||||
except Exception as e:
|
||||
print(f"[FileTools] write_file error: {type(e).__name__}: {e}", flush=True)
|
||||
return json.dumps({"error": str(e)}, ensure_ascii=False)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1347,40 +1347,61 @@ def terminal_tool(
|
|||
_start_cleanup_thread()
|
||||
|
||||
# Get or create environment
|
||||
# Check under lock, but create OUTSIDE lock so we don't block
|
||||
# other concurrent rollouts during slow Modal/Docker startup
|
||||
needs_creation = False
|
||||
with _env_lock:
|
||||
if effective_task_id not in _active_environments:
|
||||
# Check disk usage before creating new environment
|
||||
_check_disk_usage_warning()
|
||||
|
||||
try:
|
||||
# Build SSH config if using SSH environment
|
||||
ssh_config = None
|
||||
if env_type == "ssh":
|
||||
ssh_config = {
|
||||
"host": config.get("ssh_host", ""),
|
||||
"user": config.get("ssh_user", ""),
|
||||
"port": config.get("ssh_port", 22),
|
||||
"key": config.get("ssh_key", ""),
|
||||
}
|
||||
|
||||
_active_environments[effective_task_id] = _create_environment(
|
||||
env_type=env_type,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=effective_timeout,
|
||||
ssh_config=ssh_config
|
||||
)
|
||||
except ImportError as e:
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
|
||||
"status": "disabled"
|
||||
}, ensure_ascii=False)
|
||||
needs_creation = True
|
||||
else:
|
||||
_last_activity[effective_task_id] = time.time()
|
||||
env = _active_environments[effective_task_id]
|
||||
|
||||
# Update last activity time
|
||||
_last_activity[effective_task_id] = time.time()
|
||||
env = _active_environments[effective_task_id]
|
||||
if needs_creation:
|
||||
_check_disk_usage_warning()
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
|
||||
try:
|
||||
ssh_config = None
|
||||
if env_type == "ssh":
|
||||
ssh_config = {
|
||||
"host": config.get("ssh_host", ""),
|
||||
"user": config.get("ssh_user", ""),
|
||||
"port": config.get("ssh_port", 22),
|
||||
"key": config.get("ssh_key", ""),
|
||||
}
|
||||
|
||||
new_env = _create_environment(
|
||||
env_type=env_type,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=effective_timeout,
|
||||
ssh_config=ssh_config
|
||||
)
|
||||
except ImportError as e:
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
|
||||
"status": "disabled"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Store under lock (brief)
|
||||
with _env_lock:
|
||||
if effective_task_id not in _active_environments:
|
||||
_active_environments[effective_task_id] = new_env
|
||||
else:
|
||||
# Another thread created it while we were building -- clean up ours
|
||||
try:
|
||||
if hasattr(new_env, 'stop'):
|
||||
new_env.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_last_activity[effective_task_id] = time.time()
|
||||
env = _active_environments[effective_task_id]
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)
|
||||
|
||||
# Check for dangerous commands (only for local/ssh in interactive modes)
|
||||
# Skip check if force=True (user has confirmed they want to run it)
|
||||
|
|
@ -1435,13 +1456,20 @@ def terminal_tool(
|
|||
retry_count += 1
|
||||
wait_time = 2 ** retry_count
|
||||
print(f"⚠️ Terminal: execution error, retrying in {wait_time}s (attempt {retry_count}/{max_retries})")
|
||||
print(f" Command: {command[:200]}")
|
||||
print(f" Error: {type(e).__name__}: {e}")
|
||||
print(f" Task ID: {effective_task_id}, Backend: {env_type}")
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
|
||||
print(f"❌ Terminal: execution failed after {max_retries} retries")
|
||||
print(f" Command: {command[:200]}")
|
||||
print(f" Error: {type(e).__name__}: {e}")
|
||||
print(f" Task ID: {effective_task_id}, Backend: {env_type}")
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Command execution failed: {str(e)}"
|
||||
"error": f"Command execution failed: {type(e).__name__}: {str(e)}"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Got a result
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue