Enhance async tool execution and error handling in Hermes agent for Atropos integration

- Updated `.gitignore` to exclude `testlogs` directory. - Refactored `handle_web_function_call` in `model_tools.py` to support running async functions in existing event loops, improving compatibility with Atropos. - Introduced a thread pool executor in `agent_loop.py` for running synchronous tool calls that internally use `asyncio.run()`, preventing deadlocks. - Added `ToolError` class to track tool execution errors, enhancing error reporting during agent loops. - Updated `wandb_log` method in `hermes_base_env.py` to log tool error statistics for better monitoring. - Implemented patches in `patches.py` to ensure async-safe operation of tools within Atropos's event loop. - Enhanced `ToolContext` and `terminal_tool.py` to utilize the new async handling, improving overall tool execution reliability.
2026-02-08 05:00:47 +00:00 · 2026-02-08 05:00:47 +00:00 · d999d9876d
commit d999d9876d
parent a8809bbd3e
9 changed files with 540 additions and 64 deletions
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@ -2,6 +2,7 @@
 """File Tools Module - LLM agent file manipulation tools."""

 import json
+import os
 import threading
 from typing import Optional
 from tools.file_operations import ShellFileOperations
@ -11,23 +12,85 @@ _file_ops_cache: dict = {}


 def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
-    """Get or create ShellFileOperations for a terminal environment."""
-    from tools.terminal_tool import _active_environments, _env_lock, _LocalEnvironment
+    """Get or create ShellFileOperations for a terminal environment.
    
+    Respects the TERMINAL_ENV setting -- if the task_id doesn't have an
+    environment yet, creates one using the configured backend (local, docker,
+    modal, etc.) rather than always defaulting to local.
+    """
+    from tools.terminal_tool import (
+        _active_environments, _env_lock, _create_environment,
+        _get_env_config, _last_activity, _start_cleanup_thread,
+        _check_disk_usage_warning,
+    )
+    import time
+    
+    # Fast path: check cache without heavy locks
    with _file_ops_lock:
        if task_id in _file_ops_cache:
            return _file_ops_cache[task_id]
+    
+    # Check if we need to create a new environment
+    needs_creation = False
+    with _env_lock:
+        if task_id not in _active_environments:
+            needs_creation = True
+    
+    # Create environment OUTSIDE locks so we don't block other rollouts
+    # during slow Modal/Docker startup (~10s)
+    if needs_creation:
+        config = _get_env_config()
+        env_type = config["env_type"]
        
+        if env_type == "docker":
+            image = config["docker_image"]
+        elif env_type == "singularity":
+            image = config["singularity_image"]
+        elif env_type == "modal":
+            image = config["modal_image"]
+        else:
+            image = ""
+        
+        cwd = config["cwd"]
+        _check_disk_usage_warning()
+        if not os.getenv("HERMES_QUIET"):
+            print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
+        
+        new_env = _create_environment(
+            env_type=env_type,
+            image=image,
+            cwd=cwd,
+            timeout=config["timeout"],
+        )
+        
+        # Store under lock (brief) -- do NOT call _start_cleanup_thread inside
+        # the lock because it also acquires _env_lock (non-reentrant = deadlock)
+        created = False
        with _env_lock:
            if task_id not in _active_environments:
-                import os
-                env = _LocalEnvironment(cwd=os.getcwd(), timeout=60)
-                _active_environments[task_id] = env
-            terminal_env = _active_environments[task_id]
+                _active_environments[task_id] = new_env
+                created = True
+            else:
+                try:
+                    if hasattr(new_env, 'stop'):
+                        new_env.stop()
+                except Exception:
+                    pass
        
-        file_ops = ShellFileOperations(terminal_env)
+        if created:
+            _start_cleanup_thread()
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
+    
+    # Now get the environment and build file_ops
+    with _env_lock:
+        _last_activity[task_id] = time.time()
+        terminal_env = _active_environments[task_id]
+    
+    file_ops = ShellFileOperations(terminal_env)
+    with _file_ops_lock:
        _file_ops_cache[task_id] = file_ops
-        return file_ops
+    return file_ops


 def clear_file_ops_cache(task_id: str = None):
@ -56,6 +119,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
        result = file_ops.write_file(path, content)
        return json.dumps(result.to_dict(), ensure_ascii=False)
    except Exception as e:
+        print(f"[FileTools] write_file error: {type(e).__name__}: {e}", flush=True)  
        return json.dumps({"error": str(e)}, ensure_ascii=False)


--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -1347,40 +1347,61 @@ def terminal_tool(
        _start_cleanup_thread()

        # Get or create environment
+        # Check under lock, but create OUTSIDE lock so we don't block
+        # other concurrent rollouts during slow Modal/Docker startup
+        needs_creation = False
        with _env_lock:
            if effective_task_id not in _active_environments:
-                # Check disk usage before creating new environment
-                _check_disk_usage_warning()
-                
-                try:
-                    # Build SSH config if using SSH environment
-                    ssh_config = None
-                    if env_type == "ssh":
-                        ssh_config = {
-                            "host": config.get("ssh_host", ""),
-                            "user": config.get("ssh_user", ""),
-                            "port": config.get("ssh_port", 22),
-                            "key": config.get("ssh_key", ""),
-                        }
-                    
-                    _active_environments[effective_task_id] = _create_environment(
-                        env_type=env_type,
-                        image=image,
-                        cwd=cwd,
-                        timeout=effective_timeout,
-                        ssh_config=ssh_config
-                    )
-                except ImportError as e:
-                    return json.dumps({
-                        "output": "",
-                        "exit_code": -1,
-                        "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
-                        "status": "disabled"
-                    }, ensure_ascii=False)
+                needs_creation = True
+            else:
+                _last_activity[effective_task_id] = time.time()
+                env = _active_environments[effective_task_id]

-            # Update last activity time
-            _last_activity[effective_task_id] = time.time()
-            env = _active_environments[effective_task_id]
+        if needs_creation:
+            _check_disk_usage_warning()
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
+            try:
+                ssh_config = None
+                if env_type == "ssh":
+                    ssh_config = {
+                        "host": config.get("ssh_host", ""),
+                        "user": config.get("ssh_user", ""),
+                        "port": config.get("ssh_port", 22),
+                        "key": config.get("ssh_key", ""),
+                    }
+
+                new_env = _create_environment(
+                    env_type=env_type,
+                    image=image,
+                    cwd=cwd,
+                    timeout=effective_timeout,
+                    ssh_config=ssh_config
+                )
+            except ImportError as e:
+                return json.dumps({
+                    "output": "",
+                    "exit_code": -1,
+                    "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
+                    "status": "disabled"
+                }, ensure_ascii=False)
+
+            # Store under lock (brief)
+            with _env_lock:
+                if effective_task_id not in _active_environments:
+                    _active_environments[effective_task_id] = new_env
+                else:
+                    # Another thread created it while we were building -- clean up ours
+                    try:
+                        if hasattr(new_env, 'stop'):
+                            new_env.stop()
+                    except Exception:
+                        pass
+
+                _last_activity[effective_task_id] = time.time()
+                env = _active_environments[effective_task_id]
+                if not os.getenv("HERMES_QUIET"):
+                    print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)

        # Check for dangerous commands (only for local/ssh in interactive modes)
        # Skip check if force=True (user has confirmed they want to run it)
@ -1435,13 +1456,20 @@ def terminal_tool(
                        retry_count += 1
                        wait_time = 2 ** retry_count
                        print(f"⚠️  Terminal: execution error, retrying in {wait_time}s (attempt {retry_count}/{max_retries})")
+                        print(f"   Command: {command[:200]}")
+                        print(f"   Error: {type(e).__name__}: {e}")
+                        print(f"   Task ID: {effective_task_id}, Backend: {env_type}")
                        time.sleep(wait_time)
                        continue
                    
+                    print(f"❌ Terminal: execution failed after {max_retries} retries")
+                    print(f"   Command: {command[:200]}")
+                    print(f"   Error: {type(e).__name__}: {e}")
+                    print(f"   Task ID: {effective_task_id}, Backend: {env_type}")
                    return json.dumps({
                        "output": "",
                        "exit_code": -1,
-                        "error": f"Command execution failed: {str(e)}"
+                        "error": f"Command execution failed: {type(e).__name__}: {str(e)}"
                    }, ensure_ascii=False)
                
                # Got a result