Add kill_modal script to manage Modal applications and better handling of file and terminal tools

- Introduced a new script, `kill_modal.sh`, to facilitate stopping running Modal apps, including the ability to stop all apps or specific swe-rex sandboxes. - Enhanced user experience with clear usage instructions and feedback during the stopping process. - Improved error handling to ensure smooth execution even if some apps fail to stop.
2026-02-12 05:37:14 +00:00 · 2026-02-12 05:37:14 +00:00 · f23856df8e
commit f23856df8e
parent 1b7bc299f3
3 changed files with 141 additions and 99 deletions
--- a/scripts/kill_modal.sh
+++ b/scripts/kill_modal.sh
@ -0,0 +1,34 @@
 #!/bin/bash
 # Kill all running Modal apps (sandboxes, deployments, etc.)
 #
 # Usage:
 #   bash scripts/kill_modal.sh          # Stop swe-rex (the sandbox app)
 #   bash scripts/kill_modal.sh --all    # Stop ALL Modal apps
 set -uo pipefail
 echo "Fetching Modal app list..."
 APP_LIST=$(modal app list 2>/dev/null)
 if [[ "${1:-}" == "--all" ]]; then
    echo "Stopping ALL Modal apps..."
    echo "$APP_LIST" | grep -oE 'ap-[A-Za-z0-9]+' | sort -u | while read app_id; do
        echo "  Stopping $app_id"
        modal app stop "$app_id" 2>/dev/null || true
    done
 else
    echo "Stopping swe-rex sandboxes..."
    APPS=$(echo "$APP_LIST" | grep 'swe-rex' | grep -oE 'ap-[A-Za-z0-9]+' || true)
    if [[ -z "$APPS" ]]; then
        echo "  No swe-rex apps found."
    else
        echo "$APPS" | while read app_id; do
            echo "  Stopping $app_id"
            modal app stop "$app_id" 2>/dev/null || true
        done
    fi
 fi
 echo ""
 echo "Current swe-rex status:"
 modal app list 2>/dev/null | grep -E 'State|swe-rex' || echo "  (none)"
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@ -30,62 +30,63 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
        if task_id in _file_ops_cache:
            return _file_ops_cache[task_id]
-    # Check if we need to create a new environment
+    # Check if we need to create a new environment.
    # Uses the same per-task creation locks as terminal_tool to prevent
    # duplicate sandbox creation from concurrent tool calls.
    from tools.terminal_tool import _creation_locks, _creation_locks_lock
    needs_creation = False
    with _env_lock:
        if task_id not in _active_environments:
            needs_creation = True
    # Create environment OUTSIDE locks so we don't block other rollouts
    # during slow Modal/Docker startup (~10s)
    if needs_creation:
-        from tools.terminal_tool import _task_env_overrides
+        # Per-task lock: only one thread creates the sandbox, others wait
-        
+        with _creation_locks_lock:
-        config = _get_env_config()
+            if task_id not in _creation_locks:
-        env_type = config["env_type"]
+                _creation_locks[task_id] = __import__("threading").Lock()
-        
+            task_lock = _creation_locks[task_id]
-        # Check per-task overrides (set by environments like TerminalBench2Env)
+
-        overrides = _task_env_overrides.get(task_id, {})
+        with task_lock:
-        
+            # Double-check after acquiring the per-task lock
-        if env_type == "docker":
+            with _env_lock:
-            image = overrides.get("docker_image") or config["docker_image"]
+                if task_id in _active_environments:
-        elif env_type == "singularity":
+                    needs_creation = False
-            image = overrides.get("singularity_image") or config["singularity_image"]
+
-        elif env_type == "modal":
+            if needs_creation:
-            image = overrides.get("modal_image") or config["modal_image"]
+                from tools.terminal_tool import _task_env_overrides
-        else:
+                
-            image = ""
+                config = _get_env_config()
-        
+                env_type = config["env_type"]
-        cwd = overrides.get("cwd") or config["cwd"]
+                overrides = _task_env_overrides.get(task_id, {})
-        _check_disk_usage_warning()
+                
-        if not os.getenv("HERMES_QUIET"):
+                if env_type == "docker":
-            print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
+                    image = overrides.get("docker_image") or config["docker_image"]
-        
+                elif env_type == "singularity":
-        new_env = _create_environment(
+                    image = overrides.get("singularity_image") or config["singularity_image"]
-            env_type=env_type,
+                elif env_type == "modal":
-            image=image,
+                    image = overrides.get("modal_image") or config["modal_image"]
-            cwd=cwd,
+                else:
-            timeout=config["timeout"],
+                    image = ""
-        )
+                
-        
+                cwd = overrides.get("cwd") or config["cwd"]
-        # Store under lock (brief) -- do NOT call _start_cleanup_thread inside
+                if not os.getenv("HERMES_QUIET"):
-        # the lock because it also acquires _env_lock (non-reentrant = deadlock)
+                    print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
-        created = False
+                
-        with _env_lock:
+                new_env = _create_environment(
-            if task_id not in _active_environments:
+                    env_type=env_type,
-                _active_environments[task_id] = new_env
+                    image=image,
-                created = True
+                    cwd=cwd,
-            else:
+                    timeout=config["timeout"],
-                try:
+                )
-                    if hasattr(new_env, 'stop'):
+                
-                        new_env.stop()
+                with _env_lock:
-                except Exception:
+                    _active_environments[task_id] = new_env
-                    pass
+                    _last_activity[task_id] = __import__("time").time()
-        
+                
-        if created:
+                _start_cleanup_thread()
-            _start_cleanup_thread()
+                if not os.getenv("HERMES_QUIET"):
-            if not os.getenv("HERMES_QUIET"):
+                    print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
                print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
    # Now get the environment and build file_ops
    with _env_lock:
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -1132,6 +1132,8 @@ _active_environments: Dict[str, Any] = {}
 _task_workdirs: Dict[str, str] = {}  # Maps task_id to working directory
 _last_activity: Dict[str, float] = {}
 _env_lock = threading.Lock()
 _creation_locks: Dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
 _creation_locks_lock = threading.Lock()  # Protects _creation_locks dict itself
 _cleanup_thread = None
 _cleanup_running = False
@ -1515,64 +1517,69 @@ def terminal_tool(
        # Start cleanup thread
        _start_cleanup_thread()
-        # Get or create environment
+        # Get or create environment.
-        # Check under lock, but create OUTSIDE lock so we don't block
+        # Use a per-task creation lock so concurrent tool calls for the same
-        # other concurrent rollouts during slow Modal/Docker startup
+        # task_id wait for the first one to finish creating the sandbox,
-        needs_creation = False
+        # instead of each creating their own (wasting Modal resources).
        with _env_lock:
-            if effective_task_id not in _active_environments:
+            if effective_task_id in _active_environments:
                needs_creation = True
            else:
                _last_activity[effective_task_id] = time.time()
                env = _active_environments[effective_task_id]
                needs_creation = False
            else:
                needs_creation = True
        if needs_creation:
-            # Disk usage warning only relevant for local/singularity backends
+            # Per-task lock: only one thread creates the sandbox, others wait
-            if env_type in ("singularity", "local"):
+            with _creation_locks_lock:
-                _check_disk_usage_warning()
+                if effective_task_id not in _creation_locks:
-            if not os.getenv("HERMES_QUIET"):
+                    _creation_locks[effective_task_id] = threading.Lock()
-                print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
+                task_lock = _creation_locks[effective_task_id]
            try:
                ssh_config = None
                if env_type == "ssh":
                    ssh_config = {
                        "host": config.get("ssh_host", ""),
                        "user": config.get("ssh_user", ""),
                        "port": config.get("ssh_port", 22),
                        "key": config.get("ssh_key", ""),
                    }
-                new_env = _create_environment(
+            with task_lock:
-                    env_type=env_type,
+                # Double-check after acquiring the per-task lock
-                    image=image,
+                with _env_lock:
-                    cwd=cwd,
+                    if effective_task_id in _active_environments:
-                    timeout=effective_timeout,
+                        _last_activity[effective_task_id] = time.time()
-                    ssh_config=ssh_config
+                        env = _active_environments[effective_task_id]
-                )
+                        needs_creation = False
            except ImportError as e:
                return json.dumps({
                    "output": "",
                    "exit_code": -1,
                    "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
                    "status": "disabled"
                }, ensure_ascii=False)
-            # Store under lock (brief)
+                if needs_creation:
-            with _env_lock:
+                    if env_type in ("singularity", "local"):
-                if effective_task_id not in _active_environments:
+                        _check_disk_usage_warning()
-                    _active_environments[effective_task_id] = new_env
+                    if not os.getenv("HERMES_QUIET"):
-                else:
+                        print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
                    # Another thread created it while we were building -- clean up ours
                    try:
-                        if hasattr(new_env, 'stop'):
+                        ssh_config = None
-                            new_env.stop()
+                        if env_type == "ssh":
-                    except Exception:
+                            ssh_config = {
-                        pass
+                                "host": config.get("ssh_host", ""),
                                "user": config.get("ssh_user", ""),
                                "port": config.get("ssh_port", 22),
                                "key": config.get("ssh_key", ""),
                            }
-                _last_activity[effective_task_id] = time.time()
+                        new_env = _create_environment(
-                env = _active_environments[effective_task_id]
+                            env_type=env_type,
-                if not os.getenv("HERMES_QUIET"):
+                            image=image,
-                    print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)
+                            cwd=cwd,
                            timeout=effective_timeout,
                            ssh_config=ssh_config
                        )
                    except ImportError as e:
                        return json.dumps({
                            "output": "",
                            "exit_code": -1,
                            "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
                            "status": "disabled"
                        }, ensure_ascii=False)
                    with _env_lock:
                        _active_environments[effective_task_id] = new_env
                        _last_activity[effective_task_id] = time.time()
                        env = new_env
                    if not os.getenv("HERMES_QUIET"):
                        print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)
        # Check for dangerous commands (only for local/ssh in interactive modes)
        # Skip check if force=True (user has confirmed they want to run it)