Add kill_modal script to manage Modal applications and improve sandbox creation handling in the file and terminal tools

- Introduced a new script, `kill_modal.sh`, to facilitate stopping running Modal apps, including the ability to stop all apps or specific swe-rex sandboxes.
- Enhanced user experience with clear usage instructions and feedback during the stopping process.
- Improved error handling to ensure smooth execution even if some apps fail to stop.
This commit is contained in:
teknium 2026-02-12 05:37:14 +00:00
parent 1b7bc299f3
commit f23856df8e
3 changed files with 141 additions and 99 deletions

34
scripts/kill_modal.sh Executable file
View file

@ -0,0 +1,34 @@
#!/bin/bash
# Stop running Modal apps.
#
# By default only apps whose `modal app list` row mentions "swe-rex" (the
# sandbox app) are stopped. Pass --all to stop every app ID that is listed.
#
# Usage:
#   bash scripts/kill_modal.sh          # Stop swe-rex (the sandbox app)
#   bash scripts/kill_modal.sh --all    # Stop ALL Modal apps
#   bash scripts/kill_modal.sh --help   # Show usage and exit
#
# Exit status: 0 on success (even if individual apps could not be stopped),
# 1 if the Modal CLI is missing or the app list cannot be fetched,
# 2 on an unrecognised argument.
#
# `set -e` is deliberately not enabled: one app failing to stop must not
# abort the remaining stops.
set -uo pipefail

# Print the usage summary.
usage() {
    echo "Usage:"
    echo "  bash scripts/kill_modal.sh          # Stop swe-rex (the sandbox app)"
    echo "  bash scripts/kill_modal.sh --all    # Stop ALL Modal apps"
}

# Read text on stdin and print the unique Modal app IDs (ap-...) it contains,
# one per line. Prints nothing (and still succeeds) when there is no match.
extract_app_ids() {
    grep -oE 'ap-[A-Za-z0-9]+' | sort -u || true
}

# Stop every app ID read from stdin (one per line). Failures are reported on
# stderr but never abort the loop.
stop_apps() {
    local app_id
    while IFS= read -r app_id; do
        [[ -z "$app_id" ]] && continue
        echo " Stopping $app_id"
        # stdin is redirected so the CLI cannot consume the remaining IDs
        # that the loop is still reading.
        if ! modal app stop "$app_id" </dev/null 2>/dev/null; then
            echo " Warning: could not stop $app_id (it may already be stopped)" >&2
        fi
    done
}

MODE="${1:-}"
case "$MODE" in
    ""|--all)
        ;;
    -h|--help)
        usage
        exit 0
        ;;
    *)
        # Reject typos instead of silently falling back to the default mode.
        echo "Unknown option: $MODE" >&2
        usage >&2
        exit 2
        ;;
esac

if ! command -v modal >/dev/null 2>&1; then
    echo "Error: the 'modal' CLI was not found in PATH." >&2
    exit 1
fi

echo "Fetching Modal app list..."
# Fail loudly here: an empty list caused by a CLI error must not be reported
# as "no apps found".
if ! APP_LIST=$(modal app list 2>/dev/null); then
    echo "Error: 'modal app list' failed; cannot determine which apps to stop." >&2
    exit 1
fi

if [[ "$MODE" == "--all" ]]; then
    echo "Stopping ALL Modal apps..."
    APPS=$(printf '%s\n' "$APP_LIST" | extract_app_ids)
    if [[ -z "$APPS" ]]; then
        echo " No apps found."
    else
        printf '%s\n' "$APPS" | stop_apps
    fi
else
    echo "Stopping swe-rex sandboxes..."
    # grep exits non-zero when nothing matches; that is not an error here.
    APPS=$(printf '%s\n' "$APP_LIST" | grep 'swe-rex' | extract_app_ids || true)
    if [[ -z "$APPS" ]]; then
        echo " No swe-rex apps found."
    else
        printf '%s\n' "$APPS" | stop_apps
    fi
fi

echo ""
echo "Current swe-rex status:"
# Show the table header row (matched by 'State') plus any swe-rex rows.
modal app list 2>/dev/null | grep -E 'State|swe-rex' || echo " (none)"

View file

@ -30,62 +30,63 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
if task_id in _file_ops_cache: if task_id in _file_ops_cache:
return _file_ops_cache[task_id] return _file_ops_cache[task_id]
# Check if we need to create a new environment # Check if we need to create a new environment.
# Uses the same per-task creation locks as terminal_tool to prevent
# duplicate sandbox creation from concurrent tool calls.
from tools.terminal_tool import _creation_locks, _creation_locks_lock
needs_creation = False needs_creation = False
with _env_lock: with _env_lock:
if task_id not in _active_environments: if task_id not in _active_environments:
needs_creation = True needs_creation = True
# Create environment OUTSIDE locks so we don't block other rollouts
# during slow Modal/Docker startup (~10s)
if needs_creation: if needs_creation:
from tools.terminal_tool import _task_env_overrides # Per-task lock: only one thread creates the sandbox, others wait
with _creation_locks_lock:
config = _get_env_config() if task_id not in _creation_locks:
env_type = config["env_type"] _creation_locks[task_id] = __import__("threading").Lock()
task_lock = _creation_locks[task_id]
# Check per-task overrides (set by environments like TerminalBench2Env)
overrides = _task_env_overrides.get(task_id, {}) with task_lock:
# Double-check after acquiring the per-task lock
if env_type == "docker": with _env_lock:
image = overrides.get("docker_image") or config["docker_image"] if task_id in _active_environments:
elif env_type == "singularity": needs_creation = False
image = overrides.get("singularity_image") or config["singularity_image"]
elif env_type == "modal": if needs_creation:
image = overrides.get("modal_image") or config["modal_image"] from tools.terminal_tool import _task_env_overrides
else:
image = "" config = _get_env_config()
env_type = config["env_type"]
cwd = overrides.get("cwd") or config["cwd"] overrides = _task_env_overrides.get(task_id, {})
_check_disk_usage_warning()
if not os.getenv("HERMES_QUIET"): if env_type == "docker":
print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True) image = overrides.get("docker_image") or config["docker_image"]
elif env_type == "singularity":
new_env = _create_environment( image = overrides.get("singularity_image") or config["singularity_image"]
env_type=env_type, elif env_type == "modal":
image=image, image = overrides.get("modal_image") or config["modal_image"]
cwd=cwd, else:
timeout=config["timeout"], image = ""
)
cwd = overrides.get("cwd") or config["cwd"]
# Store under lock (brief) -- do NOT call _start_cleanup_thread inside if not os.getenv("HERMES_QUIET"):
# the lock because it also acquires _env_lock (non-reentrant = deadlock) print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
created = False
with _env_lock: new_env = _create_environment(
if task_id not in _active_environments: env_type=env_type,
_active_environments[task_id] = new_env image=image,
created = True cwd=cwd,
else: timeout=config["timeout"],
try: )
if hasattr(new_env, 'stop'):
new_env.stop() with _env_lock:
except Exception: _active_environments[task_id] = new_env
pass _last_activity[task_id] = __import__("time").time()
if created: _start_cleanup_thread()
_start_cleanup_thread() if not os.getenv("HERMES_QUIET"):
if not os.getenv("HERMES_QUIET"): print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
# Now get the environment and build file_ops # Now get the environment and build file_ops
with _env_lock: with _env_lock:

View file

@ -1132,6 +1132,8 @@ _active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory _task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {} _last_activity: Dict[str, float] = {}
_env_lock = threading.Lock() _env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
_creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself
_cleanup_thread = None _cleanup_thread = None
_cleanup_running = False _cleanup_running = False
@ -1515,64 +1517,69 @@ def terminal_tool(
# Start cleanup thread # Start cleanup thread
_start_cleanup_thread() _start_cleanup_thread()
# Get or create environment # Get or create environment.
# Check under lock, but create OUTSIDE lock so we don't block # Use a per-task creation lock so concurrent tool calls for the same
# other concurrent rollouts during slow Modal/Docker startup # task_id wait for the first one to finish creating the sandbox,
needs_creation = False # instead of each creating their own (wasting Modal resources).
with _env_lock: with _env_lock:
if effective_task_id not in _active_environments: if effective_task_id in _active_environments:
needs_creation = True
else:
_last_activity[effective_task_id] = time.time() _last_activity[effective_task_id] = time.time()
env = _active_environments[effective_task_id] env = _active_environments[effective_task_id]
needs_creation = False
else:
needs_creation = True
if needs_creation: if needs_creation:
# Disk usage warning only relevant for local/singularity backends # Per-task lock: only one thread creates the sandbox, others wait
if env_type in ("singularity", "local"): with _creation_locks_lock:
_check_disk_usage_warning() if effective_task_id not in _creation_locks:
if not os.getenv("HERMES_QUIET"): _creation_locks[effective_task_id] = threading.Lock()
print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True) task_lock = _creation_locks[effective_task_id]
try:
ssh_config = None
if env_type == "ssh":
ssh_config = {
"host": config.get("ssh_host", ""),
"user": config.get("ssh_user", ""),
"port": config.get("ssh_port", 22),
"key": config.get("ssh_key", ""),
}
new_env = _create_environment( with task_lock:
env_type=env_type, # Double-check after acquiring the per-task lock
image=image, with _env_lock:
cwd=cwd, if effective_task_id in _active_environments:
timeout=effective_timeout, _last_activity[effective_task_id] = time.time()
ssh_config=ssh_config env = _active_environments[effective_task_id]
) needs_creation = False
except ImportError as e:
return json.dumps({
"output": "",
"exit_code": -1,
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
"status": "disabled"
}, ensure_ascii=False)
# Store under lock (brief) if needs_creation:
with _env_lock: if env_type in ("singularity", "local"):
if effective_task_id not in _active_environments: _check_disk_usage_warning()
_active_environments[effective_task_id] = new_env if not os.getenv("HERMES_QUIET"):
else: print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
# Another thread created it while we were building -- clean up ours
try: try:
if hasattr(new_env, 'stop'): ssh_config = None
new_env.stop() if env_type == "ssh":
except Exception: ssh_config = {
pass "host": config.get("ssh_host", ""),
"user": config.get("ssh_user", ""),
"port": config.get("ssh_port", 22),
"key": config.get("ssh_key", ""),
}
_last_activity[effective_task_id] = time.time() new_env = _create_environment(
env = _active_environments[effective_task_id] env_type=env_type,
if not os.getenv("HERMES_QUIET"): image=image,
print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True) cwd=cwd,
timeout=effective_timeout,
ssh_config=ssh_config
)
except ImportError as e:
return json.dumps({
"output": "",
"exit_code": -1,
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
"status": "disabled"
}, ensure_ascii=False)
with _env_lock:
_active_environments[effective_task_id] = new_env
_last_activity[effective_task_id] = time.time()
env = new_env
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)
# Check for dangerous commands (only for local/ssh in interactive modes) # Check for dangerous commands (only for local/ssh in interactive modes)
# Skip check if force=True (user has confirmed they want to run it) # Skip check if force=True (user has confirmed they want to run it)