Refactor file operations and environment management in file_tools and terminal_tool
- Improved the caching mechanism for ShellFileOperations to ensure stale entries are invalidated when environments are cleaned up.
- Enhanced thread safety by refining the use of locks during environment creation and cleanup processes.
- Streamlined the cleanup of inactive environments to prevent blocking other tool calls, ensuring efficient resource management.
- Added error handling and messaging improvements for better user feedback during environment cleanup.
This commit is contained in:
parent
01a3a6ab0d
commit
8117d0adab
2 changed files with 146 additions and 124 deletions
|
|
@ -13,86 +13,88 @@ _file_ops_cache: dict = {}
|
|||
|
||||
def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
"""Get or create ShellFileOperations for a terminal environment.
|
||||
|
||||
|
||||
Respects the TERMINAL_ENV setting -- if the task_id doesn't have an
|
||||
environment yet, creates one using the configured backend (local, docker,
|
||||
modal, etc.) rather than always defaulting to local.
|
||||
|
||||
Thread-safe: uses the same per-task creation locks as terminal_tool to
|
||||
prevent duplicate sandbox creation from concurrent tool calls.
|
||||
"""
|
||||
from tools.terminal_tool import (
|
||||
_active_environments, _env_lock, _create_environment,
|
||||
_get_env_config, _last_activity, _start_cleanup_thread,
|
||||
_check_disk_usage_warning,
|
||||
_creation_locks, _creation_locks_lock,
|
||||
)
|
||||
import time
|
||||
|
||||
# Fast path: check cache without heavy locks
|
||||
|
||||
# Fast path: check cache -- but also verify the underlying environment
|
||||
# is still alive (it may have been killed by the cleanup thread).
|
||||
with _file_ops_lock:
|
||||
if task_id in _file_ops_cache:
|
||||
return _file_ops_cache[task_id]
|
||||
|
||||
# Check if we need to create a new environment.
|
||||
# Uses the same per-task creation locks as terminal_tool to prevent
|
||||
# duplicate sandbox creation from concurrent tool calls.
|
||||
from tools.terminal_tool import _creation_locks, _creation_locks_lock
|
||||
|
||||
needs_creation = False
|
||||
with _env_lock:
|
||||
if task_id not in _active_environments:
|
||||
needs_creation = True
|
||||
|
||||
if needs_creation:
|
||||
# Per-task lock: only one thread creates the sandbox, others wait
|
||||
with _creation_locks_lock:
|
||||
if task_id not in _creation_locks:
|
||||
_creation_locks[task_id] = __import__("threading").Lock()
|
||||
task_lock = _creation_locks[task_id]
|
||||
cached = _file_ops_cache.get(task_id)
|
||||
if cached is not None:
|
||||
with _env_lock:
|
||||
if task_id in _active_environments:
|
||||
_last_activity[task_id] = time.time()
|
||||
return cached
|
||||
else:
|
||||
# Environment was cleaned up -- invalidate stale cache entry
|
||||
with _file_ops_lock:
|
||||
_file_ops_cache.pop(task_id, None)
|
||||
|
||||
# Need to ensure the environment exists before building file_ops.
|
||||
# Acquire per-task lock so only one thread creates the sandbox.
|
||||
with _creation_locks_lock:
|
||||
if task_id not in _creation_locks:
|
||||
_creation_locks[task_id] = threading.Lock()
|
||||
task_lock = _creation_locks[task_id]
|
||||
|
||||
with task_lock:
|
||||
# Double-check: another thread may have created it while we waited
|
||||
with _env_lock:
|
||||
if task_id in _active_environments:
|
||||
_last_activity[task_id] = time.time()
|
||||
terminal_env = _active_environments[task_id]
|
||||
else:
|
||||
terminal_env = None
|
||||
|
||||
if terminal_env is None:
|
||||
from tools.terminal_tool import _task_env_overrides
|
||||
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
overrides = _task_env_overrides.get(task_id, {})
|
||||
|
||||
if env_type == "docker":
|
||||
image = overrides.get("docker_image") or config["docker_image"]
|
||||
elif env_type == "singularity":
|
||||
image = overrides.get("singularity_image") or config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = overrides.get("modal_image") or config["modal_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
cwd = overrides.get("cwd") or config["cwd"]
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
|
||||
|
||||
terminal_env = _create_environment(
|
||||
env_type=env_type,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=config["timeout"],
|
||||
)
|
||||
|
||||
with task_lock:
|
||||
# Double-check after acquiring the per-task lock
|
||||
with _env_lock:
|
||||
if task_id in _active_environments:
|
||||
needs_creation = False
|
||||
_active_environments[task_id] = terminal_env
|
||||
_last_activity[task_id] = time.time()
|
||||
|
||||
if needs_creation:
|
||||
from tools.terminal_tool import _task_env_overrides
|
||||
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
overrides = _task_env_overrides.get(task_id, {})
|
||||
|
||||
if env_type == "docker":
|
||||
image = overrides.get("docker_image") or config["docker_image"]
|
||||
elif env_type == "singularity":
|
||||
image = overrides.get("singularity_image") or config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = overrides.get("modal_image") or config["modal_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
cwd = overrides.get("cwd") or config["cwd"]
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
|
||||
|
||||
new_env = _create_environment(
|
||||
env_type=env_type,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=config["timeout"],
|
||||
)
|
||||
|
||||
with _env_lock:
|
||||
_active_environments[task_id] = new_env
|
||||
_last_activity[task_id] = __import__("time").time()
|
||||
|
||||
_start_cleanup_thread()
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
|
||||
|
||||
# Now get the environment and build file_ops
|
||||
with _env_lock:
|
||||
_last_activity[task_id] = time.time()
|
||||
terminal_env = _active_environments[task_id]
|
||||
|
||||
_start_cleanup_thread()
|
||||
if not os.getenv("HERMES_QUIET"):
|
||||
print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
|
||||
|
||||
# Build file_ops from the (guaranteed live) environment and cache it
|
||||
file_ops = ShellFileOperations(terminal_env)
|
||||
with _file_ops_lock:
|
||||
_file_ops_cache[task_id] = file_ops
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue