The architecture has been updated
This commit is contained in:
parent
805f7a017e
commit
a01257ead9
1119 changed files with 226 additions and 352 deletions
13
hermes_code/tools/environments/__init__.py
Normal file
13
hermes_code/tools/environments/__init__.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Hermes execution environment backends.
|
||||
|
||||
Each backend provides the same interface (BaseEnvironment ABC) for running
|
||||
shell commands in a specific execution context: local, Docker, Singularity,
|
||||
SSH, Modal, or Daytona.
|
||||
|
||||
The terminal_tool.py factory (_create_environment) selects the backend
|
||||
based on the TERMINAL_ENV configuration.
|
||||
"""
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
|
||||
__all__ = ["BaseEnvironment"]
|
||||
99
hermes_code/tools/environments/base.py
Normal file
99
hermes_code/tools/environments/base.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
"""Base class for all Hermes execution environment backends."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
|
||||
|
||||
def get_sandbox_dir() -> Path:
    """Return the host-side root directory for all sandbox storage
    (Docker workspaces, Singularity overlays/SIF cache, etc.).

    The location can be overridden via the TERMINAL_SANDBOX_DIR environment
    variable; otherwise it defaults to {HERMES_HOME}/sandboxes/. The
    directory is created on first use if it does not already exist.
    """
    override = os.getenv("TERMINAL_SANDBOX_DIR")
    root = Path(override) if override else get_hermes_home() / "sandboxes"
    root.mkdir(parents=True, exist_ok=True)
    return root
|
||||
|
||||
|
||||
class BaseEnvironment(ABC):
|
||||
"""Common interface for all Hermes execution backends.
|
||||
|
||||
Subclasses implement execute() and cleanup(). Shared helpers eliminate
|
||||
duplicated subprocess boilerplate across backends.
|
||||
"""
|
||||
|
||||
def __init__(self, cwd: str, timeout: int, env: dict = None):
|
||||
self.cwd = cwd
|
||||
self.timeout = timeout
|
||||
self.env = env or {}
|
||||
|
||||
@abstractmethod
|
||||
def execute(self, command: str, cwd: str = "", *,
|
||||
timeout: int | None = None,
|
||||
stdin_data: str | None = None) -> dict:
|
||||
"""Execute a command, return {"output": str, "returncode": int}."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def cleanup(self):
|
||||
"""Release backend resources (container, instance, connection)."""
|
||||
...
|
||||
|
||||
def stop(self):
|
||||
"""Alias for cleanup (compat with older callers)."""
|
||||
self.cleanup()
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
self.cleanup()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Shared helpers (eliminate duplication across backends)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _prepare_command(self, command: str) -> tuple[str, str | None]:
|
||||
"""Transform sudo commands if SUDO_PASSWORD is available.
|
||||
|
||||
Returns:
|
||||
(transformed_command, sudo_stdin) — see _transform_sudo_command
|
||||
for the full contract. Callers that drive a subprocess directly
|
||||
should prepend sudo_stdin (when not None) to any stdin_data they
|
||||
pass to Popen. Callers that embed stdin via heredoc (modal,
|
||||
daytona) handle sudo_stdin in their own execute() method.
|
||||
"""
|
||||
from tools.terminal_tool import _transform_sudo_command
|
||||
return _transform_sudo_command(command)
|
||||
|
||||
def _build_run_kwargs(self, timeout: int | None,
|
||||
stdin_data: str | None = None) -> dict:
|
||||
"""Build common subprocess.run kwargs for non-interactive execution."""
|
||||
kw = {
|
||||
"text": True,
|
||||
"timeout": timeout or self.timeout,
|
||||
"encoding": "utf-8",
|
||||
"errors": "replace",
|
||||
"stdout": subprocess.PIPE,
|
||||
"stderr": subprocess.STDOUT,
|
||||
}
|
||||
if stdin_data is not None:
|
||||
kw["input"] = stdin_data
|
||||
else:
|
||||
kw["stdin"] = subprocess.DEVNULL
|
||||
return kw
|
||||
|
||||
def _timeout_result(self, timeout: int | None) -> dict:
|
||||
"""Standard return dict when a command times out."""
|
||||
return {
|
||||
"output": f"Command timed out after {timeout or self.timeout}s",
|
||||
"returncode": 124,
|
||||
}
|
||||
250
hermes_code/tools/environments/daytona.py
Normal file
250
hermes_code/tools/environments/daytona.py
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
"""Daytona cloud execution environment.
|
||||
|
||||
Uses the Daytona Python SDK to run commands in cloud sandboxes.
|
||||
Supports persistent sandboxes: when enabled, sandboxes are stopped on cleanup
|
||||
and resumed on next creation, preserving the filesystem across sessions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import math
|
||||
import shlex
|
||||
import threading
|
||||
import uuid
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DaytonaEnvironment(BaseEnvironment):
    """Daytona cloud sandbox execution backend.

    Uses stopped/started sandbox lifecycle for filesystem persistence
    instead of snapshots, making it faster and stateless on the host.
    """

    def __init__(
        self,
        image: str,
        cwd: str = "/home/daytona",
        timeout: int = 60,
        cpu: int = 1,
        memory: int = 5120,  # MB (hermes convention)
        disk: int = 10240,  # MB (Daytona platform max is 10GB)
        persistent_filesystem: bool = True,
        task_id: str = "default",
    ):
        """Create or resume the sandbox for ``task_id``.

        Args:
            image: Container image to create the sandbox from.
            cwd: Requested working directory; "~" or the default
                "/home/daytona" are re-resolved to the sandbox's real $HOME.
            timeout: Default per-command timeout in seconds.
            cpu: CPU count for the Daytona Resources spec.
            memory: Memory in MB; converted to whole GiB (rounded up).
            disk: Disk in MB; converted to GiB and capped at 10.
            persistent_filesystem: When True, cleanup() stops (rather than
                deletes) the sandbox so its filesystem survives sessions.
            task_id: Stable identifier used for sandbox name and labels.
        """
        self._requested_cwd = cwd
        super().__init__(cwd=cwd, timeout=timeout)

        # SDK imported lazily so the daytona package is only required when
        # this backend is actually selected.
        from daytona import (
            Daytona,
            CreateSandboxFromImageParams,
            DaytonaError,
            Resources,
            SandboxState,
        )

        self._persistent = persistent_filesystem
        self._task_id = task_id
        # Kept as an attribute so _ensure_sandbox_ready can compare states
        # without re-importing the SDK.
        self._SandboxState = SandboxState
        self._daytona = Daytona()
        self._sandbox = None
        # Serializes sandbox lifecycle operations (start/stop/refresh)
        # against the exec path.
        self._lock = threading.Lock()

        # Convert hermes MB convention to the GiB units the SDK expects.
        memory_gib = max(1, math.ceil(memory / 1024))
        disk_gib = max(1, math.ceil(disk / 1024))
        if disk_gib > 10:
            warnings.warn(
                f"Daytona: requested disk ({disk_gib}GB) exceeds platform limit (10GB). "
                f"Capping to 10GB. Set container_disk: 10240 in config to silence this.",
                stacklevel=2,
            )
            disk_gib = 10
        resources = Resources(cpu=cpu, memory=memory_gib, disk=disk_gib)

        labels = {"hermes_task_id": task_id}
        sandbox_name = f"hermes-{task_id}"

        # Try to resume an existing sandbox for this task
        if self._persistent:
            # 1. Try name-based lookup (new path)
            # NOTE(review): assumes the SDK raises DaytonaError when no
            # sandbox with this name exists — confirm against SDK docs.
            try:
                self._sandbox = self._daytona.get(sandbox_name)
                self._sandbox.start()
                logger.info("Daytona: resumed sandbox %s for task %s",
                            self._sandbox.id, task_id)
            except DaytonaError:
                self._sandbox = None
            except Exception as e:
                logger.warning("Daytona: failed to resume sandbox for task %s: %s",
                               task_id, e)
                self._sandbox = None

            # 2. Legacy fallback: find sandbox created before the naming migration
            if self._sandbox is None:
                try:
                    page = self._daytona.list(labels=labels, page=1, limit=1)
                    if page.items:
                        self._sandbox = page.items[0]
                        self._sandbox.start()
                        logger.info("Daytona: resumed legacy sandbox %s for task %s",
                                    self._sandbox.id, task_id)
                except Exception as e:
                    logger.debug("Daytona: no legacy sandbox found for task %s: %s",
                                 task_id, e)
                    self._sandbox = None

        # Create a fresh sandbox if we don't have one
        if self._sandbox is None:
            # auto_stop_interval=0 disables the platform's idle auto-stop;
            # lifecycle is managed explicitly by cleanup().
            self._sandbox = self._daytona.create(
                CreateSandboxFromImageParams(
                    image=image,
                    name=sandbox_name,
                    labels=labels,
                    auto_stop_interval=0,
                    resources=resources,
                )
            )
            logger.info("Daytona: created sandbox %s for task %s",
                        self._sandbox.id, task_id)

        # Resolve cwd: detect actual home dir inside the sandbox
        if self._requested_cwd in ("~", "/home/daytona"):
            try:
                home = self._sandbox.process.exec("echo $HOME").result.strip()
                if home:
                    self.cwd = home
            except Exception:
                pass  # leave cwd as-is; sandbox will use its own default
        logger.info("Daytona: resolved cwd to %s", self.cwd)

    def _ensure_sandbox_ready(self):
        """Restart sandbox if it was stopped (e.g., by a previous interrupt)."""
        self._sandbox.refresh_data()
        if self._sandbox.state in (self._SandboxState.STOPPED, self._SandboxState.ARCHIVED):
            self._sandbox.start()
            logger.info("Daytona: restarted sandbox %s", self._sandbox.id)

    def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
        """Run exec in a background thread with interrupt polling.

        The Daytona SDK's exec(timeout=...) parameter is unreliable (the
        server-side timeout is not enforced and the SDK has no client-side
        fallback), so we wrap the command with the shell ``timeout`` utility
        which reliably kills the process and returns exit code 124.

        Returns either {"output", "returncode"} on completion, or
        {"error": Exception} when the SDK call raised (the caller in
        execute() decides whether to retry).
        """
        # Wrap with shell `timeout` to enforce the deadline reliably.
        # Add a small buffer so the shell timeout fires before any SDK-level
        # timeout would, giving us a clean exit code 124.
        timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}"

        # Shared mutable holder: the worker thread writes exactly one of
        # "value" or "error" before exiting.
        result_holder: dict = {"value": None, "error": None}

        def _run():
            try:
                response = self._sandbox.process.exec(
                    timed_command, cwd=cwd,
                )
                result_holder["value"] = {
                    "output": response.result or "",
                    "returncode": response.exit_code,
                }
            except Exception as e:
                result_holder["error"] = e

        t = threading.Thread(target=_run, daemon=True)
        t.start()
        # Wait for timeout + generous buffer for network/SDK overhead
        deadline = time.monotonic() + timeout + 10
        while t.is_alive():
            # Poll in short slices so interrupts are noticed promptly.
            t.join(timeout=0.2)
            if is_interrupted():
                # Stopping the sandbox is the only way to kill the remote
                # process; _ensure_sandbox_ready restarts it on next call.
                with self._lock:
                    try:
                        self._sandbox.stop()
                    except Exception:
                        pass
                return {
                    "output": "[Command interrupted - Daytona sandbox stopped]",
                    "returncode": 130,
                }
            if time.monotonic() > deadline:
                # Shell timeout didn't fire and SDK is hung — force stop
                with self._lock:
                    try:
                        self._sandbox.stop()
                    except Exception:
                        pass
                return self._timeout_result(timeout)

        if result_holder["error"]:
            return {"error": result_holder["error"]}
        return result_holder["value"]

    def execute(self, command: str, cwd: str = "", *,
                timeout: Optional[int] = None,
                stdin_data: Optional[str] = None) -> dict:
        """Run a shell command in the sandbox; retry once on SDK errors.

        Returns {"output": str, "returncode": int}.
        """
        with self._lock:
            self._ensure_sandbox_ready()

        # The SDK cannot pipe stdin, so stdin_data is embedded as a heredoc
        # with a collision-proof end marker.
        if stdin_data is not None:
            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            while marker in stdin_data:
                marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"

        exec_command, sudo_stdin = self._prepare_command(command)

        # Daytona sandboxes execute commands via the Daytona SDK and cannot
        # pipe subprocess stdin directly the way a local Popen can. When a
        # sudo password is present, use a shell-level pipe from printf so that
        # the password feeds sudo -S without appearing as an echo argument
        # embedded in the shell string. The password is still visible in the
        # remote sandbox's command line, but it is not exposed on the user's
        # local machine — which is the primary threat being mitigated.
        if sudo_stdin is not None:
            import shlex
            exec_command = (
                f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
            )
        effective_cwd = cwd or self.cwd or None
        effective_timeout = timeout or self.timeout

        result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)

        if "error" in result:
            from daytona import DaytonaError
            err = result["error"]
            if isinstance(err, DaytonaError):
                # SDK-level errors often mean the sandbox went away; try to
                # revive it and run the command exactly once more.
                with self._lock:
                    try:
                        self._ensure_sandbox_ready()
                    except Exception:
                        return {"output": f"Daytona execution error: {err}", "returncode": 1}
                result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)
                if "error" not in result:
                    return result
            return {"output": f"Daytona execution error: {err}", "returncode": 1}

        return result

    def cleanup(self):
        """Stop (persistent) or delete (ephemeral) the sandbox, then drop the handle."""
        with self._lock:
            if self._sandbox is None:
                return
            try:
                if self._persistent:
                    self._sandbox.stop()
                    logger.info("Daytona: stopped sandbox %s (filesystem preserved)",
                                self._sandbox.id)
                else:
                    self._daytona.delete(self._sandbox)
                    logger.info("Daytona: deleted sandbox %s", self._sandbox.id)
            except Exception as e:
                logger.warning("Daytona: cleanup failed: %s", e)
            self._sandbox = None
|
||||
494
hermes_code/tools/environments/docker.py
Normal file
494
hermes_code/tools/environments/docker.py
Normal file
|
|
@ -0,0 +1,494 @@
|
|||
"""Docker execution environment for sandboxed command execution.
|
||||
|
||||
Security hardened (cap-drop ALL, no-new-privileges, PID limits),
|
||||
configurable resource limits (CPU, memory, disk), and optional filesystem
|
||||
persistence via bind mounts.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Common Docker Desktop install paths checked when 'docker' is not in PATH.
|
||||
# macOS Intel: /usr/local/bin, macOS Apple Silicon (Homebrew): /opt/homebrew/bin,
|
||||
# Docker Desktop app bundle: /Applications/Docker.app/Contents/Resources/bin
|
||||
_DOCKER_SEARCH_PATHS = [
|
||||
"/usr/local/bin/docker",
|
||||
"/opt/homebrew/bin/docker",
|
||||
"/Applications/Docker.app/Contents/Resources/bin/docker",
|
||||
]
|
||||
|
||||
_docker_executable: Optional[str] = None # resolved once, cached
|
||||
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||
|
||||
|
||||
def _normalize_forward_env_names(forward_env: list[str] | None) -> list[str]:
    """Return a deduplicated list of valid environment variable names.

    Non-string entries and names that fail the shell-identifier pattern are
    dropped with a warning; blanks and duplicates are dropped silently.
    First-occurrence order is preserved.
    """
    accepted: list[str] = []
    known: set[str] = set()

    for entry in forward_env or []:
        if not isinstance(entry, str):
            logger.warning("Ignoring non-string docker_forward_env entry: %r", entry)
            continue

        name = entry.strip()
        # Invalid names never enter `known`, so checking duplicates first
        # preserves the original warn-per-occurrence behavior for them.
        if not name or name in known:
            continue
        if not _ENV_VAR_NAME_RE.match(name):
            logger.warning("Ignoring invalid docker_forward_env entry: %r", entry)
            continue

        known.add(name)
        accepted.append(name)

    return accepted
|
||||
|
||||
|
||||
def _load_hermes_env_vars() -> dict[str, str]:
|
||||
"""Load ~/.hermes/.env values without failing Docker command execution."""
|
||||
try:
|
||||
from hermes_cli.config import load_env
|
||||
|
||||
return load_env() or {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def find_docker() -> Optional[str]:
    """Locate the docker CLI binary.

    Checks ``shutil.which`` first (respects PATH), then probes well-known
    install locations on macOS where Docker Desktop may not be in PATH
    (e.g. when running as a gateway service via launchd).

    Returns the absolute path, or ``None`` if docker cannot be found.
    """
    global _docker_executable

    # Serve the cached result from a previous successful lookup.
    if _docker_executable is not None:
        return _docker_executable

    candidate = shutil.which("docker")
    if candidate:
        _docker_executable = candidate
        return candidate

    # PATH lookup failed — probe the well-known install locations directly.
    for candidate in _DOCKER_SEARCH_PATHS:
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            _docker_executable = candidate
            logger.info("Found docker at non-PATH location: %s", candidate)
            return candidate

    return None
|
||||
|
||||
|
||||
# Security flags applied to every container.
|
||||
# The container itself is the security boundary (isolated from host).
|
||||
# We drop all capabilities then add back the minimum needed:
|
||||
# DAC_OVERRIDE - root can write to bind-mounted dirs owned by host user
|
||||
# CHOWN/FOWNER - package managers (pip, npm, apt) need to set file ownership
|
||||
# Block privilege escalation and limit PIDs.
|
||||
# /tmp is size-limited and nosuid but allows exec (needed by pip/npm builds).
|
||||
_SECURITY_ARGS = [
|
||||
"--cap-drop", "ALL",
|
||||
"--cap-add", "DAC_OVERRIDE",
|
||||
"--cap-add", "CHOWN",
|
||||
"--cap-add", "FOWNER",
|
||||
"--security-opt", "no-new-privileges",
|
||||
"--pids-limit", "256",
|
||||
"--tmpfs", "/tmp:rw,nosuid,size=512m",
|
||||
"--tmpfs", "/var/tmp:rw,noexec,nosuid,size=256m",
|
||||
"--tmpfs", "/run:rw,noexec,nosuid,size=64m",
|
||||
]
|
||||
|
||||
|
||||
_storage_opt_ok: Optional[bool] = None # cached result across instances
|
||||
|
||||
|
||||
def _ensure_docker_available() -> None:
    """Best-effort check that the docker CLI is available before use.

    Reuses ``find_docker()`` so this preflight stays consistent with the rest of
    the Docker backend, including known non-PATH Docker Desktop locations.

    Raises:
        RuntimeError: when the CLI is missing, cannot be executed, the daemon
            does not respond, or ``docker version`` exits non-zero.
    """
    docker_exe = find_docker()
    if not docker_exe:
        logger.error(
            "Docker backend selected but no docker executable was found in PATH "
            "or known install locations. Install Docker Desktop and ensure the "
            "CLI is available."
        )
        raise RuntimeError(
            "Docker executable not found in PATH or known install locations. "
            "Install Docker and ensure the 'docker' command is available."
        )

    try:
        proc = subprocess.run(
            [docker_exe, "version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except FileNotFoundError:
        logger.error(
            "Docker backend selected but the resolved docker executable '%s' could "
            "not be executed.",
            docker_exe,
            exc_info=True,
        )
        raise RuntimeError(
            "Docker executable could not be executed. Check your Docker installation."
        )
    except subprocess.TimeoutExpired:
        logger.error(
            "Docker backend selected but '%s version' timed out. "
            "The Docker daemon may not be running.",
            docker_exe,
            exc_info=True,
        )
        raise RuntimeError(
            "Docker daemon is not responding. Ensure Docker is running and try again."
        )
    except Exception:
        logger.error(
            "Unexpected error while checking Docker availability.",
            exc_info=True,
        )
        raise

    # Every exception path above raises, so reaching this point means the CLI
    # invocation completed; now verify it actually succeeded.
    if proc.returncode != 0:
        logger.error(
            "Docker backend selected but '%s version' failed "
            "(exit code %d, stderr=%s)",
            docker_exe,
            proc.returncode,
            proc.stderr.strip(),
        )
        raise RuntimeError(
            "Docker command is available but 'docker version' failed. "
            "Check your Docker installation."
        )
|
||||
|
||||
|
||||
class DockerEnvironment(BaseEnvironment):
    """Hardened Docker container execution with resource limits and persistence.

    Security: all capabilities dropped, no privilege escalation, PID limits,
    size-limited tmpfs for scratch dirs. The container itself is the security
    boundary — the filesystem inside is writable so agents can install packages
    (pip, npm, apt) as needed. Writable workspace via tmpfs or bind mounts.

    Persistence: when enabled, bind mounts preserve /workspace and /root
    across container restarts.
    """

    def __init__(
        self,
        image: str,
        cwd: str = "/root",
        timeout: int = 60,
        cpu: float = 0,
        memory: int = 0,
        disk: int = 0,
        persistent_filesystem: bool = False,
        task_id: str = "default",
        volumes: list = None,
        forward_env: list[str] | None = None,
        network: bool = True,
        host_cwd: str = None,
        auto_mount_cwd: bool = False,
    ):
        """Start a hardened container via ``docker run -d ... sleep 2h``.

        Args:
            image: Image to run.
            cwd: In-container default working directory ("~" maps to /root).
            timeout: Default per-command timeout in seconds.
            cpu/memory/disk: Resource limits; 0 means unlimited. Disk quotas
                need overlay2-on-XFS and are skipped on macOS.
            persistent_filesystem: Bind-mount /root (and /workspace) from the
                host sandbox dir instead of ephemeral tmpfs.
            task_id: Names the per-task persistent sandbox directory.
            volumes: Extra "host:container" mount strings from config.
            forward_env: Env var names forwarded into each `docker exec`.
            network: When False, run with --network=none.
            host_cwd / auto_mount_cwd: Optionally bind the host cwd to
                /workspace when no explicit /workspace mount exists.

        Raises:
            RuntimeError: if the docker CLI/daemon is unavailable.
            subprocess.CalledProcessError: if `docker run` fails (check=True).
        """
        if cwd == "~":
            cwd = "/root"
        super().__init__(cwd=cwd, timeout=timeout)
        self._base_image = image
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._forward_env = _normalize_forward_env_names(forward_env)
        self._container_id: Optional[str] = None
        logger.info(f"DockerEnvironment volumes: {volumes}")
        # Ensure volumes is a list (config.yaml could be malformed)
        if volumes is not None and not isinstance(volumes, list):
            logger.warning(f"docker_volumes config is not a list: {volumes!r}")
            volumes = []

        # Fail fast if Docker is not available.
        _ensure_docker_available()

        # Build resource limit args
        resource_args = []
        if cpu > 0:
            resource_args.extend(["--cpus", str(cpu)])
        if memory > 0:
            resource_args.extend(["--memory", f"{memory}m"])
        if disk > 0 and sys.platform != "darwin":
            if self._storage_opt_supported():
                resource_args.extend(["--storage-opt", f"size={disk}m"])
            else:
                logger.warning(
                    "Docker storage driver does not support per-container disk limits "
                    "(requires overlay2 on XFS with pquota). Container will run without disk quota."
                )
        if not network:
            resource_args.append("--network=none")

        # Persistent workspace via bind mounts from a configurable host directory
        # (TERMINAL_SANDBOX_DIR, default ~/.hermes/sandboxes/). Non-persistent
        # mode uses tmpfs (ephemeral, fast, gone on cleanup).
        from tools.environments.base import get_sandbox_dir

        # User-configured volume mounts (from config.yaml docker_volumes)
        volume_args = []
        workspace_explicitly_mounted = False
        for vol in (volumes or []):
            if not isinstance(vol, str):
                logger.warning(f"Docker volume entry is not a string: {vol!r}")
                continue
            vol = vol.strip()
            if not vol:
                continue
            if ":" in vol:
                volume_args.extend(["-v", vol])
                # An explicit /workspace mount suppresses both the auto cwd
                # mount and the default workspace tmpfs/bind below.
                if ":/workspace" in vol:
                    workspace_explicitly_mounted = True
            else:
                logger.warning(f"Docker volume '{vol}' missing colon, skipping")

        host_cwd_abs = os.path.abspath(os.path.expanduser(host_cwd)) if host_cwd else ""
        bind_host_cwd = (
            auto_mount_cwd
            and bool(host_cwd_abs)
            and os.path.isdir(host_cwd_abs)
            and not workspace_explicitly_mounted
        )
        if auto_mount_cwd and host_cwd and not os.path.isdir(host_cwd_abs):
            logger.debug(f"Skipping docker cwd mount: host_cwd is not a valid directory: {host_cwd}")

        # Host-side dirs backing the bind mounts; stay None in tmpfs mode so
        # cleanup() knows there is nothing to delete.
        self._workspace_dir: Optional[str] = None
        self._home_dir: Optional[str] = None
        writable_args = []
        if self._persistent:
            sandbox = get_sandbox_dir() / "docker" / task_id
            self._home_dir = str(sandbox / "home")
            os.makedirs(self._home_dir, exist_ok=True)
            writable_args.extend([
                "-v", f"{self._home_dir}:/root",
            ])
            if not bind_host_cwd and not workspace_explicitly_mounted:
                self._workspace_dir = str(sandbox / "workspace")
                os.makedirs(self._workspace_dir, exist_ok=True)
                writable_args.extend([
                    "-v", f"{self._workspace_dir}:/workspace",
                ])
        else:
            if not bind_host_cwd and not workspace_explicitly_mounted:
                writable_args.extend([
                    "--tmpfs", "/workspace:rw,exec,size=10g",
                ])
            writable_args.extend([
                "--tmpfs", "/home:rw,exec,size=1g",
                "--tmpfs", "/root:rw,exec,size=1g",
            ])

        if bind_host_cwd:
            logger.info(f"Mounting configured host cwd to /workspace: {host_cwd_abs}")
            # Prepend so user-configured mounts can still shadow sub-paths.
            volume_args = ["-v", f"{host_cwd_abs}:/workspace", *volume_args]
        elif workspace_explicitly_mounted:
            logger.debug("Skipping docker cwd mount: /workspace already mounted by user config")

        logger.info(f"Docker volume_args: {volume_args}")
        all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
        logger.info(f"Docker run_args: {all_run_args}")

        # Resolve the docker executable once so it works even when
        # /usr/local/bin is not in PATH (common on macOS gateway/service).
        self._docker_exe = find_docker() or "docker"

        # Start the container directly via `docker run -d`.
        container_name = f"hermes-{uuid.uuid4().hex[:8]}"
        run_cmd = [
            self._docker_exe, "run", "-d",
            "--name", container_name,
            "-w", cwd,
            *all_run_args,
            image,
            "sleep", "2h",
        ]
        logger.debug(f"Starting container: {' '.join(run_cmd)}")
        result = subprocess.run(
            run_cmd,
            capture_output=True,
            text=True,
            timeout=120,  # image pull may take a while
            check=True,
        )
        self._container_id = result.stdout.strip()
        logger.info(f"Started container {container_name} ({self._container_id[:12]})")

    @staticmethod
    def _storage_opt_supported() -> bool:
        """Check if Docker's storage driver supports --storage-opt size=.

        Only overlay2 on XFS with pquota supports per-container disk quotas.
        Ubuntu (and most distros) default to ext4, where this flag errors out.

        The (module-level) cached result is shared across all instances.
        """
        global _storage_opt_ok
        if _storage_opt_ok is not None:
            return _storage_opt_ok
        try:
            docker = find_docker() or "docker"
            result = subprocess.run(
                [docker, "info", "--format", "{{.Driver}}"],
                capture_output=True, text=True, timeout=10,
            )
            driver = result.stdout.strip().lower()
            if driver != "overlay2":
                _storage_opt_ok = False
                return False
            # overlay2 only supports storage-opt on XFS with pquota.
            # Probe by attempting a dry-ish run — the fastest reliable check.
            probe = subprocess.run(
                [docker, "create", "--storage-opt", "size=1m", "hello-world"],
                capture_output=True, text=True, timeout=15,
            )
            if probe.returncode == 0:
                # Clean up the created container
                container_id = probe.stdout.strip()
                if container_id:
                    subprocess.run([docker, "rm", container_id],
                                   capture_output=True, timeout=5)
                _storage_opt_ok = True
            else:
                _storage_opt_ok = False
        except Exception:
            _storage_opt_ok = False
        logger.debug("Docker --storage-opt support: %s", _storage_opt_ok)
        return _storage_opt_ok

    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Run a command via `docker exec`, returning {"output", "returncode"}.

        Supports interrupt (exit 130) and timeout (exit 124) while streaming
        combined stdout/stderr through a background drain thread.
        """
        exec_command, sudo_stdin = self._prepare_command(command)
        work_dir = cwd or self.cwd
        effective_timeout = timeout or self.timeout

        # Merge sudo password (if any) with caller-supplied stdin_data.
        if sudo_stdin is not None and stdin_data is not None:
            effective_stdin = sudo_stdin + stdin_data
        elif sudo_stdin is not None:
            effective_stdin = sudo_stdin
        else:
            effective_stdin = stdin_data

        # docker exec -w doesn't expand ~, so prepend a cd into the command
        if work_dir == "~" or work_dir.startswith("~/"):
            exec_command = f"cd {work_dir} && {exec_command}"
            work_dir = "/"

        assert self._container_id, "Container not started"
        cmd = [self._docker_exe, "exec"]
        if effective_stdin is not None:
            cmd.append("-i")
        cmd.extend(["-w", work_dir])
        # Forwarded env vars: process env wins, ~/.hermes/.env is fallback.
        hermes_env = _load_hermes_env_vars() if self._forward_env else {}
        for key in self._forward_env:
            value = os.getenv(key)
            if value is None:
                value = hermes_env.get(key)
            if value is not None:
                cmd.extend(["-e", f"{key}={value}"])
        cmd.extend([self._container_id, "bash", "-lc", exec_command])

        try:
            _output_chunks = []
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                stdin=subprocess.PIPE if effective_stdin else subprocess.DEVNULL,
                text=True,
            )
            if effective_stdin:
                try:
                    proc.stdin.write(effective_stdin)
                    proc.stdin.close()
                except Exception:
                    pass

            # Drain stdout on a daemon thread so the poll loop below can
            # watch for interrupts/timeouts without blocking on reads.
            def _drain():
                try:
                    for line in proc.stdout:
                        _output_chunks.append(line)
                except Exception:
                    pass

            reader = threading.Thread(target=_drain, daemon=True)
            reader.start()
            deadline = time.monotonic() + effective_timeout

            while proc.poll() is None:
                if is_interrupted():
                    # Graceful terminate first, hard kill after 1s.
                    proc.terminate()
                    try:
                        proc.wait(timeout=1)
                    except subprocess.TimeoutExpired:
                        proc.kill()
                    reader.join(timeout=2)
                    return {
                        "output": "".join(_output_chunks) + "\n[Command interrupted]",
                        "returncode": 130,
                    }
                if time.monotonic() > deadline:
                    proc.kill()
                    reader.join(timeout=2)
                    return self._timeout_result(effective_timeout)
                time.sleep(0.2)

            reader.join(timeout=5)
            return {"output": "".join(_output_chunks), "returncode": proc.returncode}
        except Exception as e:
            return {"output": f"Docker execution error: {e}", "returncode": 1}

    def cleanup(self):
        """Stop and remove the container. Bind-mount dirs persist if persistent=True."""
        if self._container_id:
            try:
                # Stop in background so cleanup doesn't block
                stop_cmd = (
                    f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
                    f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
                )
                subprocess.Popen(stop_cmd, shell=True)
            except Exception as e:
                logger.warning("Failed to stop container %s: %s", self._container_id, e)

            if not self._persistent:
                # Also schedule removal (stop only leaves it as stopped)
                try:
                    subprocess.Popen(
                        f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
                        shell=True,
                    )
                except Exception:
                    pass
            self._container_id = None

        if not self._persistent:
            # NOTE(review): _workspace_dir/_home_dir are only assigned in the
            # persistent branch of __init__, so this loop appears to be a
            # no-op in non-persistent mode — confirm intent.
            for d in (self._workspace_dir, self._home_dir):
                if d:
                    shutil.rmtree(d, ignore_errors=True)
|
||||
476
hermes_code/tools/environments/local.py
Normal file
476
hermes_code/tools/environments/local.py
Normal file
|
|
@ -0,0 +1,476 @@
|
|||
"""Local execution environment with interrupt support and non-blocking I/O."""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.environments.persistent_shell import PersistentShellMixin
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
# Unique marker to isolate real command output from shell init/exit noise.
|
||||
# printf (no trailing newline) keeps the boundaries clean for splitting.
|
||||
_OUTPUT_FENCE = "__HERMES_FENCE_a9f7b3__"
|
||||
|
||||
# Hermes-internal env vars that should NOT leak into terminal subprocesses.
|
||||
# These are loaded from ~/.hermes/.env for Hermes' own LLM/provider calls
|
||||
# but can break external CLIs (e.g. codex) that also honor them.
|
||||
# See: https://github.com/NousResearch/hermes-agent/issues/1002
|
||||
#
|
||||
# Built dynamically from the provider registry so new providers are
|
||||
# automatically covered without manual blocklist maintenance.
|
||||
_HERMES_PROVIDER_ENV_FORCE_PREFIX = "_HERMES_FORCE_"
|
||||
|
||||
|
||||
def _build_provider_env_blocklist() -> frozenset:
    """Derive the blocklist from provider, tool, and gateway config.

    Automatically picks up api_key_env_vars and base_url_env_var from
    every registered provider, plus tool/messaging env vars from the
    optional config registry, so new Hermes-managed secrets are blocked
    in subprocesses without having to maintain multiple static lists.

    Returns:
        frozenset of environment-variable names that should be stripped
        from subprocess environments.
    """
    blocked: set[str] = set()

    # Provider registry: every provider's API-key vars and base-URL override.
    # ImportError is tolerated so this module stays usable standalone.
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY
        for pconfig in PROVIDER_REGISTRY.values():
            blocked.update(pconfig.api_key_env_vars)
            if pconfig.base_url_env_var:
                blocked.add(pconfig.base_url_env_var)
    except ImportError:
        pass

    # Optional config registry: block tool/messaging vars outright, and
    # "setting" vars only when flagged as passwords.
    try:
        from hermes_cli.config import OPTIONAL_ENV_VARS
        for name, metadata in OPTIONAL_ENV_VARS.items():
            category = metadata.get("category")
            if category in {"tool", "messaging"}:
                blocked.add(name)
            elif category == "setting" and metadata.get("password"):
                blocked.add(name)
    except ImportError:
        pass

    # Vars not covered above but still Hermes-internal / conflict-prone.
    blocked.update({
        "OPENAI_BASE_URL",
        "OPENAI_API_KEY",
        "OPENAI_API_BASE",  # legacy alias
        "OPENAI_ORG_ID",
        "OPENAI_ORGANIZATION",
        "OPENROUTER_API_KEY",
        "ANTHROPIC_BASE_URL",
        "ANTHROPIC_TOKEN",  # OAuth token (not in registry as env var)
        "CLAUDE_CODE_OAUTH_TOKEN",
        "LLM_MODEL",
        # Expanded isolation for other major providers (Issue #1002)
        "GOOGLE_API_KEY",  # Gemini / Google AI Studio
        "DEEPSEEK_API_KEY",  # DeepSeek
        "MISTRAL_API_KEY",  # Mistral AI
        "GROQ_API_KEY",  # Groq
        "TOGETHER_API_KEY",  # Together AI
        "PERPLEXITY_API_KEY",  # Perplexity
        "COHERE_API_KEY",  # Cohere
        "FIREWORKS_API_KEY",  # Fireworks AI
        "XAI_API_KEY",  # xAI (Grok)
        "HELICONE_API_KEY",  # LLM Observability proxy
        "PARALLEL_API_KEY",
        "FIRECRAWL_API_KEY",
        "FIRECRAWL_API_URL",
        # Gateway/runtime config not represented in OPTIONAL_ENV_VARS.
        "TELEGRAM_HOME_CHANNEL",
        "TELEGRAM_HOME_CHANNEL_NAME",
        "DISCORD_HOME_CHANNEL",
        "DISCORD_HOME_CHANNEL_NAME",
        "DISCORD_REQUIRE_MENTION",
        "DISCORD_FREE_RESPONSE_CHANNELS",
        "DISCORD_AUTO_THREAD",
        "SLACK_HOME_CHANNEL",
        "SLACK_HOME_CHANNEL_NAME",
        "SLACK_ALLOWED_USERS",
        "WHATSAPP_ENABLED",
        "WHATSAPP_MODE",
        "WHATSAPP_ALLOWED_USERS",
        "SIGNAL_HTTP_URL",
        "SIGNAL_ACCOUNT",
        "SIGNAL_ALLOWED_USERS",
        "SIGNAL_GROUP_ALLOWED_USERS",
        "SIGNAL_HOME_CHANNEL",
        "SIGNAL_HOME_CHANNEL_NAME",
        "SIGNAL_IGNORE_STORIES",
        "HASS_TOKEN",
        "HASS_URL",
        "EMAIL_ADDRESS",
        "EMAIL_PASSWORD",
        "EMAIL_IMAP_HOST",
        "EMAIL_SMTP_HOST",
        "EMAIL_HOME_ADDRESS",
        "EMAIL_HOME_ADDRESS_NAME",
        "GATEWAY_ALLOWED_USERS",
        # Skills Hub / GitHub app auth paths and aliases.
        "GH_TOKEN",
        "GITHUB_APP_ID",
        "GITHUB_APP_PRIVATE_KEY_PATH",
        "GITHUB_APP_INSTALLATION_ID",
        # Remote sandbox backend credentials.
        "MODAL_TOKEN_ID",
        "MODAL_TOKEN_SECRET",
        "DAYTONA_API_KEY",
    })
    return frozenset(blocked)
|
||||
|
||||
|
||||
_HERMES_PROVIDER_ENV_BLOCKLIST = _build_provider_env_blocklist()
|
||||
|
||||
|
||||
def _sanitize_subprocess_env(base_env: dict | None, extra_env: dict | None = None) -> dict:
    """Filter Hermes-managed secrets out of a subprocess environment.

    A ``_HERMES_FORCE_<VAR>`` entry in ``extra_env`` deliberately opts the
    blocked variable ``<VAR>`` back in.  Variables registered through
    :mod:`tools.env_passthrough` (skill-declared or user-configured) bypass
    the blocklist as well.  FORCE-prefixed keys in ``base_env`` are dropped.
    """
    try:
        from tools.env_passthrough import is_env_passthrough as _is_passthrough
    except Exception:
        _is_passthrough = lambda _: False  # noqa: E731

    def _permitted(name: str) -> bool:
        # Allowed when not blocklisted, or explicitly registered passthrough.
        return name not in _HERMES_PROVIDER_ENV_BLOCKLIST or _is_passthrough(name)

    result: dict[str, str] = {}

    for name, value in (base_env or {}).items():
        if name.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
            continue  # FORCE markers never flow through from the base env
        if _permitted(name):
            result[name] = value

    for name, value in (extra_env or {}).items():
        if name.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
            # Intentional opt-in: strip the prefix and force the real var in.
            result[name[len(_HERMES_PROVIDER_ENV_FORCE_PREFIX):]] = value
        elif _permitted(name):
            result[name] = value

    return result
|
||||
|
||||
|
||||
def _find_bash() -> str:
    """Locate a bash executable for command execution.

    The fence wrapper relies on bash syntax (semicolons, $?, printf), so the
    user's $SHELL (fish/zsh/...) cannot be the first choice.
    On Windows: uses Git Bash (bundled with Git for Windows).
    """
    if not _IS_WINDOWS:
        # First truthy candidate wins; mirrors a simple `or` chain.
        for candidate in (
            shutil.which("bash"),
            "/usr/bin/bash" if os.path.isfile("/usr/bin/bash") else None,
            "/bin/bash" if os.path.isfile("/bin/bash") else None,
            os.environ.get("SHELL"),  # last resort: whatever they have
        ):
            if candidate:
                return candidate
        return "/bin/sh"

    # Windows: look for Git Bash (installed with Git for Windows).
    # Allow override via env var (same pattern as Claude Code).
    override = os.environ.get("HERMES_GIT_BASH_PATH")
    if override and os.path.isfile(override):
        return override

    # shutil.which finds bash.exe if Git\bin is on PATH
    located = shutil.which("bash")
    if located:
        return located

    # Check common Git for Windows install locations
    for candidate in (
        os.path.join(os.environ.get("ProgramFiles", r"C:\Program Files"), "Git", "bin", "bash.exe"),
        os.path.join(os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"), "Git", "bin", "bash.exe"),
        os.path.join(os.environ.get("LOCALAPPDATA", ""), "Programs", "Git", "bin", "bash.exe"),
    ):
        if candidate and os.path.isfile(candidate):
            return candidate

    raise RuntimeError(
        "Git Bash not found. Hermes Agent requires Git for Windows on Windows.\n"
        "Install it from: https://git-scm.com/download/win\n"
        "Or set HERMES_GIT_BASH_PATH to your bash.exe location."
    )
|
||||
|
||||
|
||||
# Backward compat — process_registry.py imports this name
|
||||
_find_shell = _find_bash
|
||||
|
||||
|
||||
# Noise lines emitted by interactive shells when stdin is not a terminal.
|
||||
# Used as a fallback when output fence markers are missing.
|
||||
_SHELL_NOISE_SUBSTRINGS = (
|
||||
# bash
|
||||
"bash: cannot set terminal process group",
|
||||
"bash: no job control in this shell",
|
||||
"no job control in this shell",
|
||||
"cannot set terminal process group",
|
||||
"tcsetattr: Inappropriate ioctl for device",
|
||||
# zsh / oh-my-zsh / macOS terminal session
|
||||
"Restored session:",
|
||||
"Saving session...",
|
||||
"Last login:",
|
||||
"command not found:",
|
||||
"Oh My Zsh",
|
||||
"compinit:",
|
||||
)
|
||||
|
||||
|
||||
def _clean_shell_noise(output: str) -> str:
    """Remove known shell startup/exit warnings from the edges of *output*.

    Only leading noise lines and trailing noise/empty lines are dropped;
    matching lines in the middle of the output are left untouched.
    """

    def _is_noise(line: str) -> bool:
        return any(pattern in line for pattern in _SHELL_NOISE_SUBSTRINGS)

    lines = output.split("\n")

    # Advance past leading noise.
    start = 0
    while start < len(lines) and _is_noise(lines[start]):
        start += 1

    # Retreat past trailing noise and the empty strings split() produces.
    stop = len(lines)
    while stop > start and (not lines[stop - 1] or _is_noise(lines[stop - 1])):
        stop -= 1

    if stop <= start:
        return ""

    result = "\n".join(lines[start:stop])

    # Preserve trailing newline if original had one
    if output.endswith("\n") and result and not result.endswith("\n"):
        result += "\n"
    return result
|
||||
|
||||
|
||||
# Standard PATH entries for environments with minimal PATH (e.g. systemd services).
|
||||
# Includes macOS Homebrew paths (/opt/homebrew/* for Apple Silicon).
|
||||
_SANE_PATH = (
|
||||
"/opt/homebrew/bin:/opt/homebrew/sbin:"
|
||||
"/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
)
|
||||
|
||||
|
||||
def _make_run_env(env: dict) -> dict:
    """Build a run environment with a sane PATH and provider-var stripping.

    Merges *env* over ``os.environ``, removes blocklisted Hermes secrets
    (honoring FORCE-prefixed opt-ins and registered passthrough vars), and
    appends the standard PATH entries when /usr/bin is missing from PATH.
    """
    try:
        from tools.env_passthrough import is_env_passthrough as _is_passthrough
    except Exception:
        _is_passthrough = lambda _: False  # noqa: E731

    combined = dict(os.environ | env)
    run_env: dict = {}
    for name, value in combined.items():
        if name.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
            # Explicit opt-in: expose the variable under its real name.
            run_env[name[len(_HERMES_PROVIDER_ENV_FORCE_PREFIX):]] = value
        elif name not in _HERMES_PROVIDER_ENV_BLOCKLIST or _is_passthrough(name):
            run_env[name] = value

    current_path = run_env.get("PATH", "")
    if "/usr/bin" not in current_path.split(":"):
        # Minimal PATH (e.g. systemd service): append the standard entries.
        run_env["PATH"] = f"{current_path}:{_SANE_PATH}" if current_path else _SANE_PATH
    return run_env
|
||||
|
||||
|
||||
def _extract_fenced_output(raw: str) -> str:
    """Return the real command output between the fence markers in *raw*.

    execute() brackets each command with printf(FENCE) markers; the text
    between the first and last fence is the command's own output, free of
    shell init/exit noise.  When no fence is present, fall back to the
    pattern-based _clean_shell_noise scrub.
    """
    first = raw.find(_OUTPUT_FENCE)
    if first < 0:
        return _clean_shell_noise(raw)

    begin = first + len(_OUTPUT_FENCE)
    last = raw.rfind(_OUTPUT_FENCE)

    if last <= first:
        # Closing fence never printed (e.g. user command called `exit`).
        return _clean_shell_noise(raw[begin:])

    return raw[begin:last]
|
||||
|
||||
|
||||
class LocalEnvironment(PersistentShellMixin, BaseEnvironment):
|
||||
"""Run commands directly on the host machine.
|
||||
|
||||
Features:
|
||||
- Popen + polling for interrupt support (user can cancel mid-command)
|
||||
- Background stdout drain thread to prevent pipe buffer deadlocks
|
||||
- stdin_data support for piping content (bypasses ARG_MAX limits)
|
||||
- sudo -S transform via SUDO_PASSWORD env var
|
||||
- Uses interactive login shell so full user env is available
|
||||
- Optional persistent shell mode (cwd/env vars survive across calls)
|
||||
"""
|
||||
|
||||
def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None,
             persistent: bool = False):
    """Set up a host-local environment; spawn the shell if persistent."""
    super().__init__(cwd=cwd or os.getcwd(), timeout=timeout, env=env)
    self.persistent = persistent
    if persistent:
        # Long-lived shell: cwd/env vars survive across execute() calls.
        self._init_persistent_shell()
|
||||
|
||||
@property
def _temp_prefix(self) -> str:
    # Host-side prefix for this session's persistent-shell IPC files.
    return "/tmp/hermes-local-" + self._session_id
|
||||
|
||||
def _spawn_shell_process(self) -> subprocess.Popen:
    """Launch the long-lived login bash used by persistent mode."""
    # stderr is discarded: command stderr is captured via the IPC files,
    # and the shell writes its own results there rather than to stdout.
    return subprocess.Popen(
        [_find_bash(), "-l"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        text=True,
        env=_make_run_env(self.env),
        # New session/process group so children can be targeted as a group.
        preexec_fn=None if _IS_WINDOWS else os.setsid,
    )
|
||||
|
||||
def _read_temp_files(self, *paths: str) -> list[str]:
    """Read each path in order; missing files yield an empty string."""
    contents: list[str] = []
    for path in paths:
        if not os.path.exists(path):
            contents.append("")
            continue
        with open(path) as handle:
            contents.append(handle.read())
    return contents
|
||||
|
||||
def _kill_shell_children(self):
    """Terminate the persistent shell's direct children via `pkill -P`."""
    pid = self._shell_pid
    if pid is None:
        return
    try:
        subprocess.run(
            ["pkill", "-P", str(pid)],
            capture_output=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        # pkill unavailable or hung — nothing more we can safely do.
        pass
|
||||
|
||||
def _cleanup_temp_files(self):
    """Delete every IPC temp file belonging to this session."""
    for path in glob.glob(f"{self._temp_prefix}-*"):
        if os.path.exists(path):
            os.remove(path)
|
||||
|
||||
def _execute_oneshot(self, command: str, cwd: str = "", *,
                     timeout: int | None = None,
                     stdin_data: str | None = None) -> dict:
    """Run one command in a fresh `bash -lic` subprocess.

    Returns {"output": combined stdout+stderr text, "returncode": int}.
    Interruption returns code 130; timeout delegates to
    self._timeout_result().
    """
    work_dir = cwd or self.cwd or os.getcwd()
    effective_timeout = timeout or self.timeout
    # _prepare_command may rewrite sudo usage and hand back a password
    # string that must be fed on stdin (sudo -S transform).
    exec_command, sudo_stdin = self._prepare_command(command)

    # Sudo password (if any) must reach stdin before the caller's data.
    if sudo_stdin is not None and stdin_data is not None:
        effective_stdin = sudo_stdin + stdin_data
    elif sudo_stdin is not None:
        effective_stdin = sudo_stdin
    else:
        effective_stdin = stdin_data

    user_shell = _find_bash()
    # Bracket the command with fence markers so interactive-shell init/exit
    # noise can be stripped later; the real exit code is preserved via $?.
    fenced_cmd = (
        f"printf '{_OUTPUT_FENCE}';"
        f" {exec_command};"
        f" __hermes_rc=$?;"
        f" printf '{_OUTPUT_FENCE}';"
        f" exit $__hermes_rc"
    )
    run_env = _make_run_env(self.env)

    proc = subprocess.Popen(
        # -lic: interactive login shell so the full user env is available.
        [user_shell, "-lic", fenced_cmd],
        text=True,
        cwd=work_dir,
        env=run_env,
        encoding="utf-8",
        errors="replace",
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=subprocess.PIPE if effective_stdin is not None else subprocess.DEVNULL,
        # Own process group (POSIX) so killpg can take down all children.
        preexec_fn=None if _IS_WINDOWS else os.setsid,
    )

    if effective_stdin is not None:
        # Feed stdin from a thread so a full pipe buffer can't deadlock us.
        def _write_stdin():
            try:
                proc.stdin.write(effective_stdin)
                proc.stdin.close()
            except (BrokenPipeError, OSError):
                pass
        threading.Thread(target=_write_stdin, daemon=True).start()

    _output_chunks: list[str] = []

    # Drain stdout in the background to prevent pipe-buffer deadlocks.
    def _drain_stdout():
        try:
            for line in proc.stdout:
                _output_chunks.append(line)
        except ValueError:
            # Stream closed underneath the iterator.
            pass
        finally:
            try:
                proc.stdout.close()
            except Exception:
                pass

    reader = threading.Thread(target=_drain_stdout, daemon=True)
    reader.start()
    deadline = time.monotonic() + effective_timeout

    # Poll loop: check for user interrupt and timeout every 200ms.
    while proc.poll() is None:
        if is_interrupted():
            try:
                if _IS_WINDOWS:
                    proc.terminate()
                else:
                    # SIGTERM the whole group; escalate to SIGKILL after 1s.
                    pgid = os.getpgid(proc.pid)
                    os.killpg(pgid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=1.0)
                    except subprocess.TimeoutExpired:
                        os.killpg(pgid, signal.SIGKILL)
            except (ProcessLookupError, PermissionError):
                proc.kill()
            reader.join(timeout=2)
            return {
                "output": "".join(_output_chunks) + "\n[Command interrupted — user sent a new message]",
                "returncode": 130,
            }
        if time.monotonic() > deadline:
            try:
                if _IS_WINDOWS:
                    proc.terminate()
                else:
                    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
            except (ProcessLookupError, PermissionError):
                proc.kill()
            reader.join(timeout=2)
            return self._timeout_result(effective_timeout)
        time.sleep(0.2)

    reader.join(timeout=5)
    output = _extract_fenced_output("".join(_output_chunks))
    return {"output": output, "returncode": proc.returncode}
|
||||
259
hermes_code/tools/environments/modal.py
Normal file
259
hermes_code/tools/environments/modal.py
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
"""Modal cloud execution environment using SWE-ReX directly.
|
||||
|
||||
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
|
||||
is snapshotted on cleanup and restored on next creation, so installed packages,
|
||||
project files, and config changes survive across sessions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SNAPSHOT_STORE = get_hermes_home() / "modal_snapshots.json"
|
||||
|
||||
|
||||
def _load_snapshots() -> Dict[str, str]:
    """Load the task-id -> snapshot-id mapping from disk."""
    if not _SNAPSHOT_STORE.exists():
        return {}
    try:
        return json.loads(_SNAPSHOT_STORE.read_text())
    except Exception:
        # Corrupt or unreadable store: behave as if no snapshots exist.
        return {}
|
||||
|
||||
|
||||
def _save_snapshots(data: Dict[str, str]) -> None:
    """Write the task-id -> snapshot-id mapping to disk."""
    store = _SNAPSHOT_STORE
    store.parent.mkdir(parents=True, exist_ok=True)
    store.write_text(json.dumps(data, indent=2))
|
||||
|
||||
|
||||
class _AsyncWorker:
|
||||
"""Background thread with its own event loop for async-safe swe-rex calls.
|
||||
|
||||
Allows sync code to submit async coroutines and block for results,
|
||||
even when called from inside another running event loop (e.g. Atropos).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._started = threading.Event()
|
||||
|
||||
def start(self):
|
||||
self._thread = threading.Thread(target=self._run_loop, daemon=True)
|
||||
self._thread.start()
|
||||
self._started.wait(timeout=30)
|
||||
|
||||
def _run_loop(self):
|
||||
self._loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(self._loop)
|
||||
self._started.set()
|
||||
self._loop.run_forever()
|
||||
|
||||
def run_coroutine(self, coro, timeout=600):
|
||||
if self._loop is None or self._loop.is_closed():
|
||||
raise RuntimeError("AsyncWorker loop is not running")
|
||||
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||
return future.result(timeout=timeout)
|
||||
|
||||
def stop(self):
|
||||
if self._loop and self._loop.is_running():
|
||||
self._loop.call_soon_threadsafe(self._loop.stop)
|
||||
if self._thread:
|
||||
self._thread.join(timeout=10)
|
||||
|
||||
|
||||
class ModalEnvironment(BaseEnvironment):
    """Modal cloud execution via SWE-ReX.

    Uses swe-rex's ModalDeployment directly for sandbox management.
    Adds sudo -S support, configurable resources (CPU, memory, disk),
    and optional filesystem persistence via Modal's snapshot API.
    """

    def __init__(
        self,
        image: str,
        cwd: str = "/root",
        timeout: int = 60,
        modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
        persistent_filesystem: bool = True,
        task_id: str = "default",
    ):
        """Create (or restore) the sandbox.

        `task_id` keys the on-disk snapshot store, so distinct tasks get
        distinct persisted filesystems.
        """
        super().__init__(cwd=cwd, timeout=timeout)

        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._base_image = image
        self._deployment = None
        self._worker = _AsyncWorker()

        sandbox_kwargs = dict(modal_sandbox_kwargs or {})

        # If persistent, try to restore from a previous snapshot
        restored_image = None
        if self._persistent:
            snapshot_id = _load_snapshots().get(self._task_id)
            if snapshot_id:
                try:
                    import modal
                    restored_image = modal.Image.from_id(snapshot_id)
                    logger.info("Modal: restoring from snapshot %s", snapshot_id[:20])
                except Exception as e:
                    logger.warning("Modal: failed to restore snapshot, using base image: %s", e)
                    restored_image = None

        effective_image = restored_image if restored_image else image

        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
        # Some task images have broken pip; fix via ensurepip before Modal uses it.
        # (Only applies to registry-string images; restored snapshot images
        # are already modal.Image objects and skip this wrap.)
        import modal as _modal
        if isinstance(effective_image, str):
            effective_image = _modal.Image.from_registry(
                effective_image,
                setup_dockerfile_commands=[
                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
                ],
            )

        # Start the async worker thread and create the deployment on it
        # so all gRPC channels are bound to the worker's event loop.
        self._worker.start()

        from swerex.deployment.modal import ModalDeployment

        async def _create_and_start():
            deployment = ModalDeployment(
                image=effective_image,
                startup_timeout=180.0,
                runtime_timeout=3600.0,
                deployment_timeout=3600.0,
                install_pipx=True,
                modal_sandbox_kwargs=sandbox_kwargs,
            )
            await deployment.start()
            return deployment

        self._deployment = self._worker.run_coroutine(_create_and_start())

    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Run *command* in the sandbox; returns {"output", "returncode"}.

        stdin_data is delivered via a heredoc (a unique EOF marker is
        chosen that does not occur in the data) since the Modal SDK has
        no stdin pipe.
        """
        if stdin_data is not None:
            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            while marker in stdin_data:
                marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"

        exec_command, sudo_stdin = self._prepare_command(command)

        # Modal sandboxes execute commands via the Modal SDK and cannot pipe
        # subprocess stdin directly the way a local Popen can. When a sudo
        # password is present, use a shell-level pipe from printf so that the
        # password feeds sudo -S without appearing as an echo argument embedded
        # in the shell string.
        if sudo_stdin is not None:
            import shlex
            exec_command = (
                f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
            )

        from swerex.runtime.abstract import Command as RexCommand

        effective_cwd = cwd or self.cwd
        effective_timeout = timeout or self.timeout

        # Run in a background thread so we can poll for interrupts
        result_holder = {"value": None, "error": None}

        def _run():
            try:
                async def _do_execute():
                    return await self._deployment.runtime.execute(
                        RexCommand(
                            command=exec_command,
                            shell=True,
                            check=False,
                            cwd=effective_cwd,
                            timeout=effective_timeout,
                            merge_output_streams=True,
                        )
                    )
                output = self._worker.run_coroutine(_do_execute())
                result_holder["value"] = {
                    "output": output.stdout,
                    "returncode": output.exit_code,
                }
            except Exception as e:
                result_holder["error"] = e

        t = threading.Thread(target=_run, daemon=True)
        t.start()
        while t.is_alive():
            t.join(timeout=0.2)
            if is_interrupted():
                # No per-command cancel exists; tear the sandbox down instead.
                try:
                    self._worker.run_coroutine(
                        asyncio.wait_for(self._deployment.stop(), timeout=10),
                        timeout=15,
                    )
                except Exception:
                    pass
                return {
                    "output": "[Command interrupted - Modal sandbox terminated]",
                    "returncode": 130,
                }

        if result_holder["error"]:
            return {"output": f"Modal execution error: {result_holder['error']}", "returncode": 1}
        return result_holder["value"]

    def cleanup(self):
        """Snapshot the filesystem (if persistent) then stop the sandbox."""
        if self._deployment is None:
            return

        if self._persistent:
            try:
                # NOTE(review): reaches into swe-rex's private `_sandbox`
                # attribute — confirm it still exists on swe-rex upgrades.
                sandbox = getattr(self._deployment, '_sandbox', None)
                if sandbox:
                    async def _snapshot():
                        img = await sandbox.snapshot_filesystem.aio()
                        return img.object_id

                    try:
                        snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
                    except Exception:
                        snapshot_id = None

                    if snapshot_id:
                        snapshots = _load_snapshots()
                        snapshots[self._task_id] = snapshot_id
                        _save_snapshots(snapshots)
                        logger.info("Modal: saved filesystem snapshot %s for task %s",
                                    snapshot_id[:20], self._task_id)
            except Exception as e:
                logger.warning("Modal: filesystem snapshot failed: %s", e)

        try:
            self._worker.run_coroutine(
                asyncio.wait_for(self._deployment.stop(), timeout=10),
                timeout=15,
            )
        except Exception:
            pass
        finally:
            # Always shut the worker loop down and drop the deployment ref.
            self._worker.stop()
            self._deployment = None
|
||||
272
hermes_code/tools/environments/persistent_shell.py
Normal file
272
hermes_code/tools/environments/persistent_shell.py
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
"""Persistent shell mixin: file-based IPC protocol for long-lived bash shells."""
|
||||
|
||||
import logging
|
||||
import shlex
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from abc import abstractmethod
|
||||
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PersistentShellMixin:
|
||||
"""Mixin that adds persistent shell capability to any BaseEnvironment.
|
||||
|
||||
Subclasses must implement ``_spawn_shell_process()``, ``_read_temp_files()``,
|
||||
``_kill_shell_children()``, ``_execute_oneshot()``, and ``_cleanup_temp_files()``.
|
||||
"""
|
||||
|
||||
# Set by the concrete environment's __init__; True routes execute()/cleanup()
# through the long-lived-shell code path.
persistent: bool

@abstractmethod
def _spawn_shell_process(self) -> subprocess.Popen: ...

@abstractmethod
def _read_temp_files(self, *paths: str) -> list[str]: ...

@abstractmethod
def _kill_shell_children(self): ...

@abstractmethod
def _execute_oneshot(self, command: str, cwd: str, *,
                     timeout: int | None = None,
                     stdin_data: str | None = None) -> dict: ...

@abstractmethod
def _cleanup_temp_files(self): ...

# Class-level defaults; _init_persistent_shell() assigns the real session id.
_session_id: str = ""
_poll_interval: float = 0.01

@property
def _temp_prefix(self) -> str:
    # Common prefix for this session's IPC temp files.
    return f"/tmp/hermes-persistent-{self._session_id}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _init_persistent_shell(self):
    """Start the long-lived shell and perform the PID/cwd handshake."""
    self._shell_lock = threading.Lock()
    self._shell_proc: subprocess.Popen | None = None
    self._shell_alive: bool = False
    self._shell_pid: int | None = None

    self._session_id = uuid.uuid4().hex[:12]
    p = self._temp_prefix
    # File-based IPC channel: the shell writes results into these files.
    self._pshell_stdout = f"{p}-stdout"
    self._pshell_stderr = f"{p}-stderr"
    self._pshell_status = f"{p}-status"
    self._pshell_cwd = f"{p}-cwd"
    self._pshell_pid_file = f"{p}-pid"

    self._shell_proc = self._spawn_shell_process()
    self._shell_alive = True

    # Discard the shell's own stdout so its pipe never fills up.
    self._drain_thread = threading.Thread(
        target=self._drain_shell_output, daemon=True,
    )
    self._drain_thread.start()

    # Prime the shell: create the IPC files, report PID and starting cwd.
    init_script = (
        f"export TERM=${{TERM:-dumb}}\n"
        f"touch {self._pshell_stdout} {self._pshell_stderr} "
        f"{self._pshell_status} {self._pshell_cwd} {self._pshell_pid_file}\n"
        f"echo $$ > {self._pshell_pid_file}\n"
        f"pwd > {self._pshell_cwd}\n"
    )
    self._send_to_shell(init_script)

    # Handshake: wait up to 3 seconds for the shell to report its PID.
    deadline = time.monotonic() + 3.0
    while time.monotonic() < deadline:
        pid_str = self._read_temp_files(self._pshell_pid_file)[0].strip()
        if pid_str.isdigit():
            self._shell_pid = int(pid_str)
            break
        time.sleep(0.05)
    else:
        # No break: the shell never wrote a PID within the deadline.
        logger.warning("Could not read persistent shell PID")
        self._shell_pid = None

    if self._shell_pid:
        logger.info(
            "Persistent shell started (session=%s, pid=%d)",
            self._session_id, self._shell_pid,
        )

    # Adopt the shell's actual working directory as our own.
    reported_cwd = self._read_temp_files(self._pshell_cwd)[0].strip()
    if reported_cwd:
        self.cwd = reported_cwd
|
||||
|
||||
def _cleanup_persistent_shell(self):
    """Tear down the persistent shell process and its IPC temp files."""
    proc = self._shell_proc
    if proc is None:
        return

    if self._session_id:
        self._cleanup_temp_files()

    # Closing stdin lets a well-behaved shell exit on its own.
    try:
        proc.stdin.close()
    except Exception:
        pass
    try:
        proc.terminate()
        proc.wait(timeout=3)
    except subprocess.TimeoutExpired:
        proc.kill()

    self._shell_alive = False
    self._shell_proc = None

    if hasattr(self, "_drain_thread") and self._drain_thread.is_alive():
        self._drain_thread.join(timeout=1.0)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# execute() / cleanup() — shared dispatcher, subclasses inherit
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
            timeout: int | None = None,
            stdin_data: str | None = None) -> dict:
    """Dispatch to the persistent shell or a one-shot subprocess."""
    runner = self._execute_persistent if self.persistent else self._execute_oneshot
    return runner(command, cwd, timeout=timeout, stdin_data=stdin_data)
|
||||
|
||||
def cleanup(self):
    """Shut down the persistent shell, if one was started."""
    if not self.persistent:
        return
    self._cleanup_persistent_shell()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Shell I/O
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _drain_shell_output(self):
    """Consume and discard shell stdout; mark the shell dead when it ends."""
    try:
        for _discard in self._shell_proc.stdout:
            pass
    except Exception:
        pass
    # Stream exhausted or errored — either way the shell is gone.
    self._shell_alive = False
|
||||
|
||||
def _send_to_shell(self, text: str):
    """Write *text* to the shell's stdin; flag the shell dead on pipe errors."""
    proc = self._shell_proc
    if proc is None or not self._shell_alive:
        return
    try:
        proc.stdin.write(text)
        proc.stdin.flush()
    except (BrokenPipeError, OSError):
        self._shell_alive = False
|
||||
|
||||
def _read_persistent_output(self) -> tuple[str, int, str]:
    """Collect (merged output, exit code, cwd) from the IPC temp files."""
    stdout, stderr, status_raw, cwd_raw = self._read_temp_files(
        self._pshell_stdout, self._pshell_stderr,
        self._pshell_status, self._pshell_cwd,
    )
    merged = self._merge_output(stdout, stderr)

    # Status file holds "<cmd_id>:<exit code>"; tolerate a bare code too.
    status = status_raw.strip()
    if ":" in status:
        status = status.partition(":")[2]
    try:
        exit_code = int(status.strip())
    except ValueError:
        exit_code = 1

    return merged, exit_code, cwd_raw.strip()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Execution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _execute_persistent(self, command: str, cwd: str, *,
|
||||
timeout: int | None = None,
|
||||
stdin_data: str | None = None) -> dict:
|
||||
if not self._shell_alive:
|
||||
logger.info("Persistent shell died, restarting...")
|
||||
self._init_persistent_shell()
|
||||
|
||||
exec_command, sudo_stdin = self._prepare_command(command)
|
||||
effective_timeout = timeout or self.timeout
|
||||
if stdin_data or sudo_stdin:
|
||||
return self._execute_oneshot(
|
||||
command, cwd, timeout=timeout, stdin_data=stdin_data,
|
||||
)
|
||||
|
||||
with self._shell_lock:
|
||||
return self._execute_persistent_locked(
|
||||
exec_command, cwd, effective_timeout,
|
||||
)
|
||||
|
||||
    def _execute_persistent_locked(self, command: str, cwd: str,
                                   timeout: int) -> dict:
        """Run *command* inside the live persistent shell (caller holds the lock).

        File-based IPC protocol:
          1. Truncate the stdout/stderr/status temp files.
          2. Send a script that cd's to the work dir, eval's the command with
             output redirected into the temp files, then records the working
             directory and ``<cmd_id>:<exit_code>`` in the status file.
          3. Poll the status file until our unique cmd_id appears, honoring
             interrupts and the timeout.

        Returns a dict with ``output`` and ``returncode`` (130 on interrupt,
        124 on timeout — conventional shell exit codes).
        """
        work_dir = cwd or self.cwd
        # Unique id ties the status-file content to THIS command, so stale
        # status from a previous command can't be mistaken for completion.
        cmd_id = uuid.uuid4().hex[:8]
        # ':' is the shell no-op; '>' truncates each IPC file.
        truncate = (
            f": > {self._pshell_stdout}\n"
            f": > {self._pshell_stderr}\n"
            f": > {self._pshell_status}\n"
        )
        self._send_to_shell(truncate)
        # Escape single quotes so the command survives being wrapped in '...'
        escaped = command.replace("'", "'\\''")

        ipc_script = (
            f"cd {shlex.quote(work_dir)}\n"
            # `< /dev/null` keeps the command from stealing the shell's stdin.
            f"eval '{escaped}' < /dev/null > {self._pshell_stdout} 2> {self._pshell_stderr}\n"
            f"__EC=$?\n"
            f"pwd > {self._pshell_cwd}\n"
            f"echo {cmd_id}:$__EC > {self._pshell_status}\n"
        )
        self._send_to_shell(ipc_script)
        deadline = time.monotonic() + timeout
        poll_interval = self._poll_interval

        while True:
            if is_interrupted():
                # Kill only the command's children; the shell itself stays up.
                self._kill_shell_children()
                output, _, _ = self._read_persistent_output()
                return {
                    "output": output + "\n[Command interrupted]",
                    "returncode": 130,
                }

            if time.monotonic() > deadline:
                self._kill_shell_children()
                output, _, _ = self._read_persistent_output()
                # Surface whatever partial output was captured before timing out.
                if output:
                    return {
                        "output": output + f"\n[Command timed out after {timeout}s]",
                        "returncode": 124,
                    }
                return self._timeout_result(timeout)

            if not self._shell_alive:
                return {
                    "output": "Persistent shell died during execution",
                    "returncode": 1,
                }

            # Completion check: the status file must begin with our cmd_id.
            status_content = self._read_temp_files(self._pshell_status)[0].strip()
            if status_content.startswith(cmd_id + ":"):
                break

            time.sleep(poll_interval)

        output, exit_code, new_cwd = self._read_persistent_output()
        # Track any `cd` performed by the command so the next call inherits it.
        if new_cwd:
            self.cwd = new_cwd
        return {"output": output, "returncode": exit_code}
|
||||
|
||||
@staticmethod
|
||||
def _merge_output(stdout: str, stderr: str) -> str:
|
||||
parts = []
|
||||
if stdout.strip():
|
||||
parts.append(stdout.rstrip("\n"))
|
||||
if stderr.strip():
|
||||
parts.append(stderr.rstrip("\n"))
|
||||
return "\n".join(parts)
|
||||
369
hermes_code/tools/environments/singularity.py
Normal file
369
hermes_code/tools/environments/singularity.py
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
"""Singularity/Apptainer persistent container environment.
|
||||
|
||||
Security-hardened with --containall, --no-home, capability dropping.
|
||||
Supports configurable resource limits and optional filesystem persistence
|
||||
via writable overlay directories that survive across sessions.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import threading
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SNAPSHOT_STORE = get_hermes_home() / "singularity_snapshots.json"
|
||||
|
||||
|
||||
def _find_singularity_executable() -> str:
|
||||
"""Locate the apptainer or singularity CLI binary.
|
||||
|
||||
Returns the executable name (``"apptainer"`` or ``"singularity"``).
|
||||
Raises ``RuntimeError`` with install instructions if neither is found.
|
||||
"""
|
||||
if shutil.which("apptainer"):
|
||||
return "apptainer"
|
||||
if shutil.which("singularity"):
|
||||
return "singularity"
|
||||
raise RuntimeError(
|
||||
"Neither 'apptainer' nor 'singularity' was found in PATH. "
|
||||
"Install Apptainer (https://apptainer.org/docs/admin/main/installation.html) "
|
||||
"or Singularity and ensure the CLI is available."
|
||||
)
|
||||
|
||||
|
||||
def _ensure_singularity_available() -> str:
    """Preflight check: resolve the executable and verify it responds.

    Returns the executable name on success.
    Raises ``RuntimeError`` with an actionable message on failure.
    The original exception is chained (``from exc``) so the underlying
    cause stays visible in tracebacks.
    """
    exe = _find_singularity_executable()

    try:
        result = subprocess.run(
            [exe, "version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except FileNotFoundError as exc:
        # which() found it but exec failed — e.g. stale PATH entry or a
        # broken symlink.
        raise RuntimeError(
            f"Singularity backend selected but the resolved executable '{exe}' "
            "could not be executed. Check your installation."
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"'{exe} version' timed out. The runtime may be misconfigured."
        ) from exc

    if result.returncode != 0:
        stderr = result.stderr.strip()[:200]  # keep the message short
        raise RuntimeError(
            f"'{exe} version' failed (exit code {result.returncode}): {stderr}"
        )

    return exe
|
||||
|
||||
|
||||
def _load_snapshots() -> Dict[str, str]:
    """Return the persisted task-id -> overlay-path map; {} on any failure."""
    if not _SNAPSHOT_STORE.exists():
        return {}
    try:
        return json.loads(_SNAPSHOT_STORE.read_text())
    except Exception:
        # Corrupt or unreadable store: treat as empty rather than crash.
        return {}
|
||||
|
||||
|
||||
def _save_snapshots(data: Dict[str, str]) -> None:
    """Persist the task-id -> overlay-path map, creating parent dirs as needed."""
    _SNAPSHOT_STORE.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2)
    _SNAPSHOT_STORE.write_text(payload)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Singularity helpers (scratch dir, SIF cache, SIF building)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_scratch_dir() -> Path:
    """Get the best directory for Singularity sandboxes.

    Resolution order:
    1. TERMINAL_SCRATCH_DIR (explicit override)
    2. TERMINAL_SANDBOX_DIR / singularity (explicit shared sandbox root)
    3. /scratch (common on HPC clusters)
    4. {HERMES_HOME}/sandboxes/singularity (fallback)
    """
    custom_scratch = os.getenv("TERMINAL_SCRATCH_DIR")
    if custom_scratch:
        scratch_path = Path(custom_scratch)
        scratch_path.mkdir(parents=True, exist_ok=True)
        return scratch_path

    # Local import mirrors the original's placement — presumably to avoid an
    # import cycle with tools.environments.base; confirm before hoisting.
    from tools.environments.base import get_sandbox_dir

    # Fix: the docstring promises TERMINAL_SANDBOX_DIR (step 2) takes
    # precedence over /scratch (step 3), but the old code always preferred
    # /scratch. Honor the user's explicit configuration first. This also
    # avoids get_sandbox_dir()'s mkdir side effect when the result is unused.
    if os.getenv("TERMINAL_SANDBOX_DIR"):
        sandbox = get_sandbox_dir() / "singularity"
        sandbox.mkdir(parents=True, exist_ok=True)
        return sandbox

    scratch = Path("/scratch")
    if scratch.exists() and os.access(scratch, os.W_OK):
        user_scratch = scratch / os.getenv("USER", "hermes") / "hermes-agent"
        user_scratch.mkdir(parents=True, exist_ok=True)
        logger.info("Using /scratch for sandboxes: %s", user_scratch)
        return user_scratch

    sandbox = get_sandbox_dir() / "singularity"
    sandbox.mkdir(parents=True, exist_ok=True)
    return sandbox
|
||||
|
||||
|
||||
def _get_apptainer_cache_dir() -> Path:
    """Return the directory used to cache built SIF images.

    Honors APPTAINER_CACHEDIR when set; otherwise uses a ``.apptainer``
    subdirectory of the scratch dir. The directory is created on demand.
    """
    override = os.getenv("APPTAINER_CACHEDIR")
    target = Path(override) if override else _get_scratch_dir() / ".apptainer"
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
# Serializes SIF image builds: concurrent environments resolving the same
# image must not race while writing into the shared cache directory.
_sif_build_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
    """Get or build a SIF image from a docker:// URL.

    Returns the path unchanged if it's already a .sif file.
    For docker:// URLs, checks the cache and builds if needed.
    On any build failure the original docker:// URL is returned so the
    caller can still run straight from the registry (slower, but works).
    """
    # Already a built image on disk -- nothing to do.
    if image.endswith('.sif') and Path(image).exists():
        return image
    # Non-docker references (library://, bare paths, ...) pass through untouched.
    if not image.startswith('docker://'):
        return image

    # Flatten the image reference into a filesystem-safe cache filename.
    image_name = image.replace('docker://', '').replace('/', '-').replace(':', '-')
    cache_dir = _get_apptainer_cache_dir()
    sif_path = cache_dir / f"{image_name}.sif"

    # Fast path: cache hit without taking the build lock.
    if sif_path.exists():
        return str(sif_path)

    with _sif_build_lock:
        # Double-checked: another thread may have built it while we waited.
        if sif_path.exists():
            return str(sif_path)

        logger.info("Building SIF image (one-time setup)...")
        logger.info(" Source: %s", image)
        logger.info(" Target: %s", sif_path)

        tmp_dir = cache_dir / "tmp"
        tmp_dir.mkdir(parents=True, exist_ok=True)

        # Point the builder's scratch/cache at our dirs so a small /tmp or a
        # foreign cache location doesn't break the build.
        env = os.environ.copy()
        env["APPTAINER_TMPDIR"] = str(tmp_dir)
        env["APPTAINER_CACHEDIR"] = str(cache_dir)

        try:
            result = subprocess.run(
                [executable, "build", str(sif_path), image],
                capture_output=True, text=True, timeout=600, env=env,
            )
            if result.returncode != 0:
                logger.warning("SIF build failed, falling back to docker:// URL")
                logger.warning(" Error: %s", result.stderr[:500])
                return image
            logger.info("SIF image built successfully")
            return str(sif_path)
        except subprocess.TimeoutExpired:
            logger.warning("SIF build timed out, falling back to docker:// URL")
            # Remove the partially-written SIF so a later call doesn't treat
            # it as a valid cache hit.
            if sif_path.exists():
                sif_path.unlink()
            return image
        except Exception as e:
            logger.warning("SIF build error: %s, falling back to docker:// URL", e)
            return image
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# SingularityEnvironment
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
class SingularityEnvironment(BaseEnvironment):
    """Hardened Singularity/Apptainer container with resource limits and persistence.

    Security: --containall (isolated PID/IPC/mount namespaces, no host home mount),
    --no-home, writable-tmpfs for scratch space. The container cannot see or modify
    the host filesystem outside of explicitly bound paths.

    Persistence: when enabled, the writable overlay directory is preserved across
    sessions so installed packages and files survive cleanup/restore.
    """

    def __init__(
        self,
        image: str,
        cwd: str = "~",
        timeout: int = 60,
        cpu: float = 0,
        memory: int = 0,
        disk: int = 0,
        persistent_filesystem: bool = False,
        task_id: str = "default",
    ):
        """Resolve the image, optionally set up a persistent overlay, and start the instance.

        Args:
            image: SIF path or docker:// URL (built/cached via _get_or_build_sif).
            cwd: Initial working directory inside the container.
            timeout: Default per-command timeout in seconds.
            cpu: CPU limit passed to --cpus (0 = unlimited).
            memory: Memory limit in MiB passed to --memory (0 = unlimited).
            disk: NOTE(review): accepted but never used anywhere in this class —
                confirm whether a --overlay size limit was intended.
            persistent_filesystem: Keep a writable overlay dir across sessions.
            task_id: Key used to name (and later restore) the overlay dir.

        Raises:
            RuntimeError: if the runtime is unavailable or the instance fails to start.
        """
        super().__init__(cwd=cwd, timeout=timeout)
        self.executable = _ensure_singularity_available()
        self.image = _get_or_build_sif(image, self.executable)
        # Random suffix keeps concurrent instances from colliding.
        self.instance_id = f"hermes_{uuid.uuid4().hex[:12]}"
        self._instance_started = False
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._overlay_dir: Optional[Path] = None

        # Resource limits
        self._cpu = cpu
        self._memory = memory

        # Persistent overlay directory
        if self._persistent:
            overlay_base = _get_scratch_dir() / "hermes-overlays"
            overlay_base.mkdir(parents=True, exist_ok=True)
            # Keyed by task_id so the same task reuses its overlay next session.
            self._overlay_dir = overlay_base / f"overlay-{task_id}"
            self._overlay_dir.mkdir(parents=True, exist_ok=True)

        self._start_instance()

    def _start_instance(self):
        """Launch the long-lived container instance via `instance start`.

        Raises RuntimeError when the start command fails or times out.
        """
        cmd = [self.executable, "instance", "start"]

        # Security: full isolation from host
        cmd.extend(["--containall", "--no-home"])

        # Writable layer
        if self._persistent and self._overlay_dir:
            # Persistent writable overlay -- survives across restarts
            cmd.extend(["--overlay", str(self._overlay_dir)])
        else:
            # Ephemeral tmpfs: changes vanish when the instance stops.
            cmd.append("--writable-tmpfs")

        # Resource limits (cgroup-based, may require root or appropriate config)
        if self._memory > 0:
            cmd.extend(["--memory", f"{self._memory}M"])
        if self._cpu > 0:
            cmd.extend(["--cpus", str(self._cpu)])

        cmd.extend([str(self.image), self.instance_id])

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
            if result.returncode != 0:
                raise RuntimeError(f"Failed to start instance: {result.stderr}")
            self._instance_started = True
            logger.info("Singularity instance %s started (persistent=%s)",
                        self.instance_id, self._persistent)
        except subprocess.TimeoutExpired:
            raise RuntimeError("Instance start timed out")

    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Run *command* inside the running instance via `exec`.

        Streams combined stdout+stderr in a drain thread, polling the process
        so interrupts and the timeout are honored. Returns a dict with
        ``output`` and ``returncode`` (130 on interrupt, 124-style timeout
        result via _timeout_result).
        """
        if not self._instance_started:
            return {"output": "Instance not started", "returncode": -1}

        effective_timeout = timeout or self.timeout
        work_dir = cwd or self.cwd
        exec_command, sudo_stdin = self._prepare_command(command)

        # Merge sudo password (if any) with caller-supplied stdin_data.
        if sudo_stdin is not None and stdin_data is not None:
            effective_stdin = sudo_stdin + stdin_data
        elif sudo_stdin is not None:
            effective_stdin = sudo_stdin
        else:
            effective_stdin = stdin_data

        # apptainer exec --pwd doesn't expand ~, so prepend a cd into the command
        if work_dir == "~" or work_dir.startswith("~/"):
            exec_command = f"cd {work_dir} && {exec_command}"
            work_dir = "/tmp"

        cmd = [self.executable, "exec", "--pwd", work_dir,
               f"instance://{self.instance_id}",
               "bash", "-c", exec_command]

        try:
            import time as _time
            _output_chunks = []
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                stdin=subprocess.PIPE if effective_stdin else subprocess.DEVNULL,
                text=True,
            )
            if effective_stdin:
                try:
                    proc.stdin.write(effective_stdin)
                    # Close stdin so the command sees EOF and can finish.
                    proc.stdin.close()
                except Exception:
                    pass

            def _drain():
                # Background reader keeps the pipe from filling and blocking
                # the child while we poll for interrupt/timeout below.
                try:
                    for line in proc.stdout:
                        _output_chunks.append(line)
                except Exception:
                    pass

            reader = threading.Thread(target=_drain, daemon=True)
            reader.start()
            deadline = _time.monotonic() + effective_timeout

            while proc.poll() is None:
                if is_interrupted():
                    proc.terminate()
                    try:
                        proc.wait(timeout=1)
                    except subprocess.TimeoutExpired:
                        # Didn't exit after SIGTERM; escalate.
                        proc.kill()
                    reader.join(timeout=2)
                    return {
                        "output": "".join(_output_chunks) + "\n[Command interrupted]",
                        "returncode": 130,
                    }
                if _time.monotonic() > deadline:
                    proc.kill()
                    reader.join(timeout=2)
                    return self._timeout_result(effective_timeout)
                _time.sleep(0.2)

            reader.join(timeout=5)
            return {"output": "".join(_output_chunks), "returncode": proc.returncode}
        except Exception as e:
            return {"output": f"Singularity execution error: {e}", "returncode": 1}

    def cleanup(self):
        """Stop the instance. If persistent, the overlay dir survives for next creation."""
        if self._instance_started:
            try:
                subprocess.run(
                    [self.executable, "instance", "stop", self.instance_id],
                    capture_output=True, text=True, timeout=30,
                )
                logger.info("Singularity instance %s stopped", self.instance_id)
            except Exception as e:
                # Best-effort: a failed stop shouldn't break caller teardown.
                logger.warning("Failed to stop Singularity instance %s: %s", self.instance_id, e)
            self._instance_started = False

        # Record overlay path for persistence restoration
        if self._persistent and self._overlay_dir:
            snapshots = _load_snapshots()
            snapshots[self._task_id] = str(self._overlay_dir)
            _save_snapshots(snapshots)
|
||||
232
hermes_code/tools/environments/ssh.py
Normal file
232
hermes_code/tools/environments/ssh.py
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
"""SSH remote execution environment with ControlMaster connection persistence."""
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.environments.persistent_shell import PersistentShellMixin
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ensure_ssh_available() -> None:
|
||||
"""Fail fast with a clear error when the SSH client is unavailable."""
|
||||
if not shutil.which("ssh"):
|
||||
raise RuntimeError(
|
||||
"SSH is not installed or not in PATH. Install OpenSSH client: apt install openssh-client"
|
||||
)
|
||||
|
||||
|
||||
class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
    """Run commands on a remote machine over SSH.

    Uses SSH ControlMaster for connection persistence so subsequent
    commands are fast. Security benefit: the agent cannot modify its
    own code since execution happens on a separate machine.

    Foreground commands are interruptible: the local ssh process is killed
    and a remote kill is attempted over the ControlMaster socket.

    When ``persistent=True``, a single long-lived bash shell is kept alive
    over SSH and state (cwd, env vars, shell variables) persists across
    ``execute()`` calls. Output capture uses file-based IPC on the remote
    host (stdout/stderr/exit-code written to temp files, polled via fast
    ControlMaster one-shot reads).
    """

    def __init__(self, host: str, user: str, cwd: str = "~",
                 timeout: int = 60, port: int = 22, key_path: str = "",
                 persistent: bool = False):
        """Validate the ssh client, open the ControlMaster connection, and
        (optionally) start the persistent remote shell.

        Raises RuntimeError if ssh is missing or the connection fails.
        """
        super().__init__(cwd=cwd, timeout=timeout)
        self.host = host
        self.user = user
        self.port = port
        self.key_path = key_path
        self.persistent = persistent

        # ControlMaster socket lives under the system temp dir, one socket
        # per (user, host, port) triple so connections are shared.
        self.control_dir = Path(tempfile.gettempdir()) / "hermes-ssh"
        self.control_dir.mkdir(parents=True, exist_ok=True)
        self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
        _ensure_ssh_available()
        self._establish_connection()

        if self.persistent:
            self._init_persistent_shell()

    def _build_ssh_command(self, extra_args: list | None = None) -> list:
        """Assemble the base ssh argv with ControlMaster multiplexing options.

        The returned list ends with ``user@host``; callers append the remote
        command string after it.
        """
        cmd = ["ssh"]
        cmd.extend(["-o", f"ControlPath={self.control_socket}"])
        # auto: reuse the master connection if up, create it otherwise.
        cmd.extend(["-o", "ControlMaster=auto"])
        # Keep the master alive 300s after the last session closes.
        cmd.extend(["-o", "ControlPersist=300"])
        # Never prompt for passwords -- fail instead (non-interactive agent).
        cmd.extend(["-o", "BatchMode=yes"])
        cmd.extend(["-o", "StrictHostKeyChecking=accept-new"])
        cmd.extend(["-o", "ConnectTimeout=10"])
        if self.port != 22:
            cmd.extend(["-p", str(self.port)])
        if self.key_path:
            cmd.extend(["-i", self.key_path])
        if extra_args:
            cmd.extend(extra_args)
        cmd.append(f"{self.user}@{self.host}")
        return cmd

    def _establish_connection(self):
        """Open (and verify) the ControlMaster connection with a trivial echo.

        Raises RuntimeError on connection failure or timeout.
        """
        cmd = self._build_ssh_command()
        cmd.append("echo 'SSH connection established'")
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
            if result.returncode != 0:
                error_msg = result.stderr.strip() or result.stdout.strip()
                raise RuntimeError(f"SSH connection failed: {error_msg}")
        except subprocess.TimeoutExpired:
            raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")

    # Polling cadence for the persistent shell's status file (seconds).
    # Slower than local backends since each poll is an ssh round-trip.
    _poll_interval: float = 0.15

    @property
    def _temp_prefix(self) -> str:
        # Remote-side prefix for the persistent shell's IPC temp files.
        return f"/tmp/hermes-ssh-{self._session_id}"

    def _spawn_shell_process(self) -> subprocess.Popen:
        """Start the long-lived remote login shell used by persistent mode."""
        cmd = self._build_ssh_command()
        cmd.append("bash -l")
        return subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
        )

    def _read_temp_files(self, *paths: str) -> list[str]:
        """Fetch the contents of remote temp files via one-shot ssh reads.

        Single path: one plain ``cat``. Multiple paths: one ssh round-trip
        that cats every file separated by a unique delimiter, then splits
        locally. Missing files and transport errors map to "".
        """
        if len(paths) == 1:
            cmd = self._build_ssh_command()
            cmd.append(f"cat {paths[0]} 2>/dev/null")
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, timeout=10,
                )
                return [result.stdout]
            except (subprocess.TimeoutExpired, OSError):
                return [""]

        # Session-unique delimiter so file contents can't forge a boundary.
        delim = f"__HERMES_SEP_{self._session_id}__"
        script = "; ".join(
            f"cat {p} 2>/dev/null; echo '{delim}'" for p in paths
        )
        cmd = self._build_ssh_command()
        cmd.append(script)
        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=10,
            )
            parts = result.stdout.split(delim + "\n")
            # Pad with "" if fewer parts came back than files requested.
            return [parts[i] if i < len(parts) else "" for i in range(len(paths))]
        except (subprocess.TimeoutExpired, OSError):
            return [""] * len(paths)

    def _kill_shell_children(self):
        """Kill the remote shell's child processes (the running command),
        leaving the shell itself alive. Best-effort; errors are ignored.
        """
        if self._shell_pid is None:
            return
        cmd = self._build_ssh_command()
        # `; true` keeps the ssh exit status clean when pkill finds nothing.
        cmd.append(f"pkill -P {self._shell_pid} 2>/dev/null; true")
        try:
            subprocess.run(cmd, capture_output=True, timeout=5)
        except (subprocess.TimeoutExpired, OSError):
            pass

    def _cleanup_temp_files(self):
        """Best-effort removal of this session's remote IPC temp files."""
        cmd = self._build_ssh_command()
        cmd.append(f"rm -f {self._temp_prefix}-*")
        try:
            subprocess.run(cmd, capture_output=True, timeout=5)
        except (subprocess.TimeoutExpired, OSError):
            pass

    def _execute_oneshot(self, command: str, cwd: str = "", *,
                         timeout: int | None = None,
                         stdin_data: str | None = None) -> dict:
        """Run one command over a fresh ssh session (no persistent state).

        Combined stdout+stderr is streamed by a drain thread while the main
        thread polls for interrupts and the timeout. Returns a dict with
        ``output`` and ``returncode`` (130 on interrupt).
        """
        work_dir = cwd or self.cwd
        exec_command, sudo_stdin = self._prepare_command(command)
        # NOTE(review): work_dir is interpolated unquoted so the remote shell
        # expands `~`; a cwd containing spaces would break -- confirm callers.
        wrapped = f'cd {work_dir} && {exec_command}'
        effective_timeout = timeout or self.timeout

        # Merge sudo password (if any) with caller-supplied stdin data.
        if sudo_stdin is not None and stdin_data is not None:
            effective_stdin = sudo_stdin + stdin_data
        elif sudo_stdin is not None:
            effective_stdin = sudo_stdin
        else:
            effective_stdin = stdin_data

        cmd = self._build_ssh_command()
        cmd.append(wrapped)

        # NOTE(review): `kwargs` is built and trimmed but never passed to
        # Popen below -- looks like leftover from a subprocess.run()
        # implementation; confirm and remove.
        kwargs = self._build_run_kwargs(timeout, effective_stdin)
        kwargs.pop("timeout", None)
        _output_chunks = []
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            stdin=subprocess.PIPE if effective_stdin else subprocess.DEVNULL,
            text=True,
        )

        if effective_stdin:
            try:
                proc.stdin.write(effective_stdin)
                # Close so the remote command sees EOF and can finish.
                proc.stdin.close()
            except (BrokenPipeError, OSError):
                pass

        def _drain():
            # Background reader keeps the pipe from filling and blocking the
            # child while the main thread polls below.
            try:
                for line in proc.stdout:
                    _output_chunks.append(line)
            except Exception:
                pass

        reader = threading.Thread(target=_drain, daemon=True)
        reader.start()
        deadline = time.monotonic() + effective_timeout

        while proc.poll() is None:
            if is_interrupted():
                proc.terminate()
                try:
                    proc.wait(timeout=1)
                except subprocess.TimeoutExpired:
                    # Didn't exit after SIGTERM; escalate.
                    proc.kill()
                reader.join(timeout=2)
                return {
                    "output": "".join(_output_chunks) + "\n[Command interrupted]",
                    "returncode": 130,
                }
            if time.monotonic() > deadline:
                proc.kill()
                reader.join(timeout=2)
                return self._timeout_result(effective_timeout)
            time.sleep(0.2)

        reader.join(timeout=5)
        return {"output": "".join(_output_chunks), "returncode": proc.returncode}

    def cleanup(self):
        """Stop the persistent shell (via the mixin), then tear down the
        ControlMaster connection and remove its socket. Best-effort.
        """
        super().cleanup()
        if self.control_socket.exists():
            try:
                # `-O exit` asks the master process to shut down cleanly.
                cmd = ["ssh", "-o", f"ControlPath={self.control_socket}",
                       "-O", "exit", f"{self.user}@{self.host}"]
                subprocess.run(cmd, capture_output=True, timeout=5)
            except (OSError, subprocess.SubprocessError):
                pass
            try:
                self.control_socket.unlink()
            except OSError:
                pass
|
||||
Loading…
Add table
Add a link
Reference in a new issue