refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)
Drop the mini-swe-agent git submodule. All terminal backends now use hermes-agent's own environment implementations directly. Docker backend: - Inline the `docker run -d` container startup (was 15 lines in minisweagent's DockerEnvironment). Our wrapper already handled execute(), cleanup(), security hardening, volumes, and resource limits. Modal backend: - Import swe-rex's ModalDeployment directly instead of going through minisweagent's 90-line passthrough wrapper. - Bake the _AsyncWorker pattern (from environments/patches.py) directly into ModalEnvironment for Atropos compatibility without monkey-patching. Cleanup: - Remove minisweagent_path.py (submodule path resolution helper) - Remove submodule init/install from install.sh and setup-hermes.sh - Remove mini-swe-agent from .gitmodules - environments/patches.py is now a no-op (kept for backward compat) - terminal_tool.py no longer does sys.path hacking for minisweagent - mini_swe_runner.py guards imports (optional, for RL training only) - Update all affected tests to mock the new direct subprocess calls - Update README.md, CONTRIBUTING.md No functionality change — all Docker, Modal, local, SSH, Singularity, and Daytona backends behave identically. 6093 tests pass.
This commit is contained in:
parent
2233f764af
commit
02b38b93cb
22 changed files with 283 additions and 591 deletions
|
|
@ -1,6 +1,6 @@
|
|||
"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment.
|
||||
"""Docker execution environment for sandboxed command execution.
|
||||
|
||||
Adds security hardening (cap-drop ALL, no-new-privileges, PID limits),
|
||||
Security hardened (cap-drop ALL, no-new-privileges, PID limits),
|
||||
configurable resource limits (CPU, memory, disk), and optional filesystem
|
||||
persistence via bind mounts.
|
||||
"""
|
||||
|
|
@ -13,6 +13,7 @@ import subprocess
|
|||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
|
|
@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment):
|
|||
logger.warning(f"docker_volumes config is not a list: {volumes!r}")
|
||||
volumes = []
|
||||
|
||||
# Fail fast if Docker is not available rather than surfacing a cryptic
|
||||
# FileNotFoundError deep inside the mini-swe-agent stack.
|
||||
# Fail fast if Docker is not available.
|
||||
_ensure_docker_available()
|
||||
|
||||
from minisweagent.environments.docker import DockerEnvironment as _Docker
|
||||
|
||||
# Build resource limit args
|
||||
resource_args = []
|
||||
if cpu > 0:
|
||||
|
|
@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment):
|
|||
|
||||
# Resolve the docker executable once so it works even when
|
||||
# /usr/local/bin is not in PATH (common on macOS gateway/service).
|
||||
docker_exe = find_docker() or "docker"
|
||||
self._docker_exe = find_docker() or "docker"
|
||||
|
||||
self._inner = _Docker(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
run_args=all_run_args,
|
||||
executable=docker_exe,
|
||||
# Start the container directly via `docker run -d`.
|
||||
container_name = f"hermes-{uuid.uuid4().hex[:8]}"
|
||||
run_cmd = [
|
||||
self._docker_exe, "run", "-d",
|
||||
"--name", container_name,
|
||||
"-w", cwd,
|
||||
*all_run_args,
|
||||
image,
|
||||
"sleep", "2h",
|
||||
]
|
||||
logger.debug(f"Starting container: {' '.join(run_cmd)}")
|
||||
result = subprocess.run(
|
||||
run_cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120, # image pull may take a while
|
||||
check=True,
|
||||
)
|
||||
self._container_id = self._inner.container_id
|
||||
self._container_id = result.stdout.strip()
|
||||
logger.info(f"Started container {container_name} ({self._container_id[:12]})")
|
||||
|
||||
@staticmethod
|
||||
def _storage_opt_supported() -> bool:
|
||||
|
|
@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment):
|
|||
exec_command = f"cd {work_dir} && {exec_command}"
|
||||
work_dir = "/"
|
||||
|
||||
assert self._inner.container_id, "Container not started"
|
||||
cmd = [self._inner.config.executable, "exec"]
|
||||
assert self._container_id, "Container not started"
|
||||
cmd = [self._docker_exe, "exec"]
|
||||
if effective_stdin is not None:
|
||||
cmd.append("-i")
|
||||
cmd.extend(["-w", work_dir])
|
||||
|
|
@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment):
|
|||
value = hermes_env.get(key)
|
||||
if value is not None:
|
||||
cmd.extend(["-e", f"{key}={value}"])
|
||||
for key, value in self._inner.config.env.items():
|
||||
cmd.extend(["-e", f"{key}={value}"])
|
||||
cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
|
||||
cmd.extend([self._container_id, "bash", "-lc", exec_command])
|
||||
|
||||
try:
|
||||
_output_chunks = []
|
||||
|
|
@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment):
|
|||
|
||||
def cleanup(self):
|
||||
"""Stop and remove the container. Bind-mount dirs persist if persistent=True."""
|
||||
self._inner.cleanup()
|
||||
|
||||
if not self._persistent and self._container_id:
|
||||
# Inner cleanup only runs `docker stop` in background; container is left
|
||||
# as stopped. When container_persistent=false we must remove it.
|
||||
docker_exe = find_docker() or self._inner.config.executable
|
||||
if self._container_id:
|
||||
try:
|
||||
subprocess.run(
|
||||
[docker_exe, "rm", "-f", self._container_id],
|
||||
capture_output=True,
|
||||
timeout=30,
|
||||
# Stop in background so cleanup doesn't block
|
||||
stop_cmd = (
|
||||
f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
|
||||
f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
|
||||
)
|
||||
subprocess.Popen(stop_cmd, shell=True)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e)
|
||||
logger.warning("Failed to stop container %s: %s", self._container_id, e)
|
||||
|
||||
if not self._persistent:
|
||||
# Also schedule removal (stop only leaves it as stopped)
|
||||
try:
|
||||
subprocess.Popen(
|
||||
f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
|
||||
shell=True,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
self._container_id = None
|
||||
|
||||
if not self._persistent:
|
||||
import shutil
|
||||
for d in (self._workspace_dir, self._home_dir):
|
||||
if d:
|
||||
shutil.rmtree(d, ignore_errors=True)
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
|
||||
"""Modal cloud execution environment using SWE-ReX directly.
|
||||
|
||||
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
|
||||
is snapshotted on cleanup and restored on next creation, so installed packages,
|
||||
project files, and config changes survive across sessions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
|
@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None:
|
|||
_SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))
|
||||
|
||||
|
||||
class ModalEnvironment(BaseEnvironment):
|
||||
"""Modal cloud execution via mini-swe-agent.
|
||||
class _AsyncWorker:
    """Background thread with its own event loop for async-safe swe-rex calls.

    Allows sync code to submit async coroutines and block for results,
    even when called from inside another running event loop (e.g. Atropos).
    """

    def __init__(self):
        # Loop and thread are created lazily in start(); _started signals
        # that the loop is installed and running on the worker thread.
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        self._started = threading.Event()

    def start(self):
        """Spawn the daemon worker thread and wait for its loop to come up.

        Raises:
            RuntimeError: if the worker loop is not running within 30s.
                (Previously the wait() result was discarded, so a failed
                startup surfaced later as a confusing error from
                run_coroutine() instead of failing fast here.)
        """
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()
        if not self._started.wait(timeout=30):
            raise RuntimeError("_AsyncWorker event loop failed to start within 30s")

    def _run_loop(self):
        # Runs on the worker thread: owns a fresh event loop until stop().
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        self._started.set()
        self._loop.run_forever()

    def run_coroutine(self, coro, timeout=600):
        """Submit *coro* to the worker loop and block until it completes.

        Safe to call from sync code even when the caller itself sits inside
        another running event loop. Returns the coroutine's result.

        Raises:
            RuntimeError: if the worker loop is absent or already closed.
            concurrent.futures.TimeoutError: if *timeout* seconds elapse.
        """
        if self._loop is None or self._loop.is_closed():
            raise RuntimeError("AsyncWorker loop is not running")
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result(timeout=timeout)

    def stop(self):
        """Stop the worker loop and join the thread (best-effort, 10s cap)."""
        if self._loop and self._loop.is_running():
            self._loop.call_soon_threadsafe(self._loop.stop)
        if self._thread:
            self._thread.join(timeout=10)
|
||||
|
||||
|
||||
class ModalEnvironment(BaseEnvironment):
|
||||
"""Modal cloud execution via SWE-ReX.
|
||||
|
||||
Uses swe-rex's ModalDeployment directly for sandbox management.
|
||||
Adds sudo -S support, configurable resources (CPU, memory, disk),
|
||||
and optional filesystem persistence via Modal's snapshot API.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment):
|
|||
):
|
||||
super().__init__(cwd=cwd, timeout=timeout)
|
||||
|
||||
if not ModalEnvironment._patches_applied:
|
||||
try:
|
||||
from environments.patches import apply_patches
|
||||
apply_patches()
|
||||
except ImportError:
|
||||
pass
|
||||
ModalEnvironment._patches_applied = True
|
||||
|
||||
self._persistent = persistent_filesystem
|
||||
self._task_id = task_id
|
||||
self._base_image = image
|
||||
self._deployment = None
|
||||
self._worker = _AsyncWorker()
|
||||
|
||||
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
|
||||
|
||||
|
|
@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment):
|
|||
|
||||
effective_image = restored_image if restored_image else image
|
||||
|
||||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
||||
self._inner = SwerexModalEnvironment(
|
||||
image=effective_image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
startup_timeout=180.0,
|
||||
runtime_timeout=3600.0,
|
||||
modal_sandbox_kwargs=sandbox_kwargs,
|
||||
install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote)
|
||||
)
|
||||
# Pre-build a modal.Image with pip fix for Modal's legacy image builder.
|
||||
# Some task images have broken pip; fix via ensurepip before Modal uses it.
|
||||
import modal as _modal
|
||||
if isinstance(effective_image, str):
|
||||
effective_image = _modal.Image.from_registry(
|
||||
effective_image,
|
||||
setup_dockerfile_commands=[
|
||||
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
|
||||
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
|
||||
],
|
||||
)
|
||||
|
||||
# Start the async worker thread and create the deployment on it
|
||||
# so all gRPC channels are bound to the worker's event loop.
|
||||
self._worker.start()
|
||||
|
||||
from swerex.deployment.modal import ModalDeployment
|
||||
|
||||
async def _create_and_start():
|
||||
deployment = ModalDeployment(
|
||||
image=effective_image,
|
||||
startup_timeout=180.0,
|
||||
runtime_timeout=3600.0,
|
||||
deployment_timeout=3600.0,
|
||||
install_pipx=True,
|
||||
modal_sandbox_kwargs=sandbox_kwargs,
|
||||
)
|
||||
await deployment.start()
|
||||
return deployment
|
||||
|
||||
self._deployment = self._worker.run_coroutine(_create_and_start())
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
|
||||
timeout: int | None = None,
|
||||
|
|
@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment):
|
|||
# subprocess stdin directly the way a local Popen can. When a sudo
|
||||
# password is present, use a shell-level pipe from printf so that the
|
||||
# password feeds sudo -S without appearing as an echo argument embedded
|
||||
# in the shell string. The password is still visible in the remote
|
||||
# sandbox's command line, but it is not exposed on the user's local
|
||||
# machine — which is the primary threat being mitigated.
|
||||
# in the shell string.
|
||||
if sudo_stdin is not None:
|
||||
import shlex
|
||||
exec_command = (
|
||||
f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
|
||||
)
|
||||
|
||||
from swerex.runtime.abstract import Command as RexCommand
|
||||
|
||||
effective_cwd = cwd or self.cwd
|
||||
effective_timeout = timeout or self.timeout
|
||||
|
||||
# Run in a background thread so we can poll for interrupts
|
||||
result_holder = {"value": None, "error": None}
|
||||
|
||||
def _run():
|
||||
try:
|
||||
result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
|
||||
async def _do_execute():
|
||||
return await self._deployment.runtime.execute(
|
||||
RexCommand(
|
||||
command=exec_command,
|
||||
shell=True,
|
||||
check=False,
|
||||
cwd=effective_cwd,
|
||||
timeout=effective_timeout,
|
||||
merge_output_streams=True,
|
||||
)
|
||||
)
|
||||
output = self._worker.run_coroutine(_do_execute())
|
||||
result_holder["value"] = {
|
||||
"output": output.stdout,
|
||||
"returncode": output.exit_code,
|
||||
}
|
||||
except Exception as e:
|
||||
result_holder["error"] = e
|
||||
|
||||
|
|
@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment):
|
|||
t.join(timeout=0.2)
|
||||
if is_interrupted():
|
||||
try:
|
||||
self._inner.stop()
|
||||
self._worker.run_coroutine(
|
||||
asyncio.wait_for(self._deployment.stop(), timeout=10),
|
||||
timeout=15,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return {
|
||||
|
|
@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment):
|
|||
|
||||
def cleanup(self):
|
||||
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
|
||||
# Check if _inner was ever set (init may have failed)
|
||||
if not hasattr(self, '_inner') or self._inner is None:
|
||||
if self._deployment is None:
|
||||
return
|
||||
|
||||
if self._persistent:
|
||||
try:
|
||||
sandbox = getattr(self._inner, 'deployment', None)
|
||||
sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
|
||||
sandbox = getattr(self._deployment, '_sandbox', None)
|
||||
if sandbox:
|
||||
import asyncio
|
||||
async def _snapshot():
|
||||
img = await sandbox.snapshot_filesystem.aio()
|
||||
return img.object_id
|
||||
try:
|
||||
snapshot_id = asyncio.run(_snapshot())
|
||||
except RuntimeError:
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
snapshot_id = pool.submit(
|
||||
asyncio.run, _snapshot()
|
||||
).result(timeout=60)
|
||||
|
||||
snapshots = _load_snapshots()
|
||||
snapshots[self._task_id] = snapshot_id
|
||||
_save_snapshots(snapshots)
|
||||
logger.info("Modal: saved filesystem snapshot %s for task %s",
|
||||
snapshot_id[:20], self._task_id)
|
||||
try:
|
||||
snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
|
||||
except Exception:
|
||||
snapshot_id = None
|
||||
|
||||
if snapshot_id:
|
||||
snapshots = _load_snapshots()
|
||||
snapshots[self._task_id] = snapshot_id
|
||||
_save_snapshots(snapshots)
|
||||
logger.info("Modal: saved filesystem snapshot %s for task %s",
|
||||
snapshot_id[:20], self._task_id)
|
||||
except Exception as e:
|
||||
logger.warning("Modal: filesystem snapshot failed: %s", e)
|
||||
|
||||
if hasattr(self._inner, 'stop'):
|
||||
self._inner.stop()
|
||||
try:
|
||||
self._worker.run_coroutine(
|
||||
asyncio.wait_for(self._deployment.stop(), timeout=10),
|
||||
timeout=15,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._worker.stop()
|
||||
self._deployment = None
|
||||
|
|
|
|||
|
|
@ -51,13 +51,6 @@ logger = logging.getLogger(__name__)
|
|||
from tools.interrupt import is_interrupted, _interrupt_event
|
||||
|
||||
|
||||
# Add mini-swe-agent to path if not installed. In git worktrees the populated
|
||||
# submodule may live in the main checkout rather than the worktree itself.
|
||||
from minisweagent_path import ensure_minisweagent_on_path
|
||||
|
||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Custom Singularity Environment with more space
|
||||
# =============================================================================
|
||||
|
|
@ -1188,27 +1181,15 @@ def terminal_tool(
|
|||
|
||||
|
||||
def check_terminal_requirements() -> bool:
|
||||
"""Check if all requirements for the terminal tool are met.
|
||||
|
||||
Important: local and singularity backends now use Hermes' own environment
|
||||
wrappers directly and do not require the ``minisweagent`` Python package to
|
||||
be installed. Docker and Modal still rely on mini-swe-agent internals.
|
||||
"""
|
||||
"""Check if all requirements for the terminal tool are met."""
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
try:
|
||||
if env_type == "local":
|
||||
# Local execution uses Hermes' own LocalEnvironment wrapper and does
|
||||
# not depend on minisweagent being importable.
|
||||
return True
|
||||
|
||||
elif env_type == "docker":
|
||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
||||
if importlib.util.find_spec("minisweagent") is None:
|
||||
logger.error("mini-swe-agent is required for docker terminal backend but is not importable")
|
||||
return False
|
||||
# Check if docker is available (use find_docker for macOS PATH issues)
|
||||
from tools.environments.docker import find_docker
|
||||
docker = find_docker()
|
||||
if not docker:
|
||||
|
|
@ -1225,7 +1206,6 @@ def check_terminal_requirements() -> bool:
|
|||
return False
|
||||
|
||||
elif env_type == "ssh":
|
||||
# Check that host and user are configured
|
||||
if not config.get("ssh_host") or not config.get("ssh_user"):
|
||||
logger.error(
|
||||
"SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
|
||||
|
|
@ -1235,11 +1215,9 @@ def check_terminal_requirements() -> bool:
|
|||
return True
|
||||
|
||||
elif env_type == "modal":
|
||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
||||
if importlib.util.find_spec("minisweagent") is None:
|
||||
logger.error("mini-swe-agent is required for modal terminal backend but is not importable")
|
||||
if importlib.util.find_spec("swerex") is None:
|
||||
logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'")
|
||||
return False
|
||||
# Check for modal token
|
||||
has_token = os.getenv("MODAL_TOKEN_ID") is not None
|
||||
has_config = Path.home().joinpath(".modal.toml").exists()
|
||||
if not (has_token or has_config):
|
||||
|
|
@ -1269,7 +1247,7 @@ def check_terminal_requirements() -> bool:
|
|||
|
||||
if __name__ == "__main__":
|
||||
# Simple test when run directly
|
||||
print("Terminal Tool Module (mini-swe-agent backend)")
|
||||
print("Terminal Tool Module")
|
||||
print("=" * 50)
|
||||
|
||||
config = _get_env_config()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue