refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)

Drop the mini-swe-agent git submodule. All terminal backends now use
hermes-agent's own environment implementations directly.

Docker backend:
- Inline the `docker run -d` container startup (was 15 lines in
  minisweagent's DockerEnvironment). Our wrapper already handled
  execute(), cleanup(), security hardening, volumes, and resource limits.

Modal backend:
- Import swe-rex's ModalDeployment directly instead of going through
  minisweagent's 90-line passthrough wrapper.
- Bake the _AsyncWorker pattern (from environments/patches.py) directly
  into ModalEnvironment for Atropos compatibility without monkey-patching.

Cleanup:
- Remove minisweagent_path.py (submodule path resolution helper)
- Remove submodule init/install from install.sh and setup-hermes.sh
- Remove mini-swe-agent from .gitmodules
- environments/patches.py is now a no-op (kept for backward compat)
- terminal_tool.py no longer does sys.path hacking for minisweagent
- mini_swe_runner.py guards imports (optional, for RL training only)
- Update all affected tests to mock the new direct subprocess calls
- Update README.md, CONTRIBUTING.md

No functionality change — all Docker, Modal, local, SSH, Singularity,
and Daytona backends behave identically. 6093 tests pass.
This commit is contained in:
Teknium 2026-03-24 07:30:25 -07:00 committed by GitHub
parent 2233f764af
commit 02b38b93cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 283 additions and 591 deletions

View file

@ -1,6 +1,6 @@
"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment.
"""Docker execution environment for sandboxed command execution.
Adds security hardening (cap-drop ALL, no-new-privileges, PID limits),
Security hardened (cap-drop ALL, no-new-privileges, PID limits),
configurable resource limits (CPU, memory, disk), and optional filesystem
persistence via bind mounts.
"""
@ -13,6 +13,7 @@ import subprocess
import sys
import threading
import time
import uuid
from typing import Optional
from tools.environments.base import BaseEnvironment
@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment):
logger.warning(f"docker_volumes config is not a list: {volumes!r}")
volumes = []
# Fail fast if Docker is not available rather than surfacing a cryptic
# FileNotFoundError deep inside the mini-swe-agent stack.
# Fail fast if Docker is not available.
_ensure_docker_available()
from minisweagent.environments.docker import DockerEnvironment as _Docker
# Build resource limit args
resource_args = []
if cpu > 0:
@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment):
# Resolve the docker executable once so it works even when
# /usr/local/bin is not in PATH (common on macOS gateway/service).
docker_exe = find_docker() or "docker"
self._docker_exe = find_docker() or "docker"
self._inner = _Docker(
image=image, cwd=cwd, timeout=timeout,
run_args=all_run_args,
executable=docker_exe,
# Start the container directly via `docker run -d`.
container_name = f"hermes-{uuid.uuid4().hex[:8]}"
run_cmd = [
self._docker_exe, "run", "-d",
"--name", container_name,
"-w", cwd,
*all_run_args,
image,
"sleep", "2h",
]
logger.debug(f"Starting container: {' '.join(run_cmd)}")
result = subprocess.run(
run_cmd,
capture_output=True,
text=True,
timeout=120, # image pull may take a while
check=True,
)
self._container_id = self._inner.container_id
self._container_id = result.stdout.strip()
logger.info(f"Started container {container_name} ({self._container_id[:12]})")
@staticmethod
def _storage_opt_supported() -> bool:
@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment):
exec_command = f"cd {work_dir} && {exec_command}"
work_dir = "/"
assert self._inner.container_id, "Container not started"
cmd = [self._inner.config.executable, "exec"]
assert self._container_id, "Container not started"
cmd = [self._docker_exe, "exec"]
if effective_stdin is not None:
cmd.append("-i")
cmd.extend(["-w", work_dir])
@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment):
value = hermes_env.get(key)
if value is not None:
cmd.extend(["-e", f"{key}={value}"])
for key, value in self._inner.config.env.items():
cmd.extend(["-e", f"{key}={value}"])
cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
cmd.extend([self._container_id, "bash", "-lc", exec_command])
try:
_output_chunks = []
@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment):
def cleanup(self):
"""Stop and remove the container. Bind-mount dirs persist if persistent=True."""
self._inner.cleanup()
if not self._persistent and self._container_id:
# Inner cleanup only runs `docker stop` in background; container is left
# as stopped. When container_persistent=false we must remove it.
docker_exe = find_docker() or self._inner.config.executable
if self._container_id:
try:
subprocess.run(
[docker_exe, "rm", "-f", self._container_id],
capture_output=True,
timeout=30,
# Stop in background so cleanup doesn't block
stop_cmd = (
f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
)
subprocess.Popen(stop_cmd, shell=True)
except Exception as e:
logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e)
logger.warning("Failed to stop container %s: %s", self._container_id, e)
if not self._persistent:
# Also schedule removal (stop only leaves it as stopped)
try:
subprocess.Popen(
f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
shell=True,
)
except Exception:
pass
self._container_id = None
if not self._persistent:
import shutil
for d in (self._workspace_dir, self._home_dir):
if d:
shutil.rmtree(d, ignore_errors=True)

View file

@ -1,14 +1,14 @@
"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
"""Modal cloud execution environment using SWE-ReX directly.
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
is snapshotted on cleanup and restored on next creation, so installed packages,
project files, and config changes survive across sessions.
"""
import asyncio
import json
import logging
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None:
_SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))
class ModalEnvironment(BaseEnvironment):
"""Modal cloud execution via mini-swe-agent.
class _AsyncWorker:
"""Background thread with its own event loop for async-safe swe-rex calls.
Wraps SwerexModalEnvironment and adds sudo -S support, configurable
resources (CPU, memory, disk), and optional filesystem persistence
via Modal's snapshot_filesystem() API.
Allows sync code to submit async coroutines and block for results,
even when called from inside another running event loop (e.g. Atropos).
"""
_patches_applied = False
def __init__(self):
self._loop: Optional[asyncio.AbstractEventLoop] = None
self._thread: Optional[threading.Thread] = None
self._started = threading.Event()
def start(self):
self._thread = threading.Thread(target=self._run_loop, daemon=True)
self._thread.start()
self._started.wait(timeout=30)
def _run_loop(self):
self._loop = asyncio.new_event_loop()
asyncio.set_event_loop(self._loop)
self._started.set()
self._loop.run_forever()
def run_coroutine(self, coro, timeout=600):
if self._loop is None or self._loop.is_closed():
raise RuntimeError("AsyncWorker loop is not running")
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
return future.result(timeout=timeout)
def stop(self):
if self._loop and self._loop.is_running():
self._loop.call_soon_threadsafe(self._loop.stop)
if self._thread:
self._thread.join(timeout=10)
class ModalEnvironment(BaseEnvironment):
"""Modal cloud execution via SWE-ReX.
Uses swe-rex's ModalDeployment directly for sandbox management.
Adds sudo -S support, configurable resources (CPU, memory, disk),
and optional filesystem persistence via Modal's snapshot API.
"""
def __init__(
self,
@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment):
):
super().__init__(cwd=cwd, timeout=timeout)
if not ModalEnvironment._patches_applied:
try:
from environments.patches import apply_patches
apply_patches()
except ImportError:
pass
ModalEnvironment._patches_applied = True
self._persistent = persistent_filesystem
self._task_id = task_id
self._base_image = image
self._deployment = None
self._worker = _AsyncWorker()
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment):
effective_image = restored_image if restored_image else image
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
self._inner = SwerexModalEnvironment(
image=effective_image,
cwd=cwd,
timeout=timeout,
startup_timeout=180.0,
runtime_timeout=3600.0,
modal_sandbox_kwargs=sandbox_kwargs,
install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote)
)
# Pre-build a modal.Image with pip fix for Modal's legacy image builder.
# Some task images have broken pip; fix via ensurepip before Modal uses it.
import modal as _modal
if isinstance(effective_image, str):
effective_image = _modal.Image.from_registry(
effective_image,
setup_dockerfile_commands=[
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
],
)
# Start the async worker thread and create the deployment on it
# so all gRPC channels are bound to the worker's event loop.
self._worker.start()
from swerex.deployment.modal import ModalDeployment
async def _create_and_start():
deployment = ModalDeployment(
image=effective_image,
startup_timeout=180.0,
runtime_timeout=3600.0,
deployment_timeout=3600.0,
install_pipx=True,
modal_sandbox_kwargs=sandbox_kwargs,
)
await deployment.start()
return deployment
self._deployment = self._worker.run_coroutine(_create_and_start())
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment):
# subprocess stdin directly the way a local Popen can. When a sudo
# password is present, use a shell-level pipe from printf so that the
# password feeds sudo -S without appearing as an echo argument embedded
# in the shell string. The password is still visible in the remote
# sandbox's command line, but it is not exposed on the user's local
# machine — which is the primary threat being mitigated.
# in the shell string.
if sudo_stdin is not None:
import shlex
exec_command = (
f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
)
from swerex.runtime.abstract import Command as RexCommand
effective_cwd = cwd or self.cwd
effective_timeout = timeout or self.timeout
# Run in a background thread so we can poll for interrupts
result_holder = {"value": None, "error": None}
def _run():
try:
result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
async def _do_execute():
return await self._deployment.runtime.execute(
RexCommand(
command=exec_command,
shell=True,
check=False,
cwd=effective_cwd,
timeout=effective_timeout,
merge_output_streams=True,
)
)
output = self._worker.run_coroutine(_do_execute())
result_holder["value"] = {
"output": output.stdout,
"returncode": output.exit_code,
}
except Exception as e:
result_holder["error"] = e
@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment):
t.join(timeout=0.2)
if is_interrupted():
try:
self._inner.stop()
self._worker.run_coroutine(
asyncio.wait_for(self._deployment.stop(), timeout=10),
timeout=15,
)
except Exception:
pass
return {
@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment):
def cleanup(self):
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
# Check if _inner was ever set (init may have failed)
if not hasattr(self, '_inner') or self._inner is None:
if self._deployment is None:
return
if self._persistent:
try:
sandbox = getattr(self._inner, 'deployment', None)
sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
sandbox = getattr(self._deployment, '_sandbox', None)
if sandbox:
import asyncio
async def _snapshot():
img = await sandbox.snapshot_filesystem.aio()
return img.object_id
try:
snapshot_id = asyncio.run(_snapshot())
except RuntimeError:
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
snapshot_id = pool.submit(
asyncio.run, _snapshot()
).result(timeout=60)
snapshots = _load_snapshots()
snapshots[self._task_id] = snapshot_id
_save_snapshots(snapshots)
logger.info("Modal: saved filesystem snapshot %s for task %s",
snapshot_id[:20], self._task_id)
try:
snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
except Exception:
snapshot_id = None
if snapshot_id:
snapshots = _load_snapshots()
snapshots[self._task_id] = snapshot_id
_save_snapshots(snapshots)
logger.info("Modal: saved filesystem snapshot %s for task %s",
snapshot_id[:20], self._task_id)
except Exception as e:
logger.warning("Modal: filesystem snapshot failed: %s", e)
if hasattr(self._inner, 'stop'):
self._inner.stop()
try:
self._worker.run_coroutine(
asyncio.wait_for(self._deployment.stop(), timeout=10),
timeout=15,
)
except Exception:
pass
finally:
self._worker.stop()
self._deployment = None

View file

@ -51,13 +51,6 @@ logger = logging.getLogger(__name__)
from tools.interrupt import is_interrupted, _interrupt_event
# Add mini-swe-agent to path if not installed. In git worktrees the populated
# submodule may live in the main checkout rather than the worktree itself.
from minisweagent_path import ensure_minisweagent_on_path
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
# =============================================================================
# Custom Singularity Environment with more space
# =============================================================================
@ -1188,27 +1181,15 @@ def terminal_tool(
def check_terminal_requirements() -> bool:
"""Check if all requirements for the terminal tool are met.
Important: local and singularity backends now use Hermes' own environment
wrappers directly and do not require the ``minisweagent`` Python package to
be installed. Docker and Modal still rely on mini-swe-agent internals.
"""
"""Check if all requirements for the terminal tool are met."""
config = _get_env_config()
env_type = config["env_type"]
try:
if env_type == "local":
# Local execution uses Hermes' own LocalEnvironment wrapper and does
# not depend on minisweagent being importable.
return True
elif env_type == "docker":
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
if importlib.util.find_spec("minisweagent") is None:
logger.error("mini-swe-agent is required for docker terminal backend but is not importable")
return False
# Check if docker is available (use find_docker for macOS PATH issues)
from tools.environments.docker import find_docker
docker = find_docker()
if not docker:
@ -1225,7 +1206,6 @@ def check_terminal_requirements() -> bool:
return False
elif env_type == "ssh":
# Check that host and user are configured
if not config.get("ssh_host") or not config.get("ssh_user"):
logger.error(
"SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
@ -1235,11 +1215,9 @@ def check_terminal_requirements() -> bool:
return True
elif env_type == "modal":
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
if importlib.util.find_spec("minisweagent") is None:
logger.error("mini-swe-agent is required for modal terminal backend but is not importable")
if importlib.util.find_spec("swerex") is None:
logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'")
return False
# Check for modal token
has_token = os.getenv("MODAL_TOKEN_ID") is not None
has_config = Path.home().joinpath(".modal.toml").exists()
if not (has_token or has_config):
@ -1269,7 +1247,7 @@ def check_terminal_requirements() -> bool:
if __name__ == "__main__":
# Simple test when run directly
print("Terminal Tool Module (mini-swe-agent backend)")
print("Terminal Tool Module")
print("=" * 50)
config = _get_env_config()