refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)
Drop the mini-swe-agent git submodule. All terminal backends now use hermes-agent's own environment implementations directly. Docker backend: - Inline the `docker run -d` container startup (was 15 lines in minisweagent's DockerEnvironment). Our wrapper already handled execute(), cleanup(), security hardening, volumes, and resource limits. Modal backend: - Import swe-rex's ModalDeployment directly instead of going through minisweagent's 90-line passthrough wrapper. - Bake the _AsyncWorker pattern (from environments/patches.py) directly into ModalEnvironment for Atropos compatibility without monkey-patching. Cleanup: - Remove minisweagent_path.py (submodule path resolution helper) - Remove submodule init/install from install.sh and setup-hermes.sh - Remove mini-swe-agent from .gitmodules - environments/patches.py is now a no-op (kept for backward compat) - terminal_tool.py no longer does sys.path hacking for minisweagent - mini_swe_runner.py guards imports (optional, for RL training only) - Update all affected tests to mock the new direct subprocess calls - Update README.md, CONTRIBUTING.md No functionality change — all Docker, Modal, local, SSH, Singularity, and Daytona backends behave identically. 6093 tests pass.
This commit is contained in:
parent
2233f764af
commit
02b38b93cb
22 changed files with 283 additions and 591 deletions
|
|
@ -1,6 +1,6 @@
|
|||
"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment.
|
||||
"""Docker execution environment for sandboxed command execution.
|
||||
|
||||
Adds security hardening (cap-drop ALL, no-new-privileges, PID limits),
|
||||
Security hardened (cap-drop ALL, no-new-privileges, PID limits),
|
||||
configurable resource limits (CPU, memory, disk), and optional filesystem
|
||||
persistence via bind mounts.
|
||||
"""
|
||||
|
|
@ -13,6 +13,7 @@ import subprocess
|
|||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
|
|
@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment):
|
|||
logger.warning(f"docker_volumes config is not a list: {volumes!r}")
|
||||
volumes = []
|
||||
|
||||
# Fail fast if Docker is not available rather than surfacing a cryptic
|
||||
# FileNotFoundError deep inside the mini-swe-agent stack.
|
||||
# Fail fast if Docker is not available.
|
||||
_ensure_docker_available()
|
||||
|
||||
from minisweagent.environments.docker import DockerEnvironment as _Docker
|
||||
|
||||
# Build resource limit args
|
||||
resource_args = []
|
||||
if cpu > 0:
|
||||
|
|
@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment):
|
|||
|
||||
# Resolve the docker executable once so it works even when
|
||||
# /usr/local/bin is not in PATH (common on macOS gateway/service).
|
||||
docker_exe = find_docker() or "docker"
|
||||
self._docker_exe = find_docker() or "docker"
|
||||
|
||||
self._inner = _Docker(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
run_args=all_run_args,
|
||||
executable=docker_exe,
|
||||
# Start the container directly via `docker run -d`.
|
||||
container_name = f"hermes-{uuid.uuid4().hex[:8]}"
|
||||
run_cmd = [
|
||||
self._docker_exe, "run", "-d",
|
||||
"--name", container_name,
|
||||
"-w", cwd,
|
||||
*all_run_args,
|
||||
image,
|
||||
"sleep", "2h",
|
||||
]
|
||||
logger.debug(f"Starting container: {' '.join(run_cmd)}")
|
||||
result = subprocess.run(
|
||||
run_cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120, # image pull may take a while
|
||||
check=True,
|
||||
)
|
||||
self._container_id = self._inner.container_id
|
||||
self._container_id = result.stdout.strip()
|
||||
logger.info(f"Started container {container_name} ({self._container_id[:12]})")
|
||||
|
||||
@staticmethod
|
||||
def _storage_opt_supported() -> bool:
|
||||
|
|
@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment):
|
|||
exec_command = f"cd {work_dir} && {exec_command}"
|
||||
work_dir = "/"
|
||||
|
||||
assert self._inner.container_id, "Container not started"
|
||||
cmd = [self._inner.config.executable, "exec"]
|
||||
assert self._container_id, "Container not started"
|
||||
cmd = [self._docker_exe, "exec"]
|
||||
if effective_stdin is not None:
|
||||
cmd.append("-i")
|
||||
cmd.extend(["-w", work_dir])
|
||||
|
|
@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment):
|
|||
value = hermes_env.get(key)
|
||||
if value is not None:
|
||||
cmd.extend(["-e", f"{key}={value}"])
|
||||
for key, value in self._inner.config.env.items():
|
||||
cmd.extend(["-e", f"{key}={value}"])
|
||||
cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
|
||||
cmd.extend([self._container_id, "bash", "-lc", exec_command])
|
||||
|
||||
try:
|
||||
_output_chunks = []
|
||||
|
|
@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment):
|
|||
|
||||
def cleanup(self):
|
||||
"""Stop and remove the container. Bind-mount dirs persist if persistent=True."""
|
||||
self._inner.cleanup()
|
||||
|
||||
if not self._persistent and self._container_id:
|
||||
# Inner cleanup only runs `docker stop` in background; container is left
|
||||
# as stopped. When container_persistent=false we must remove it.
|
||||
docker_exe = find_docker() or self._inner.config.executable
|
||||
if self._container_id:
|
||||
try:
|
||||
subprocess.run(
|
||||
[docker_exe, "rm", "-f", self._container_id],
|
||||
capture_output=True,
|
||||
timeout=30,
|
||||
# Stop in background so cleanup doesn't block
|
||||
stop_cmd = (
|
||||
f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
|
||||
f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
|
||||
)
|
||||
subprocess.Popen(stop_cmd, shell=True)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e)
|
||||
logger.warning("Failed to stop container %s: %s", self._container_id, e)
|
||||
|
||||
if not self._persistent:
|
||||
# Also schedule removal (stop only leaves it as stopped)
|
||||
try:
|
||||
subprocess.Popen(
|
||||
f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
|
||||
shell=True,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
self._container_id = None
|
||||
|
||||
if not self._persistent:
|
||||
import shutil
|
||||
for d in (self._workspace_dir, self._home_dir):
|
||||
if d:
|
||||
shutil.rmtree(d, ignore_errors=True)
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
|
||||
"""Modal cloud execution environment using SWE-ReX directly.
|
||||
|
||||
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
|
||||
is snapshotted on cleanup and restored on next creation, so installed packages,
|
||||
project files, and config changes survive across sessions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
|
@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None:
|
|||
_SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))
|
||||
|
||||
|
||||
class ModalEnvironment(BaseEnvironment):
|
||||
"""Modal cloud execution via mini-swe-agent.
|
||||
class _AsyncWorker:
    """Background thread with its own event loop for async-safe swe-rex calls.

    Allows sync code to submit async coroutines and block for results,
    even when called from inside another running event loop (e.g. Atropos).
    """

    def __init__(self):
        # Loop and thread are created lazily in start(); _started signals
        # that the loop is installed and running on the worker thread.
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        self._started = threading.Event()

    def start(self):
        """Spawn the daemon worker thread and wait for its loop to come up.

        Raises:
            RuntimeError: if the worker loop is not running within 30s.
                (Previously the wait() result was discarded, so a failed
                startup surfaced later as a confusing error from
                run_coroutine() instead of failing fast here.)
        """
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()
        if not self._started.wait(timeout=30):
            raise RuntimeError("_AsyncWorker event loop failed to start within 30s")

    def _run_loop(self):
        # Runs on the worker thread: owns a fresh event loop until stop().
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        self._started.set()
        self._loop.run_forever()

    def run_coroutine(self, coro, timeout=600):
        """Submit *coro* to the worker loop and block until it completes.

        Safe to call from sync code even when the caller itself sits inside
        another running event loop. Returns the coroutine's result.

        Raises:
            RuntimeError: if the worker loop is absent or already closed.
            concurrent.futures.TimeoutError: if *timeout* seconds elapse.
        """
        if self._loop is None or self._loop.is_closed():
            raise RuntimeError("AsyncWorker loop is not running")
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result(timeout=timeout)

    def stop(self):
        """Stop the worker loop and join the thread (best-effort, 10s cap)."""
        if self._loop and self._loop.is_running():
            self._loop.call_soon_threadsafe(self._loop.stop)
        if self._thread:
            self._thread.join(timeout=10)
|
||||
|
||||
|
||||
class ModalEnvironment(BaseEnvironment):
|
||||
"""Modal cloud execution via SWE-ReX.
|
||||
|
||||
Uses swe-rex's ModalDeployment directly for sandbox management.
|
||||
Adds sudo -S support, configurable resources (CPU, memory, disk),
|
||||
and optional filesystem persistence via Modal's snapshot API.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment):
|
|||
):
|
||||
super().__init__(cwd=cwd, timeout=timeout)
|
||||
|
||||
if not ModalEnvironment._patches_applied:
|
||||
try:
|
||||
from environments.patches import apply_patches
|
||||
apply_patches()
|
||||
except ImportError:
|
||||
pass
|
||||
ModalEnvironment._patches_applied = True
|
||||
|
||||
self._persistent = persistent_filesystem
|
||||
self._task_id = task_id
|
||||
self._base_image = image
|
||||
self._deployment = None
|
||||
self._worker = _AsyncWorker()
|
||||
|
||||
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
|
||||
|
||||
|
|
@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment):
|
|||
|
||||
effective_image = restored_image if restored_image else image
|
||||
|
||||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
||||
self._inner = SwerexModalEnvironment(
|
||||
image=effective_image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
startup_timeout=180.0,
|
||||
runtime_timeout=3600.0,
|
||||
modal_sandbox_kwargs=sandbox_kwargs,
|
||||
install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote)
|
||||
)
|
||||
# Pre-build a modal.Image with pip fix for Modal's legacy image builder.
|
||||
# Some task images have broken pip; fix via ensurepip before Modal uses it.
|
||||
import modal as _modal
|
||||
if isinstance(effective_image, str):
|
||||
effective_image = _modal.Image.from_registry(
|
||||
effective_image,
|
||||
setup_dockerfile_commands=[
|
||||
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
|
||||
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
|
||||
],
|
||||
)
|
||||
|
||||
# Start the async worker thread and create the deployment on it
|
||||
# so all gRPC channels are bound to the worker's event loop.
|
||||
self._worker.start()
|
||||
|
||||
from swerex.deployment.modal import ModalDeployment
|
||||
|
||||
async def _create_and_start():
|
||||
deployment = ModalDeployment(
|
||||
image=effective_image,
|
||||
startup_timeout=180.0,
|
||||
runtime_timeout=3600.0,
|
||||
deployment_timeout=3600.0,
|
||||
install_pipx=True,
|
||||
modal_sandbox_kwargs=sandbox_kwargs,
|
||||
)
|
||||
await deployment.start()
|
||||
return deployment
|
||||
|
||||
self._deployment = self._worker.run_coroutine(_create_and_start())
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
|
||||
timeout: int | None = None,
|
||||
|
|
@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment):
|
|||
# subprocess stdin directly the way a local Popen can. When a sudo
|
||||
# password is present, use a shell-level pipe from printf so that the
|
||||
# password feeds sudo -S without appearing as an echo argument embedded
|
||||
# in the shell string. The password is still visible in the remote
|
||||
# sandbox's command line, but it is not exposed on the user's local
|
||||
# machine — which is the primary threat being mitigated.
|
||||
# in the shell string.
|
||||
if sudo_stdin is not None:
|
||||
import shlex
|
||||
exec_command = (
|
||||
f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
|
||||
)
|
||||
|
||||
from swerex.runtime.abstract import Command as RexCommand
|
||||
|
||||
effective_cwd = cwd or self.cwd
|
||||
effective_timeout = timeout or self.timeout
|
||||
|
||||
# Run in a background thread so we can poll for interrupts
|
||||
result_holder = {"value": None, "error": None}
|
||||
|
||||
def _run():
|
||||
try:
|
||||
result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
|
||||
async def _do_execute():
|
||||
return await self._deployment.runtime.execute(
|
||||
RexCommand(
|
||||
command=exec_command,
|
||||
shell=True,
|
||||
check=False,
|
||||
cwd=effective_cwd,
|
||||
timeout=effective_timeout,
|
||||
merge_output_streams=True,
|
||||
)
|
||||
)
|
||||
output = self._worker.run_coroutine(_do_execute())
|
||||
result_holder["value"] = {
|
||||
"output": output.stdout,
|
||||
"returncode": output.exit_code,
|
||||
}
|
||||
except Exception as e:
|
||||
result_holder["error"] = e
|
||||
|
||||
|
|
@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment):
|
|||
t.join(timeout=0.2)
|
||||
if is_interrupted():
|
||||
try:
|
||||
self._inner.stop()
|
||||
self._worker.run_coroutine(
|
||||
asyncio.wait_for(self._deployment.stop(), timeout=10),
|
||||
timeout=15,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return {
|
||||
|
|
@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment):
|
|||
|
||||
def cleanup(self):
|
||||
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
|
||||
# Check if _inner was ever set (init may have failed)
|
||||
if not hasattr(self, '_inner') or self._inner is None:
|
||||
if self._deployment is None:
|
||||
return
|
||||
|
||||
if self._persistent:
|
||||
try:
|
||||
sandbox = getattr(self._inner, 'deployment', None)
|
||||
sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
|
||||
sandbox = getattr(self._deployment, '_sandbox', None)
|
||||
if sandbox:
|
||||
import asyncio
|
||||
async def _snapshot():
|
||||
img = await sandbox.snapshot_filesystem.aio()
|
||||
return img.object_id
|
||||
try:
|
||||
snapshot_id = asyncio.run(_snapshot())
|
||||
except RuntimeError:
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
snapshot_id = pool.submit(
|
||||
asyncio.run, _snapshot()
|
||||
).result(timeout=60)
|
||||
|
||||
snapshots = _load_snapshots()
|
||||
snapshots[self._task_id] = snapshot_id
|
||||
_save_snapshots(snapshots)
|
||||
logger.info("Modal: saved filesystem snapshot %s for task %s",
|
||||
snapshot_id[:20], self._task_id)
|
||||
try:
|
||||
snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
|
||||
except Exception:
|
||||
snapshot_id = None
|
||||
|
||||
if snapshot_id:
|
||||
snapshots = _load_snapshots()
|
||||
snapshots[self._task_id] = snapshot_id
|
||||
_save_snapshots(snapshots)
|
||||
logger.info("Modal: saved filesystem snapshot %s for task %s",
|
||||
snapshot_id[:20], self._task_id)
|
||||
except Exception as e:
|
||||
logger.warning("Modal: filesystem snapshot failed: %s", e)
|
||||
|
||||
if hasattr(self._inner, 'stop'):
|
||||
self._inner.stop()
|
||||
try:
|
||||
self._worker.run_coroutine(
|
||||
asyncio.wait_for(self._deployment.stop(), timeout=10),
|
||||
timeout=15,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._worker.stop()
|
||||
self._deployment = None
|
||||
|
|
|
|||
|
|
@ -51,13 +51,6 @@ logger = logging.getLogger(__name__)
|
|||
from tools.interrupt import is_interrupted, _interrupt_event
|
||||
|
||||
|
||||
# Add mini-swe-agent to path if not installed. In git worktrees the populated
|
||||
# submodule may live in the main checkout rather than the worktree itself.
|
||||
from minisweagent_path import ensure_minisweagent_on_path
|
||||
|
||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Custom Singularity Environment with more space
|
||||
# =============================================================================
|
||||
|
|
@ -1188,27 +1181,15 @@ def terminal_tool(
|
|||
|
||||
|
||||
def check_terminal_requirements() -> bool:
|
||||
"""Check if all requirements for the terminal tool are met.
|
||||
|
||||
Important: local and singularity backends now use Hermes' own environment
|
||||
wrappers directly and do not require the ``minisweagent`` Python package to
|
||||
be installed. Docker and Modal still rely on mini-swe-agent internals.
|
||||
"""
|
||||
"""Check if all requirements for the terminal tool are met."""
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
try:
|
||||
if env_type == "local":
|
||||
# Local execution uses Hermes' own LocalEnvironment wrapper and does
|
||||
# not depend on minisweagent being importable.
|
||||
return True
|
||||
|
||||
elif env_type == "docker":
|
||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
||||
if importlib.util.find_spec("minisweagent") is None:
|
||||
logger.error("mini-swe-agent is required for docker terminal backend but is not importable")
|
||||
return False
|
||||
# Check if docker is available (use find_docker for macOS PATH issues)
|
||||
from tools.environments.docker import find_docker
|
||||
docker = find_docker()
|
||||
if not docker:
|
||||
|
|
@ -1225,7 +1206,6 @@ def check_terminal_requirements() -> bool:
|
|||
return False
|
||||
|
||||
elif env_type == "ssh":
|
||||
# Check that host and user are configured
|
||||
if not config.get("ssh_host") or not config.get("ssh_user"):
|
||||
logger.error(
|
||||
"SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
|
||||
|
|
@ -1235,11 +1215,9 @@ def check_terminal_requirements() -> bool:
|
|||
return True
|
||||
|
||||
elif env_type == "modal":
|
||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
||||
if importlib.util.find_spec("minisweagent") is None:
|
||||
logger.error("mini-swe-agent is required for modal terminal backend but is not importable")
|
||||
if importlib.util.find_spec("swerex") is None:
|
||||
logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'")
|
||||
return False
|
||||
# Check for modal token
|
||||
has_token = os.getenv("MODAL_TOKEN_ID") is not None
|
||||
has_config = Path.home().joinpath(".modal.toml").exists()
|
||||
if not (has_token or has_config):
|
||||
|
|
@ -1269,7 +1247,7 @@ def check_terminal_requirements() -> bool:
|
|||
|
||||
if __name__ == "__main__":
|
||||
# Simple test when run directly
|
||||
print("Terminal Tool Module (mini-swe-agent backend)")
|
||||
print("Terminal Tool Module")
|
||||
print("=" * 50)
|
||||
|
||||
config = _get_env_config()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue