Merge PR #451: feat: Add Daytona environment backend

Authored by rovle. Adds Daytona as the sixth terminal execution backend with cloud sandboxes, persistent workspaces, and full CLI/gateway integration. Includes 24 unit tests and 8 integration tests.
2026-03-06 03:32:40 -08:00 · 2026-03-06 03:32:40 -08:00 · 39299e2de4
commit 39299e2de4
parent efec4fcaab a6499b6107
22 changed files with 865 additions and 30 deletions
--- a/tools/approval.py
+++ b/tools/approval.py
@ -247,7 +247,7 @@ def check_dangerous_command(command: str, env_type: str,
    Returns:
        {"approved": True/False, "message": str or None, ...}
    """
-    if env_type in ("docker", "singularity", "modal"):
+    if env_type in ("docker", "singularity", "modal", "daytona"):
        return {"approved": True, "message": None}

    is_dangerous, pattern_key, description = detect_dangerous_command(command)
--- a/tools/environments/__init__.py
+++ b/tools/environments/__init__.py
@ -2,7 +2,7 @@

 Each backend provides the same interface (BaseEnvironment ABC) for running
 shell commands in a specific execution context: local, Docker, Singularity,
-SSH, or Modal.
+SSH, Modal, or Daytona.

 The terminal_tool.py factory (_create_environment) selects the backend
 based on the TERMINAL_ENV configuration.
--- a/tools/environments/daytona.py
+++ b/tools/environments/daytona.py
@ -0,0 +1,220 @@
+"""Daytona cloud execution environment.
+
+Uses the Daytona Python SDK to run commands in cloud sandboxes.
+Supports persistent sandboxes: when enabled, sandboxes are stopped on cleanup
+and resumed on next creation, preserving the filesystem across sessions.
+"""
+
+import logging
+import math
+import shlex
+import threading
+import uuid
+import warnings
+from typing import Optional
+
+from tools.environments.base import BaseEnvironment
+from tools.interrupt import is_interrupted
+
+logger = logging.getLogger(__name__)
+
+
+class DaytonaEnvironment(BaseEnvironment):
+    """Daytona cloud sandbox execution backend.
+
+    Uses stopped/started sandbox lifecycle for filesystem persistence
+    instead of snapshots, making it faster and stateless on the host.
+
+    Sandboxes are labelled with ``hermes_task_id`` so that a later instance
+    created for the same task can look up and resume the existing sandbox.
+    """
+
+    def __init__(
+        self,
+        image: str,
+        cwd: str = "/home/daytona",
+        timeout: int = 60,
+        cpu: int = 1,
+        memory: int = 5120,       # MB (hermes convention)
+        disk: int = 10240,        # MB (Daytona platform max is 10GB)
+        persistent_filesystem: bool = True,
+        task_id: str = "default",
+    ):
+        """Resume or create the sandbox for ``task_id``.
+
+        Args:
+            image: Image used when a new sandbox has to be created.
+            cwd: Default working directory. ``"~"`` and ``"/home/daytona"``
+                are replaced by the sandbox's own ``$HOME`` when it can be
+                detected.
+            timeout: Default per-command timeout in seconds.
+            cpu: CPU count requested for a newly created sandbox.
+            memory: Memory in MB; rounded up to whole GiB (minimum 1).
+            disk: Disk in MB; rounded up to whole GiB (minimum 1) and
+                capped at 10 with a warning.
+            persistent_filesystem: When true, first try to resume an
+                existing sandbox for this task, and stop (rather than
+                delete) the sandbox on cleanup.
+            task_id: Value stored in the ``hermes_task_id`` sandbox label.
+        """
+        self._requested_cwd = cwd
+        super().__init__(cwd=cwd, timeout=timeout)
+
+        # Imported lazily so the SDK is only required when this backend is used.
+        from daytona import (
+            Daytona,
+            CreateSandboxFromImageParams,
+            DaytonaError,
+            Resources,
+            SandboxState,
+        )
+
+        self._persistent = persistent_filesystem
+        self._task_id = task_id
+        # Kept on the instance so later methods can compare states without
+        # re-importing the SDK.
+        self._SandboxState = SandboxState
+        self._daytona = Daytona()
+        self._sandbox = None
+        self._lock = threading.Lock()
+
+        # Resources are specified in GiB, hermes config in MB.
+        memory_gib = max(1, math.ceil(memory / 1024))
+        disk_gib = max(1, math.ceil(disk / 1024))
+        if disk_gib > 10:
+            warnings.warn(
+                f"Daytona: requested disk ({disk_gib}GB) exceeds platform limit (10GB). "
+                f"Capping to 10GB. Set container_disk: 10240 in config to silence this.",
+                stacklevel=2,
+            )
+            disk_gib = 10
+        resources = Resources(cpu=cpu, memory=memory_gib, disk=disk_gib)
+
+        labels = {"hermes_task_id": task_id}
+
+        # Try to resume an existing stopped sandbox for this task
+        if self._persistent:
+            try:
+                self._sandbox = self._daytona.find_one(labels=labels)
+                self._sandbox.start()
+                logger.info("Daytona: resumed sandbox %s for task %s",
+                            self._sandbox.id, task_id)
+            except DaytonaError as e:
+                # Expected when there is nothing to resume; fall through to
+                # creating a fresh sandbox, but leave a trace for debugging.
+                logger.debug("Daytona: no sandbox resumed for task %s: %s",
+                             task_id, e)
+                self._sandbox = None
+            except Exception as e:
+                logger.warning("Daytona: failed to resume sandbox for task %s: %s",
+                               task_id, e)
+                self._sandbox = None
+
+        # Create a fresh sandbox if we don't have one
+        if self._sandbox is None:
+            self._sandbox = self._daytona.create(
+                CreateSandboxFromImageParams(
+                    image=image,
+                    labels=labels,
+                    auto_stop_interval=0,
+                    resources=resources,
+                )
+            )
+            logger.info("Daytona: created sandbox %s for task %s",
+                        self._sandbox.id, task_id)
+
+        # Resolve cwd: detect actual home dir inside the sandbox
+        if self._requested_cwd in ("~", "/home/daytona"):
+            try:
+                home = self._sandbox.process.exec("echo $HOME").result.strip()
+                if home:
+                    self.cwd = home
+            except Exception:
+                # Best effort: leave cwd as-is; sandbox will use its own default.
+                logger.debug("Daytona: could not detect $HOME in sandbox",
+                             exc_info=True)
+            logger.info("Daytona: resolved cwd to %s", self.cwd)
+
+    def _ensure_sandbox_ready(self):
+        """Restart sandbox if it was stopped (e.g., by a previous interrupt).
+
+        Refreshes the sandbox data first, then starts the sandbox when its
+        state is STOPPED or ARCHIVED. Callers in this class hold ``self._lock``.
+        """
+        self._sandbox.refresh_data()
+        if self._sandbox.state in (self._SandboxState.STOPPED, self._SandboxState.ARCHIVED):
+            self._sandbox.start()
+            logger.info("Daytona: restarted sandbox %s", self._sandbox.id)
+
+    def _force_stop(self) -> None:
+        """Stop the sandbox on a best-effort basis while holding the lock.
+
+        Used to abort a command that was interrupted or overran its deadline.
+        A failure to stop is logged at debug level and otherwise ignored.
+        """
+        with self._lock:
+            try:
+                self._sandbox.stop()
+            except Exception:
+                logger.debug("Daytona: failed to stop sandbox", exc_info=True)
+
+    def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
+        """Run exec in a background thread with interrupt polling.
+
+        The Daytona SDK's exec(timeout=...) parameter is unreliable (the
+        server-side timeout is not enforced and the SDK has no client-side
+        fallback), so we wrap the command with the shell ``timeout`` utility
+        which reliably kills the process and returns exit code 124.
+
+        Args:
+            exec_command: Shell command to run inside the sandbox.
+            cwd: Working directory passed to the SDK, or None.
+            timeout: Deadline in seconds given to the shell ``timeout`` utility.
+
+        Returns:
+            ``{"output": str, "returncode": int}`` on completion or interrupt,
+            ``{"error": exception}`` when the SDK call raised, or the value of
+            ``self._timeout_result(timeout)`` when the hard deadline passed.
+        """
+        # Wrap with shell `timeout` to enforce the deadline reliably.
+        # The polling loop below waits an extra 10 seconds on top of this,
+        # so the shell timeout normally fires first and gives us a clean
+        # exit code 124.
+        timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}"
+
+        result_holder: dict = {"value": None, "error": None}
+
+        def _run():
+            try:
+                response = self._sandbox.process.exec(
+                    timed_command, cwd=cwd,
+                )
+                result_holder["value"] = {
+                    "output": response.result or "",
+                    "returncode": response.exit_code,
+                }
+            except Exception as e:
+                # Handed back to the caller, which decides whether to retry.
+                result_holder["error"] = e
+
+        t = threading.Thread(target=_run, daemon=True)
+        t.start()
+        # Wait for timeout + generous buffer for network/SDK overhead
+        remaining = timeout + 10
+        while t.is_alive():
+            t.join(timeout=0.2)
+            remaining -= 0.2
+            if is_interrupted():
+                self._force_stop()
+                return {
+                    "output": "[Command interrupted - Daytona sandbox stopped]",
+                    "returncode": 130,
+                }
+            if remaining <= 0:
+                # Shell timeout didn't fire and SDK is hung — force stop
+                self._force_stop()
+                return self._timeout_result(timeout)
+
+        if result_holder["error"]:
+            return {"error": result_holder["error"]}
+        return result_holder["value"]
+
+    def execute(self, command: str, cwd: str = "", *,
+                timeout: Optional[int] = None,
+                stdin_data: Optional[str] = None) -> dict:
+        """Run ``command`` in the sandbox and return its result.
+
+        Args:
+            command: Shell command to execute.
+            cwd: Working directory; falls back to the environment's cwd.
+            timeout: Per-call timeout in seconds; falls back to the
+                environment default when omitted or zero.
+            stdin_data: Optional text fed to the command through a quoted
+                here-document.
+
+        Returns:
+            A dict with ``output`` and ``returncode``. SDK failures are
+            reported as ``returncode`` 1 with a "Daytona execution error"
+            message rather than raised.
+        """
+        with self._lock:
+            self._ensure_sandbox_ready()
+
+        if stdin_data is not None:
+            # Pick a here-document delimiter that does not occur in the data.
+            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
+            while marker in stdin_data:
+                marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
+            command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"
+
+        exec_command = self._prepare_command(command)
+        effective_cwd = cwd or self.cwd or None
+        effective_timeout = timeout or self.timeout
+
+        result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)
+
+        if "error" in result:
+            from daytona import DaytonaError
+            err = result["error"]
+            if isinstance(err, DaytonaError):
+                # The sandbox may have been stopped underneath us: bring it
+                # back up and retry the command exactly once.
+                with self._lock:
+                    try:
+                        self._ensure_sandbox_ready()
+                    except Exception:
+                        return {"output": f"Daytona execution error: {err}", "returncode": 1}
+                result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)
+                if "error" not in result:
+                    return result
+            # Report the first error, including when the retry failed as well.
+            return {"output": f"Daytona execution error: {err}", "returncode": 1}
+
+        return result
+
+    def cleanup(self):
+        """Release the sandbox.
+
+        Stops the sandbox when persistence is enabled (the log message notes
+        the filesystem is preserved), otherwise deletes it. Failures are
+        logged as warnings, and the sandbox reference is dropped either way.
+        Does nothing when there is no sandbox.
+        """
+        with self._lock:
+            if self._sandbox is None:
+                return
+            try:
+                if self._persistent:
+                    self._sandbox.stop()
+                    logger.info("Daytona: stopped sandbox %s (filesystem preserved)",
+                                self._sandbox.id)
+                else:
+                    self._daytona.delete(self._sandbox)
+                    logger.info("Daytona: deleted sandbox %s", self._sandbox.id)
+            except Exception as e:
+                logger.warning("Daytona: cleanup failed: %s", e)
+            self._sandbox = None
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@ -75,6 +75,8 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
                image = overrides.get("singularity_image") or config["singularity_image"]
            elif env_type == "modal":
                image = overrides.get("modal_image") or config["modal_image"]
+            elif env_type == "daytona":
+                image = overrides.get("daytona_image") or config["daytona_image"]
            else:
                image = ""

@ -82,7 +84,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
            logger.info("Creating new %s environment for task %s...", env_type, task_id[:8])

            container_config = None
-            if env_type in ("docker", "singularity", "modal"):
+            if env_type in ("docker", "singularity", "modal", "daytona"):
                container_config = {
                    "container_cpu": config.get("container_cpu", 1),
                    "container_memory": config.get("container_memory", 5120),
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -423,7 +423,7 @@ def _get_env_config() -> Dict[str, Any]:
    # catches the case where cli.py (or .env) leaked the host's CWD.
    # SSH is excluded since /home/ paths are valid on remote machines.
    cwd = os.getenv("TERMINAL_CWD", default_cwd)
-    if env_type in ("modal", "docker", "singularity") and cwd:
+    if env_type in ("modal", "docker", "singularity", "daytona") and cwd:
        host_prefixes = ("/Users/", "C:\\", "C:/")
        if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
            logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
@ -436,6 +436,7 @@ def _get_env_config() -> Dict[str, Any]:
        "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
        "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
        "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
+        "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
        "cwd": cwd,
        "timeout": int(os.getenv("TERMINAL_TIMEOUT", "180")),
        "lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
@ -444,7 +445,7 @@ def _get_env_config() -> Dict[str, Any]:
        "ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
        "ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
        "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
-        # Container resource config (applies to docker, singularity, modal -- ignored for local/ssh)
+        # Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
        "container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
        "container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")),     # MB (default 5GB)
        "container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")),        # MB (default 50GB)
@ -460,7 +461,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
    Create an execution environment from mini-swe-agent.
    
    Args:
-        env_type: One of "local", "docker", "singularity", "modal", "ssh"
+        env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
        image: Docker/Singularity/Modal image name (ignored for local/ssh)
        cwd: Working directory
        timeout: Default command timeout
@ -511,6 +512,15 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
            persistent_filesystem=persistent, task_id=task_id,
        )
    
+    elif env_type == "daytona":
+        # Lazy import so daytona SDK is only required when backend is selected.
+        from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
+        return _DaytonaEnvironment(
+            image=image, cwd=cwd, timeout=timeout,
+            cpu=int(cpu), memory=memory, disk=disk,
+            persistent_filesystem=persistent, task_id=task_id,
+        )
+
    elif env_type == "ssh":
        if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
            raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
@ -522,9 +532,9 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
            cwd=cwd,
            timeout=timeout,
        )
-    
+
    else:
-        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', or 'ssh'")
+        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'")


 def _cleanup_inactive_envs(lifetime_seconds: int = 300):
@ -799,9 +809,11 @@ def terminal_tool(
            image = overrides.get("singularity_image") or config["singularity_image"]
        elif env_type == "modal":
            image = overrides.get("modal_image") or config["modal_image"]
+        elif env_type == "daytona":
+            image = overrides.get("daytona_image") or config["daytona_image"]
        else:
            image = ""
-        
+
        cwd = overrides.get("cwd") or config["cwd"]
        default_timeout = config["timeout"]
        effective_timeout = timeout or default_timeout
@ -851,7 +863,7 @@ def terminal_tool(
                            }

                        container_config = None
-                        if env_type in ("docker", "singularity", "modal"):
+                        if env_type in ("docker", "singularity", "modal", "daytona"):
                            container_config = {
                                "container_cpu": config.get("container_cpu", 1),
                                "container_memory": config.get("container_memory", 5120),
@ -1090,6 +1102,9 @@ def check_terminal_requirements() -> bool:
            from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
            # Check for modal token
            return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
+        elif env_type == "daytona":
+            from daytona import Daytona
+            return os.getenv("DAYTONA_API_KEY") is not None
        else:
            return False
    except Exception as e:
@ -1128,10 +1143,11 @@ if __name__ == "__main__":

    print("\nEnvironment Variables:")
    default_img = "nikolaik/python-nodejs:python3.11-nodejs20"
-    print(f"  TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/ssh)")
+    print(f"  TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/daytona/ssh)")
    print(f"  TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
    print(f"  TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
    print(f"  TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
+    print(f"  TERMINAL_DAYTONA_IMAGE: {os.getenv('TERMINAL_DAYTONA_IMAGE', default_img)}")
    print(f"  TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
    print(f"  TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', '~/.hermes/sandboxes')}")
    print(f"  TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")