Merge PR #451: feat: Add Daytona environment backend
Authored by rovle. Adds Daytona as the sixth terminal execution backend with cloud sandboxes, persistent workspaces, and full CLI/gateway integration. Includes 24 unit tests and 8 integration tests.
This commit is contained in:
commit
39299e2de4
22 changed files with 865 additions and 30 deletions
|
|
@ -247,7 +247,7 @@ def check_dangerous_command(command: str, env_type: str,
|
|||
Returns:
|
||||
{"approved": True/False, "message": str or None, ...}
|
||||
"""
|
||||
if env_type in ("docker", "singularity", "modal"):
|
||||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
is_dangerous, pattern_key, description = detect_dangerous_command(command)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
Each backend provides the same interface (BaseEnvironment ABC) for running
|
||||
shell commands in a specific execution context: local, Docker, Singularity,
|
||||
SSH, or Modal.
|
||||
SSH, Modal, or Daytona.
|
||||
|
||||
The terminal_tool.py factory (_create_environment) selects the backend
|
||||
based on the TERMINAL_ENV configuration.
|
||||
|
|
|
|||
220
tools/environments/daytona.py
Normal file
220
tools/environments/daytona.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Daytona cloud execution environment.
|
||||
|
||||
Uses the Daytona Python SDK to run commands in cloud sandboxes.
|
||||
Supports persistent sandboxes: when enabled, sandboxes are stopped on cleanup
|
||||
and resumed on next creation, preserving the filesystem across sessions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import math
|
||||
import shlex
|
||||
import threading
|
||||
import uuid
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DaytonaEnvironment(BaseEnvironment):
|
||||
"""Daytona cloud sandbox execution backend.
|
||||
|
||||
Uses stopped/started sandbox lifecycle for filesystem persistence
|
||||
instead of snapshots, making it faster and stateless on the host.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
image: str,
|
||||
cwd: str = "/home/daytona",
|
||||
timeout: int = 60,
|
||||
cpu: int = 1,
|
||||
memory: int = 5120, # MB (hermes convention)
|
||||
disk: int = 10240, # MB (Daytona platform max is 10GB)
|
||||
persistent_filesystem: bool = True,
|
||||
task_id: str = "default",
|
||||
):
|
||||
self._requested_cwd = cwd
|
||||
super().__init__(cwd=cwd, timeout=timeout)
|
||||
|
||||
from daytona import (
|
||||
Daytona,
|
||||
CreateSandboxFromImageParams,
|
||||
DaytonaError,
|
||||
Resources,
|
||||
SandboxState,
|
||||
)
|
||||
|
||||
self._persistent = persistent_filesystem
|
||||
self._task_id = task_id
|
||||
self._SandboxState = SandboxState
|
||||
self._daytona = Daytona()
|
||||
self._sandbox = None
|
||||
self._lock = threading.Lock()
|
||||
|
||||
memory_gib = max(1, math.ceil(memory / 1024))
|
||||
disk_gib = max(1, math.ceil(disk / 1024))
|
||||
if disk_gib > 10:
|
||||
warnings.warn(
|
||||
f"Daytona: requested disk ({disk_gib}GB) exceeds platform limit (10GB). "
|
||||
f"Capping to 10GB. Set container_disk: 10240 in config to silence this.",
|
||||
stacklevel=2,
|
||||
)
|
||||
disk_gib = 10
|
||||
resources = Resources(cpu=cpu, memory=memory_gib, disk=disk_gib)
|
||||
|
||||
labels = {"hermes_task_id": task_id}
|
||||
|
||||
# Try to resume an existing stopped sandbox for this task
|
||||
if self._persistent:
|
||||
try:
|
||||
self._sandbox = self._daytona.find_one(labels=labels)
|
||||
self._sandbox.start()
|
||||
logger.info("Daytona: resumed sandbox %s for task %s",
|
||||
self._sandbox.id, task_id)
|
||||
except DaytonaError:
|
||||
self._sandbox = None
|
||||
except Exception as e:
|
||||
logger.warning("Daytona: failed to resume sandbox for task %s: %s",
|
||||
task_id, e)
|
||||
self._sandbox = None
|
||||
|
||||
# Create a fresh sandbox if we don't have one
|
||||
if self._sandbox is None:
|
||||
self._sandbox = self._daytona.create(
|
||||
CreateSandboxFromImageParams(
|
||||
image=image,
|
||||
labels=labels,
|
||||
auto_stop_interval=0,
|
||||
resources=resources,
|
||||
)
|
||||
)
|
||||
logger.info("Daytona: created sandbox %s for task %s",
|
||||
self._sandbox.id, task_id)
|
||||
|
||||
# Resolve cwd: detect actual home dir inside the sandbox
|
||||
if self._requested_cwd in ("~", "/home/daytona"):
|
||||
try:
|
||||
home = self._sandbox.process.exec("echo $HOME").result.strip()
|
||||
if home:
|
||||
self.cwd = home
|
||||
except Exception:
|
||||
pass # leave cwd as-is; sandbox will use its own default
|
||||
logger.info("Daytona: resolved cwd to %s", self.cwd)
|
||||
|
||||
def _ensure_sandbox_ready(self):
|
||||
"""Restart sandbox if it was stopped (e.g., by a previous interrupt)."""
|
||||
self._sandbox.refresh_data()
|
||||
if self._sandbox.state in (self._SandboxState.STOPPED, self._SandboxState.ARCHIVED):
|
||||
self._sandbox.start()
|
||||
logger.info("Daytona: restarted sandbox %s", self._sandbox.id)
|
||||
|
||||
def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
|
||||
"""Run exec in a background thread with interrupt polling.
|
||||
|
||||
The Daytona SDK's exec(timeout=...) parameter is unreliable (the
|
||||
server-side timeout is not enforced and the SDK has no client-side
|
||||
fallback), so we wrap the command with the shell ``timeout`` utility
|
||||
which reliably kills the process and returns exit code 124.
|
||||
"""
|
||||
# Wrap with shell `timeout` to enforce the deadline reliably.
|
||||
# Add a small buffer so the shell timeout fires before any SDK-level
|
||||
# timeout would, giving us a clean exit code 124.
|
||||
timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}"
|
||||
|
||||
result_holder: dict = {"value": None, "error": None}
|
||||
|
||||
def _run():
|
||||
try:
|
||||
response = self._sandbox.process.exec(
|
||||
timed_command, cwd=cwd,
|
||||
)
|
||||
result_holder["value"] = {
|
||||
"output": response.result or "",
|
||||
"returncode": response.exit_code,
|
||||
}
|
||||
except Exception as e:
|
||||
result_holder["error"] = e
|
||||
|
||||
t = threading.Thread(target=_run, daemon=True)
|
||||
t.start()
|
||||
# Wait for timeout + generous buffer for network/SDK overhead
|
||||
deadline = timeout + 10
|
||||
while t.is_alive():
|
||||
t.join(timeout=0.2)
|
||||
deadline -= 0.2
|
||||
if is_interrupted():
|
||||
with self._lock:
|
||||
try:
|
||||
self._sandbox.stop()
|
||||
except Exception:
|
||||
pass
|
||||
return {
|
||||
"output": "[Command interrupted - Daytona sandbox stopped]",
|
||||
"returncode": 130,
|
||||
}
|
||||
if deadline <= 0:
|
||||
# Shell timeout didn't fire and SDK is hung — force stop
|
||||
with self._lock:
|
||||
try:
|
||||
self._sandbox.stop()
|
||||
except Exception:
|
||||
pass
|
||||
return self._timeout_result(timeout)
|
||||
|
||||
if result_holder["error"]:
|
||||
return {"error": result_holder["error"]}
|
||||
return result_holder["value"]
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
|
||||
timeout: Optional[int] = None,
|
||||
stdin_data: Optional[str] = None) -> dict:
|
||||
with self._lock:
|
||||
self._ensure_sandbox_ready()
|
||||
|
||||
if stdin_data is not None:
|
||||
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
|
||||
while marker in stdin_data:
|
||||
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
|
||||
command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"
|
||||
|
||||
exec_command = self._prepare_command(command)
|
||||
effective_cwd = cwd or self.cwd or None
|
||||
effective_timeout = timeout or self.timeout
|
||||
|
||||
result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)
|
||||
|
||||
if "error" in result:
|
||||
from daytona import DaytonaError
|
||||
err = result["error"]
|
||||
if isinstance(err, DaytonaError):
|
||||
with self._lock:
|
||||
try:
|
||||
self._ensure_sandbox_ready()
|
||||
except Exception:
|
||||
return {"output": f"Daytona execution error: {err}", "returncode": 1}
|
||||
result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)
|
||||
if "error" not in result:
|
||||
return result
|
||||
return {"output": f"Daytona execution error: {err}", "returncode": 1}
|
||||
|
||||
return result
|
||||
|
||||
def cleanup(self):
|
||||
with self._lock:
|
||||
if self._sandbox is None:
|
||||
return
|
||||
try:
|
||||
if self._persistent:
|
||||
self._sandbox.stop()
|
||||
logger.info("Daytona: stopped sandbox %s (filesystem preserved)",
|
||||
self._sandbox.id)
|
||||
else:
|
||||
self._daytona.delete(self._sandbox)
|
||||
logger.info("Daytona: deleted sandbox %s", self._sandbox.id)
|
||||
except Exception as e:
|
||||
logger.warning("Daytona: cleanup failed: %s", e)
|
||||
self._sandbox = None
|
||||
|
|
@ -75,6 +75,8 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
|||
image = overrides.get("singularity_image") or config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = overrides.get("modal_image") or config["modal_image"]
|
||||
elif env_type == "daytona":
|
||||
image = overrides.get("daytona_image") or config["daytona_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
|
|
@ -82,7 +84,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
|||
logger.info("Creating new %s environment for task %s...", env_type, task_id[:8])
|
||||
|
||||
container_config = None
|
||||
if env_type in ("docker", "singularity", "modal"):
|
||||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
container_config = {
|
||||
"container_cpu": config.get("container_cpu", 1),
|
||||
"container_memory": config.get("container_memory", 5120),
|
||||
|
|
|
|||
|
|
@ -423,7 +423,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
# catches the case where cli.py (or .env) leaked the host's CWD.
|
||||
# SSH is excluded since /home/ paths are valid on remote machines.
|
||||
cwd = os.getenv("TERMINAL_CWD", default_cwd)
|
||||
if env_type in ("modal", "docker", "singularity") and cwd:
|
||||
if env_type in ("modal", "docker", "singularity", "daytona") and cwd:
|
||||
host_prefixes = ("/Users/", "C:\\", "C:/")
|
||||
if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
|
||||
logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
|
||||
|
|
@ -436,6 +436,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
"docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
|
||||
"singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
|
||||
"modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
|
||||
"daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
|
||||
"cwd": cwd,
|
||||
"timeout": int(os.getenv("TERMINAL_TIMEOUT", "180")),
|
||||
"lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
|
||||
|
|
@ -444,7 +445,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
"ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
|
||||
"ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
|
||||
"ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
|
||||
# Container resource config (applies to docker, singularity, modal -- ignored for local/ssh)
|
||||
# Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
|
||||
"container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
|
||||
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
|
||||
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
|
||||
|
|
@ -460,7 +461,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
Create an execution environment from mini-swe-agent.
|
||||
|
||||
Args:
|
||||
env_type: One of "local", "docker", "singularity", "modal", "ssh"
|
||||
env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
|
||||
image: Docker/Singularity/Modal image name (ignored for local/ssh)
|
||||
cwd: Working directory
|
||||
timeout: Default command timeout
|
||||
|
|
@ -511,6 +512,15 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
persistent_filesystem=persistent, task_id=task_id,
|
||||
)
|
||||
|
||||
elif env_type == "daytona":
|
||||
# Lazy import so daytona SDK is only required when backend is selected.
|
||||
from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
|
||||
return _DaytonaEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
cpu=int(cpu), memory=memory, disk=disk,
|
||||
persistent_filesystem=persistent, task_id=task_id,
|
||||
)
|
||||
|
||||
elif env_type == "ssh":
|
||||
if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
|
||||
raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
|
||||
|
|
@ -522,9 +532,9 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', or 'ssh'")
|
||||
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'")
|
||||
|
||||
|
||||
def _cleanup_inactive_envs(lifetime_seconds: int = 300):
|
||||
|
|
@ -799,9 +809,11 @@ def terminal_tool(
|
|||
image = overrides.get("singularity_image") or config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = overrides.get("modal_image") or config["modal_image"]
|
||||
elif env_type == "daytona":
|
||||
image = overrides.get("daytona_image") or config["daytona_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
|
||||
cwd = overrides.get("cwd") or config["cwd"]
|
||||
default_timeout = config["timeout"]
|
||||
effective_timeout = timeout or default_timeout
|
||||
|
|
@ -851,7 +863,7 @@ def terminal_tool(
|
|||
}
|
||||
|
||||
container_config = None
|
||||
if env_type in ("docker", "singularity", "modal"):
|
||||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
container_config = {
|
||||
"container_cpu": config.get("container_cpu", 1),
|
||||
"container_memory": config.get("container_memory", 5120),
|
||||
|
|
@ -1090,6 +1102,9 @@ def check_terminal_requirements() -> bool:
|
|||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
||||
# Check for modal token
|
||||
return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
|
||||
elif env_type == "daytona":
|
||||
from daytona import Daytona
|
||||
return os.getenv("DAYTONA_API_KEY") is not None
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
|
|
@ -1128,10 +1143,11 @@ if __name__ == "__main__":
|
|||
|
||||
print("\nEnvironment Variables:")
|
||||
default_img = "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
print(f" TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/ssh)")
|
||||
print(f" TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/daytona/ssh)")
|
||||
print(f" TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
|
||||
print(f" TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
|
||||
print(f" TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
|
||||
print(f" TERMINAL_DAYTONA_IMAGE: {os.getenv('TERMINAL_DAYTONA_IMAGE', default_img)}")
|
||||
print(f" TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
|
||||
print(f" TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', '~/.hermes/sandboxes')}")
|
||||
print(f" TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue