From a6499b610760d0f1bfa89a5dbd29407f594d3f7f Mon Sep 17 00:00:00 2001 From: rovle Date: Thu, 5 Mar 2026 13:12:41 -0800 Subject: [PATCH] fix(daytona): use shell timeout wrapper instead of broken SDK exec timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Daytona SDK's process.exec(timeout=N) parameter is not enforced — the server-side timeout never fires and the SDK has no client-side fallback, causing commands to hang indefinitely. Fix: wrap commands with timeout N sh -c '...' (coreutils) which reliably kills the process and returns exit code 124. Added shlex.quote for proper shell escaping and a secondary deadline (timeout + 10s) that force-stops the sandbox if the shell timeout somehow fails. Signed-off-by: rovle --- tests/tools/test_daytona_environment.py | 34 ++++++++++++++++++++++++- tools/environments/daytona.py | 27 ++++++++++++++++++-- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_daytona_environment.py b/tests/tools/test_daytona_environment.py index 41f1ace8..6d32f744 100644 --- a/tests/tools/test_daytona_environment.py +++ b/tests/tools/test_daytona_environment.py @@ -200,6 +200,36 @@ class TestExecute: assert result["output"] == "hello" assert result["returncode"] == 0 + def test_command_wrapped_with_shell_timeout(self, make_env): + sb = _make_sandbox() + sb.process.exec.side_effect = [ + _make_exec_response(result="/root"), + _make_exec_response(result="ok", exit_code=0), + ] + sb.state = "started" + env = make_env(sandbox=sb, timeout=42) + + env.execute("echo hello") + # The command sent to exec should be wrapped with `timeout N sh -c '...'` + call_args = sb.process.exec.call_args_list[-1] + cmd = call_args[0][0] + assert cmd.startswith("timeout 42 sh -c ") + # SDK timeout param should NOT be passed + assert "timeout" not in call_args[1] + + def test_timeout_returns_exit_code_124(self, make_env): + """Shell timeout utility returns exit code 124.""" + sb = 
_make_sandbox() + sb.process.exec.side_effect = [ + _make_exec_response(result="/root"), + _make_exec_response(result="", exit_code=124), + ] + sb.state = "started" + env = make_env(sandbox=sb) + + result = env.execute("sleep 300", timeout=5) + assert result["returncode"] == 124 + def test_nonzero_exit_code(self, make_env): sb = _make_sandbox() sb.process.exec.side_effect = [ @@ -223,10 +253,12 @@ class TestExecute: env.execute("python3", stdin_data="print('hi')") # Check that the command passed to exec contains heredoc markers + # (single quotes get shell-escaped by shlex.quote, so check components) call_args = sb.process.exec.call_args_list[-1] cmd = call_args[0][0] assert "HERMES_EOF_" in cmd - assert "print('hi')" in cmd + assert "print" in cmd + assert "hi" in cmd def test_custom_cwd_passed_through(self, make_env): sb = _make_sandbox() diff --git a/tools/environments/daytona.py b/tools/environments/daytona.py index bfd1732e..c8df198c 100644 --- a/tools/environments/daytona.py +++ b/tools/environments/daytona.py @@ -7,6 +7,7 @@ and resumed on next creation, preserving the filesystem across sessions. import logging import math +import shlex import threading import uuid import warnings @@ -112,13 +113,24 @@ class DaytonaEnvironment(BaseEnvironment): logger.info("Daytona: restarted sandbox %s", self._sandbox.id) def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict: - """Run exec in a background thread with interrupt polling.""" + """Run exec in a background thread with interrupt polling. + + The Daytona SDK's exec(timeout=...) parameter is unreliable (the + server-side timeout is not enforced and the SDK has no client-side + fallback), so we wrap the command with the shell ``timeout`` utility + which reliably kills the process and returns exit code 124. + """ + # Wrap with shell `timeout` to enforce the deadline reliably. 
+ # The SDK-level timeout argument is deliberately not passed to exec(); + # a command that overruns is reported by the wrapper as exit code 124. + timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}" + result_holder: dict = {"value": None, "error": None} def _run(): try: response = self._sandbox.process.exec( - exec_command, cwd=cwd, timeout=timeout, + timed_command, cwd=cwd, ) result_holder["value"] = { "output": response.result or "", @@ -129,8 +141,11 @@ class DaytonaEnvironment(BaseEnvironment): t = threading.Thread(target=_run, daemon=True) t.start() + # Wait for timeout + generous buffer for network/SDK overhead + deadline = timeout + 10 while t.is_alive(): t.join(timeout=0.2) + deadline -= 0.2 if is_interrupted(): with self._lock: try: @@ -141,6 +156,14 @@ class DaytonaEnvironment(BaseEnvironment): "output": "[Command interrupted - Daytona sandbox stopped]", "returncode": 130, } + if deadline <= 0: + # Shell timeout didn't fire and SDK is hung — force stop + with self._lock: + try: + self._sandbox.stop() + except Exception: + pass + return self._timeout_result(timeout) if result_holder["error"]: return {"error": result_holder["error"]}