fix(daytona): use shell timeout wrapper instead of broken SDK exec timeout
The Daytona SDK's `process.exec(timeout=N)` parameter is not enforced: the server-side timeout never fires and the SDK has no client-side fallback, so commands can hang indefinitely. The fix wraps each command as `timeout N sh -c '...'` (the coreutils `timeout` utility), which reliably kills the process and returns exit code 124. Also added `shlex.quote` for proper shell escaping and a secondary deadline (timeout + 10s) that force-stops the sandbox if the shell timeout somehow fails. Signed-off-by: rovle <lovre.pesut@gmail.com>
This commit is contained in:
parent
74a36b0729
commit
a6499b6107
2 changed files with 58 additions and 3 deletions
|
|
@ -200,6 +200,36 @@ class TestExecute:
|
||||||
assert result["output"] == "hello"
|
assert result["output"] == "hello"
|
||||||
assert result["returncode"] == 0
|
assert result["returncode"] == 0
|
||||||
|
|
||||||
|
def test_command_wrapped_with_shell_timeout(self, make_env):
|
||||||
|
sb = _make_sandbox()
|
||||||
|
sb.process.exec.side_effect = [
|
||||||
|
_make_exec_response(result="/root"),
|
||||||
|
_make_exec_response(result="ok", exit_code=0),
|
||||||
|
]
|
||||||
|
sb.state = "started"
|
||||||
|
env = make_env(sandbox=sb, timeout=42)
|
||||||
|
|
||||||
|
env.execute("echo hello")
|
||||||
|
# The command sent to exec should be wrapped with `timeout N sh -c '...'`
|
||||||
|
call_args = sb.process.exec.call_args_list[-1]
|
||||||
|
cmd = call_args[0][0]
|
||||||
|
assert cmd.startswith("timeout 42 sh -c ")
|
||||||
|
# SDK timeout param should NOT be passed
|
||||||
|
assert "timeout" not in call_args[1]
|
||||||
|
|
||||||
|
def test_timeout_returns_exit_code_124(self, make_env):
|
||||||
|
"""Shell timeout utility returns exit code 124."""
|
||||||
|
sb = _make_sandbox()
|
||||||
|
sb.process.exec.side_effect = [
|
||||||
|
_make_exec_response(result="/root"),
|
||||||
|
_make_exec_response(result="", exit_code=124),
|
||||||
|
]
|
||||||
|
sb.state = "started"
|
||||||
|
env = make_env(sandbox=sb)
|
||||||
|
|
||||||
|
result = env.execute("sleep 300", timeout=5)
|
||||||
|
assert result["returncode"] == 124
|
||||||
|
|
||||||
def test_nonzero_exit_code(self, make_env):
|
def test_nonzero_exit_code(self, make_env):
|
||||||
sb = _make_sandbox()
|
sb = _make_sandbox()
|
||||||
sb.process.exec.side_effect = [
|
sb.process.exec.side_effect = [
|
||||||
|
|
@ -223,10 +253,12 @@ class TestExecute:
|
||||||
|
|
||||||
env.execute("python3", stdin_data="print('hi')")
|
env.execute("python3", stdin_data="print('hi')")
|
||||||
# Check that the command passed to exec contains heredoc markers
|
# Check that the command passed to exec contains heredoc markers
|
||||||
|
# (single quotes get shell-escaped by shlex.quote, so check components)
|
||||||
call_args = sb.process.exec.call_args_list[-1]
|
call_args = sb.process.exec.call_args_list[-1]
|
||||||
cmd = call_args[0][0]
|
cmd = call_args[0][0]
|
||||||
assert "HERMES_EOF_" in cmd
|
assert "HERMES_EOF_" in cmd
|
||||||
assert "print('hi')" in cmd
|
assert "print" in cmd
|
||||||
|
assert "hi" in cmd
|
||||||
|
|
||||||
def test_custom_cwd_passed_through(self, make_env):
|
def test_custom_cwd_passed_through(self, make_env):
|
||||||
sb = _make_sandbox()
|
sb = _make_sandbox()
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ and resumed on next creation, preserving the filesystem across sessions.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
|
import shlex
|
||||||
import threading
|
import threading
|
||||||
import uuid
|
import uuid
|
||||||
import warnings
|
import warnings
|
||||||
|
|
@ -112,13 +113,24 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||||
logger.info("Daytona: restarted sandbox %s", self._sandbox.id)
|
logger.info("Daytona: restarted sandbox %s", self._sandbox.id)
|
||||||
|
|
||||||
def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
|
def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
|
||||||
"""Run exec in a background thread with interrupt polling."""
|
"""Run exec in a background thread with interrupt polling.
|
||||||
|
|
||||||
|
The Daytona SDK's exec(timeout=...) parameter is unreliable (the
|
||||||
|
server-side timeout is not enforced and the SDK has no client-side
|
||||||
|
fallback), so we wrap the command with the shell ``timeout`` utility
|
||||||
|
which reliably kills the process and returns exit code 124.
|
||||||
|
"""
|
||||||
|
# Wrap with shell `timeout` to enforce the deadline reliably.
|
||||||
|
# The SDK-level timeout is deliberately not passed to exec(), so the shell
|
||||||
|
# `timeout` is what enforces the deadline, giving us a clean exit code 124.
|
||||||
|
timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}"
|
||||||
|
|
||||||
result_holder: dict = {"value": None, "error": None}
|
result_holder: dict = {"value": None, "error": None}
|
||||||
|
|
||||||
def _run():
|
def _run():
|
||||||
try:
|
try:
|
||||||
response = self._sandbox.process.exec(
|
response = self._sandbox.process.exec(
|
||||||
exec_command, cwd=cwd, timeout=timeout,
|
timed_command, cwd=cwd,
|
||||||
)
|
)
|
||||||
result_holder["value"] = {
|
result_holder["value"] = {
|
||||||
"output": response.result or "",
|
"output": response.result or "",
|
||||||
|
|
@ -129,8 +141,11 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||||
|
|
||||||
t = threading.Thread(target=_run, daemon=True)
|
t = threading.Thread(target=_run, daemon=True)
|
||||||
t.start()
|
t.start()
|
||||||
|
# Wait for timeout + generous buffer for network/SDK overhead
|
||||||
|
deadline = timeout + 10
|
||||||
while t.is_alive():
|
while t.is_alive():
|
||||||
t.join(timeout=0.2)
|
t.join(timeout=0.2)
|
||||||
|
deadline -= 0.2
|
||||||
if is_interrupted():
|
if is_interrupted():
|
||||||
with self._lock:
|
with self._lock:
|
||||||
try:
|
try:
|
||||||
|
|
@ -141,6 +156,14 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||||
"output": "[Command interrupted - Daytona sandbox stopped]",
|
"output": "[Command interrupted - Daytona sandbox stopped]",
|
||||||
"returncode": 130,
|
"returncode": 130,
|
||||||
}
|
}
|
||||||
|
if deadline <= 0:
|
||||||
|
# Shell timeout didn't fire and SDK is hung — force stop
|
||||||
|
with self._lock:
|
||||||
|
try:
|
||||||
|
self._sandbox.stop()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return self._timeout_result(timeout)
|
||||||
|
|
||||||
if result_holder["error"]:
|
if result_holder["error"]:
|
||||||
return {"error": result_holder["error"]}
|
return {"error": result_holder["error"]}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue