feat: enhance command execution with stdin support

- Modified the `_exec` method in `ShellFileOperations` to accept `stdin_data`, allowing large content to be piped directly to commands, bypassing ARG_MAX limitations.
- Updated the `execute` method in various environment classes (`_LocalEnvironment`, `_SingularityEnvironment`, `_SSHEnvironment`, `_DockerEnvironment`) to support `stdin_data`, improving command execution flexibility.
- Removed the unique marker generation for heredoc in favor of direct stdin piping, simplifying file writing operations and enhancing performance for large files.
This commit is contained in:
teknium1 2026-02-19 14:50:51 -08:00
parent 3191a9ba11
commit d49af633f0
2 changed files with 103 additions and 52 deletions

View file

@ -28,7 +28,6 @@ Usage:
import os import os
import re import re
import json import json
import uuid
import difflib import difflib
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass, field from dataclasses import dataclass, field
@ -267,11 +266,19 @@ class ShellFileOperations(FileOperations):
# Cache for command availability checks # Cache for command availability checks
self._command_cache: Dict[str, bool] = {} self._command_cache: Dict[str, bool] = {}
def _exec(self, command: str, cwd: str = None, timeout: int = None) -> ExecuteResult: def _exec(self, command: str, cwd: str = None, timeout: int = None,
"""Execute command via terminal backend.""" stdin_data: str = None) -> ExecuteResult:
"""Execute command via terminal backend.
Args:
stdin_data: If provided, piped to the process's stdin instead of
embedding in the command string. Bypasses ARG_MAX.
"""
kwargs = {} kwargs = {}
if timeout: if timeout:
kwargs['timeout'] = timeout kwargs['timeout'] = timeout
if stdin_data is not None:
kwargs['stdin_data'] = stdin_data
result = self.env.execute(command, cwd=cwd or self.cwd, **kwargs) result = self.env.execute(command, cwd=cwd or self.cwd, **kwargs)
return ExecuteResult( return ExecuteResult(
@ -535,7 +542,9 @@ class ShellFileOperations(FileOperations):
""" """
Write content to a file, creating parent directories as needed. Write content to a file, creating parent directories as needed.
Uses heredoc with unique marker for safe shell execution. Pipes content through stdin to avoid OS ARG_MAX limits on large
files. The content never appears in the shell command string
only the file path does.
Args: Args:
path: File path to write path: File path to write
@ -557,15 +566,10 @@ class ShellFileOperations(FileOperations):
if mkdir_result.exit_code == 0: if mkdir_result.exit_code == 0:
dirs_created = True dirs_created = True
# Generate unique marker for heredoc that won't appear in content # Write via stdin pipe — content bypasses shell arg parsing entirely,
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}" # so there's no ARG_MAX limit regardless of file size.
while marker in content: write_cmd = f"cat > {self._escape_shell_arg(path)}"
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}" write_result = self._exec(write_cmd, stdin_data=content)
# Write using heredoc with single-quoted marker (prevents all expansion)
# The single quotes around the marker prevent variable expansion
write_cmd = f"cat > {self._escape_shell_arg(path)} << '{marker}'\n{content}\n{marker}"
write_result = self._exec(write_cmd)
if write_result.exit_code != 0: if write_result.exit_code != 0:
return WriteResult(error=f"Failed to write file: {write_result.stdout}") return WriteResult(error=f"Failed to write file: {write_result.stdout}")

View file

@ -635,7 +635,8 @@ class _LocalEnvironment:
self.timeout = timeout self.timeout = timeout
self.env = env or {} self.env = env or {}
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict: def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
stdin_data: str | None = None) -> dict:
""" """
Execute a command locally with sudo support. Execute a command locally with sudo support.
@ -647,6 +648,10 @@ class _LocalEnvironment:
pipe buffer deadlocks. Without this, commands producing >64KB of pipe buffer deadlocks. Without this, commands producing >64KB of
output would block (Linux pipe buffer = 64KB) while the poll loop output would block (Linux pipe buffer = 64KB) while the poll loop
waits for the process to finish a classic deadlock. waits for the process to finish a classic deadlock.
Args:
stdin_data: If provided, piped to the process's stdin. This
bypasses shell ARG_MAX limits for large content.
""" """
work_dir = cwd or self.cwd or os.getcwd() work_dir = cwd or self.cwd or os.getcwd()
effective_timeout = timeout or self.timeout effective_timeout = timeout or self.timeout
@ -665,11 +670,24 @@ class _LocalEnvironment:
errors="replace", errors="replace",
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
# Start in a new process group so we can kill the whole tree # Start in a new process group so we can kill the whole tree
preexec_fn=os.setsid, preexec_fn=os.setsid,
) )
# Pipe stdin_data in a background thread to avoid deadlock
# (large writes can block if the pipe buffer fills before the
# process drains it).
if stdin_data is not None:
def _write_stdin():
try:
proc.stdin.write(stdin_data)
proc.stdin.close()
except (BrokenPipeError, OSError):
pass
stdin_writer = threading.Thread(target=_write_stdin, daemon=True)
stdin_writer.start()
# Drain stdout in a background thread to prevent pipe buffer # Drain stdout in a background thread to prevent pipe buffer
# deadlocks. The OS pipe buffer is 64KB on Linux; if the child # deadlocks. The OS pipe buffer is 64KB on Linux; if the child
# writes more than that before anyone reads, it blocks forever. # writes more than that before anyone reads, it blocks forever.
@ -798,7 +816,8 @@ class _SingularityEnvironment:
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
raise RuntimeError("Instance start timed out") raise RuntimeError("Instance start timed out")
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict: def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
stdin_data: str | None = None) -> dict:
"""Execute a command in the persistent Singularity instance. """Execute a command in the persistent Singularity instance.
All commands run in the same container, so files, installs, and All commands run in the same container, so files, installs, and
@ -822,17 +841,21 @@ class _SingularityEnvironment:
# Execute the command # Execute the command
cmd.extend(["bash", "-c", exec_command]) cmd.extend(["bash", "-c", exec_command])
run_kwargs = {
"text": True,
"timeout": timeout or self.timeout,
"encoding": "utf-8",
"errors": "replace",
"stdout": subprocess.PIPE,
"stderr": subprocess.STDOUT,
}
if stdin_data is not None:
run_kwargs["input"] = stdin_data
else:
run_kwargs["stdin"] = subprocess.DEVNULL
try: try:
result = subprocess.run( result = subprocess.run(cmd, **run_kwargs)
cmd,
text=True,
timeout=timeout or self.timeout,
encoding="utf-8",
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode} return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
return {"output": f"Command timed out after {timeout or self.timeout}s", "returncode": 124} return {"output": f"Command timed out after {timeout or self.timeout}s", "returncode": 124}
@ -944,7 +967,8 @@ class _SSHEnvironment:
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out") raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict: def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
stdin_data: str | None = None) -> dict:
"""Execute a command on the remote host via SSH.""" """Execute a command on the remote host via SSH."""
work_dir = cwd or self.cwd work_dir = cwd or self.cwd
effective_timeout = timeout or self.timeout effective_timeout = timeout or self.timeout
@ -953,23 +977,26 @@ class _SSHEnvironment:
exec_command = _transform_sudo_command(command) exec_command = _transform_sudo_command(command)
# Wrap command to run in the correct directory # Wrap command to run in the correct directory
# Use bash -c to handle complex commands properly
wrapped_command = f'cd {work_dir} && {exec_command}' wrapped_command = f'cd {work_dir} && {exec_command}'
cmd = self._build_ssh_command() cmd = self._build_ssh_command()
cmd.extend(["bash", "-c", wrapped_command]) cmd.extend(["bash", "-c", wrapped_command])
run_kwargs = {
"text": True,
"timeout": effective_timeout,
"encoding": "utf-8",
"errors": "replace",
"stdout": subprocess.PIPE,
"stderr": subprocess.STDOUT,
}
if stdin_data is not None:
run_kwargs["input"] = stdin_data
else:
run_kwargs["stdin"] = subprocess.DEVNULL
try: try:
result = subprocess.run( result = subprocess.run(cmd, **run_kwargs)
cmd,
text=True,
timeout=effective_timeout,
encoding="utf-8",
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode} return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124} return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
@ -1020,7 +1047,8 @@ class _DockerEnvironment:
self.cwd = cwd self.cwd = cwd
self.timeout = timeout self.timeout = timeout
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict: def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
stdin_data: str | None = None) -> dict:
"""Execute a command in the Docker container with sudo support.""" """Execute a command in the Docker container with sudo support."""
# Transform sudo commands if SUDO_PASSWORD is available # Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command) exec_command = _transform_sudo_command(command)
@ -1031,7 +1059,10 @@ class _DockerEnvironment:
# Get container_id from inner environment # Get container_id from inner environment
assert self._inner.container_id, "Container not started" assert self._inner.container_id, "Container not started"
cmd = [self._inner.config.executable, "exec", "-w", work_dir] cmd = [self._inner.config.executable, "exec"]
if stdin_data is not None:
cmd.append("-i") # Enable stdin piping into the container
cmd.extend(["-w", work_dir])
for key in self._inner.config.forward_env: for key in self._inner.config.forward_env:
if (value := os.getenv(key)) is not None: if (value := os.getenv(key)) is not None:
cmd.extend(["-e", f"{key}={value}"]) cmd.extend(["-e", f"{key}={value}"])
@ -1039,17 +1070,21 @@ class _DockerEnvironment:
cmd.extend(["-e", f"{key}={value}"]) cmd.extend(["-e", f"{key}={value}"])
cmd.extend([self._inner.container_id, "bash", "-lc", exec_command]) cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
run_kwargs = {
"text": True,
"timeout": effective_timeout,
"encoding": "utf-8",
"errors": "replace",
"stdout": subprocess.PIPE,
"stderr": subprocess.STDOUT,
}
if stdin_data is not None:
run_kwargs["input"] = stdin_data
else:
run_kwargs["stdin"] = subprocess.DEVNULL
try: try:
result = subprocess.run( result = subprocess.run(cmd, **run_kwargs)
cmd,
text=True,
timeout=effective_timeout,
encoding="utf-8",
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode} return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124} return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
@ -1110,8 +1145,20 @@ class _ModalEnvironment:
self.cwd = cwd self.cwd = cwd
self.timeout = timeout self.timeout = timeout
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict: def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
"""Execute a command in Modal with sudo support.""" stdin_data: str | None = None) -> dict:
"""Execute a command in Modal with sudo support.
Modal uses HTTP transport (no execve), so there's no ARG_MAX limit.
When stdin_data is provided, we embed it as a heredoc since there's
no process-level stdin pipe to the cloud sandbox.
"""
if stdin_data is not None:
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
while marker in stdin_data:
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"
# Transform sudo commands if SUDO_PASSWORD is available # Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command) exec_command = _transform_sudo_command(command)