Merge branch 'main' into feat/honcho-integration

This commit is contained in:
Teknium 2026-02-27 23:32:49 -08:00 committed by GitHub
commit 4a9086b848
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
73 changed files with 7080 additions and 280 deletions

View file

@ -812,10 +812,11 @@ def _extract_relevant_content(
)
try:
from agent.auxiliary_client import auxiliary_max_tokens_param
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[{"role": "user", "content": extraction_prompt}],
max_tokens=4000,
**auxiliary_max_tokens_param(4000),
temperature=0.1,
)
return response.choices[0].message.content
@ -1283,6 +1284,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
)
# Use the sync auxiliary vision client directly
from agent.auxiliary_client import auxiliary_max_tokens_param
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[
@ -1294,7 +1296,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
],
}
],
max_tokens=2000,
**auxiliary_max_tokens_param(2000),
temperature=0.1,
)

View file

@ -27,7 +27,7 @@ from cron.jobs import create_job, get_job, list_jobs, remove_job
# ---------------------------------------------------------------------------
_CRON_THREAT_PATTERNS = [
(r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
(r'ignore\s+(?:\w+\s+)*(?:previous|all|above|prior)\s+(?:\w+\s+)*instructions', "prompt_injection"),
(r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
(r'system\s+prompt\s+override', "sys_prompt_override"),
(r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),

View file

@ -55,6 +55,7 @@ class DockerEnvironment(BaseEnvironment):
disk: int = 0,
persistent_filesystem: bool = False,
task_id: str = "default",
volumes: list = None,
network: bool = True,
):
if cwd == "~":
@ -64,6 +65,11 @@ class DockerEnvironment(BaseEnvironment):
self._persistent = persistent_filesystem
self._task_id = task_id
self._container_id: Optional[str] = None
logger.info(f"DockerEnvironment volumes: {volumes}")
# Ensure volumes is a list (config.yaml could be malformed)
if volumes is not None and not isinstance(volumes, list):
logger.warning(f"docker_volumes config is not a list: {volumes!r}")
volumes = []
from minisweagent.environments.docker import DockerEnvironment as _Docker
@ -111,7 +117,23 @@ class DockerEnvironment(BaseEnvironment):
# All containers get full security hardening (read-only root + writable
# mounts for the workspace). Persistence uses Docker volumes, not
# filesystem layer commits, so --read-only is always safe.
all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args
# User-configured volume mounts (from config.yaml docker_volumes)
volume_args = []
for vol in (volumes or []):
if not isinstance(vol, str):
logger.warning(f"Docker volume entry is not a string: {vol!r}")
continue
vol = vol.strip()
if not vol:
continue
if ":" in vol:
volume_args.extend(["-v", vol])
else:
logger.warning(f"Docker volume '{vol}' missing colon, skipping")
logger.info(f"Docker volume_args: {volume_args}")
all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
logger.info(f"Docker run_args: {all_run_args}")
self._inner = _Docker(
image=image, cwd=cwd, timeout=timeout,

View file

@ -1,6 +1,7 @@
"""Local execution environment with interrupt support and non-blocking I/O."""
import os
import shutil
import signal
import subprocess
import threading
@ -8,6 +9,23 @@ import time
from tools.environments.base import BaseEnvironment
# Noise lines emitted by interactive shells when stdin is not a terminal.
# Filtered from output to keep tool results clean.
_SHELL_NOISE = frozenset({
"bash: no job control in this shell",
"bash: no job control in this shell\n",
"no job control in this shell",
"no job control in this shell\n",
})
def _clean_shell_noise(output: str) -> str:
"""Strip shell startup warnings that leak when using -i without a TTY."""
lines = output.split("\n", 2) # only check first two lines
if lines and lines[0].strip() in _SHELL_NOISE:
return "\n".join(lines[1:])
return output
class LocalEnvironment(BaseEnvironment):
"""Run commands directly on the host machine.
@ -17,6 +35,7 @@ class LocalEnvironment(BaseEnvironment):
- Background stdout drain thread to prevent pipe buffer deadlocks
- stdin_data support for piping content (bypasses ARG_MAX limits)
- sudo -S transform via SUDO_PASSWORD env var
- Uses interactive login shell so full user env is available
"""
def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None):
@ -32,9 +51,15 @@ class LocalEnvironment(BaseEnvironment):
exec_command = self._prepare_command(command)
try:
# Use the user's shell as an interactive login shell (-lic) so
# that ALL rc files are sourced — including content after the
# interactive guard in .bashrc (case $- in *i*)..esac) where
# tools like nvm, pyenv, and cargo install their init scripts.
# -l alone isn't enough: .profile sources .bashrc, but the guard
# returns early because the shell isn't interactive.
user_shell = os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
proc = subprocess.Popen(
exec_command,
shell=True,
[user_shell, "-lic", exec_command],
text=True,
cwd=work_dir,
env=os.environ | self.env,
@ -99,7 +124,8 @@ class LocalEnvironment(BaseEnvironment):
time.sleep(0.2)
reader.join(timeout=5)
return {"output": "".join(_output_chunks), "returncode": proc.returncode}
output = _clean_shell_noise("".join(_output_chunks))
return {"output": output, "returncode": proc.returncode}
except Exception as e:
return {"output": f"Execution error: {str(e)}", "returncode": 1}

View file

@ -42,32 +42,36 @@ from pathlib import Path
_HOME = str(Path.home())
WRITE_DENIED_PATHS = {
os.path.join(_HOME, ".ssh", "authorized_keys"),
os.path.join(_HOME, ".ssh", "id_rsa"),
os.path.join(_HOME, ".ssh", "id_ed25519"),
os.path.join(_HOME, ".ssh", "config"),
os.path.join(_HOME, ".hermes", ".env"),
os.path.join(_HOME, ".bashrc"),
os.path.join(_HOME, ".zshrc"),
os.path.join(_HOME, ".profile"),
os.path.join(_HOME, ".bash_profile"),
os.path.join(_HOME, ".zprofile"),
os.path.join(_HOME, ".netrc"),
os.path.join(_HOME, ".pgpass"),
os.path.join(_HOME, ".npmrc"),
os.path.join(_HOME, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
os.path.realpath(p) for p in [
os.path.join(_HOME, ".ssh", "authorized_keys"),
os.path.join(_HOME, ".ssh", "id_rsa"),
os.path.join(_HOME, ".ssh", "id_ed25519"),
os.path.join(_HOME, ".ssh", "config"),
os.path.join(_HOME, ".hermes", ".env"),
os.path.join(_HOME, ".bashrc"),
os.path.join(_HOME, ".zshrc"),
os.path.join(_HOME, ".profile"),
os.path.join(_HOME, ".bash_profile"),
os.path.join(_HOME, ".zprofile"),
os.path.join(_HOME, ".netrc"),
os.path.join(_HOME, ".pgpass"),
os.path.join(_HOME, ".npmrc"),
os.path.join(_HOME, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
WRITE_DENIED_PREFIXES = [
os.path.join(_HOME, ".ssh") + os.sep,
os.path.join(_HOME, ".aws") + os.sep,
os.path.join(_HOME, ".gnupg") + os.sep,
os.path.join(_HOME, ".kube") + os.sep,
"/etc/sudoers.d" + os.sep,
"/etc/systemd" + os.sep,
os.path.realpath(p) + os.sep for p in [
os.path.join(_HOME, ".ssh"),
os.path.join(_HOME, ".aws"),
os.path.join(_HOME, ".gnupg"),
os.path.join(_HOME, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
]
]
@ -441,8 +445,8 @@ class ShellFileOperations(FileOperations):
# Clamp limit
limit = min(limit, MAX_LINES)
# Check if file exists and get metadata
stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
# Check if file exists and get size (wc -c is POSIX, works on Linux + macOS)
stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null"
stat_result = self._exec(stat_cmd)
if stat_result.exit_code != 0:
@ -518,8 +522,8 @@ class ShellFileOperations(FileOperations):
def _read_image(self, path: str) -> ReadResult:
"""Read an image file, returning base64 content."""
# Get file size
stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
# Get file size (wc -c is POSIX, works on Linux + macOS)
stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null"
stat_result = self._exec(stat_cmd)
try:
file_size = int(stat_result.stdout.strip())
@ -648,8 +652,8 @@ class ShellFileOperations(FileOperations):
if write_result.exit_code != 0:
return WriteResult(error=f"Failed to write file: {write_result.stdout}")
# Get bytes written
stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
# Get bytes written (wc -c is POSIX, works on Linux + macOS)
stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null"
stat_result = self._exec(stat_cmd)
try:

View file

@ -81,11 +81,20 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
cwd = overrides.get("cwd") or config["cwd"]
logger.info("Creating new %s environment for task %s...", env_type, task_id[:8])
container_config = None
if env_type in ("docker", "singularity", "modal"):
container_config = {
"container_cpu": config.get("container_cpu", 1),
"container_memory": config.get("container_memory", 5120),
"container_disk": config.get("container_disk", 51200),
"container_persistent": config.get("container_persistent", True),
}
terminal_env = _create_environment(
env_type=env_type,
image=image,
cwd=cwd,
timeout=config["timeout"],
container_config=container_config,
)
with _env_lock:

View file

@ -345,7 +345,9 @@ class MemoryStore:
if not raw.strip():
return []
entries = [e.strip() for e in raw.split("§")]
# Use ENTRY_DELIMITER for consistency with _write_file. Splitting by "§"
# alone would incorrectly split entries that contain "§" in their content.
entries = [e.strip() for e in raw.split(ENTRY_DELIMITER)]
return [e for e in entries if e]
@staticmethod

View file

@ -32,6 +32,8 @@ Usage:
import json
import logging
import os
import shlex
import shutil
import signal
import subprocess
import threading
@ -85,6 +87,14 @@ class ProcessRegistry:
- Cleanup thread (sandbox reaping coordination)
"""
# Noise lines emitted by interactive shells when stdin is not a terminal.
_SHELL_NOISE = frozenset({
"bash: no job control in this shell",
"bash: no job control in this shell\n",
"no job control in this shell",
"no job control in this shell\n",
})
def __init__(self):
self._running: Dict[str, ProcessSession] = {}
self._finished: Dict[str, ProcessSession] = {}
@ -93,6 +103,14 @@ class ProcessRegistry:
# Side-channel for check_interval watchers (gateway reads after agent run)
self.pending_watchers: List[Dict[str, Any]] = []
@staticmethod
def _clean_shell_noise(text: str) -> str:
"""Strip shell startup warnings from the beginning of output."""
lines = text.split("\n", 2)
if lines and lines[0].strip() in ProcessRegistry._SHELL_NOISE:
return "\n".join(lines[1:])
return text
# ----- Spawn -----
def spawn_local(
@ -127,8 +145,9 @@ class ProcessRegistry:
# Try PTY mode for interactive CLI tools
try:
import ptyprocess
user_shell = os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
pty_proc = ptyprocess.PtyProcess.spawn(
["bash", "-c", command],
[user_shell, "-lic", command],
cwd=session.cwd,
env=os.environ | (env_vars or {}),
dimensions=(30, 120),
@ -160,9 +179,11 @@ class ProcessRegistry:
logger.warning("PTY spawn failed (%s), falling back to pipe mode", e)
# Standard Popen path (non-PTY or PTY fallback)
# Use the user's login shell for consistency with LocalEnvironment --
# ensures rc files are sourced and user tools are available.
user_shell = os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
proc = subprocess.Popen(
command,
shell=True,
[user_shell, "-lic", command],
text=True,
cwd=session.cwd,
env=os.environ | (env_vars or {}),
@ -227,8 +248,9 @@ class ProcessRegistry:
# Run the command in the sandbox with output capture
log_path = f"/tmp/hermes_bg_{session.id}.log"
pid_path = f"/tmp/hermes_bg_{session.id}.pid"
quoted_command = shlex.quote(command)
bg_command = (
f"nohup bash -c '{command}' > {log_path} 2>&1 & "
f"nohup bash -c {quoted_command} > {log_path} 2>&1 & "
f"echo $! > {pid_path} && cat {pid_path}"
)
@ -268,11 +290,15 @@ class ProcessRegistry:
def _reader_loop(self, session: ProcessSession):
"""Background thread: read stdout from a local Popen process."""
first_chunk = True
try:
while True:
chunk = session.process.stdout.read(4096)
if not chunk:
break
if first_chunk:
chunk = self._clean_shell_noise(chunk)
first_chunk = False
with session._lock:
session.output_buffer += chunk
if len(session.output_buffer) > session.max_output_chars:

View file

@ -170,7 +170,7 @@ async def _summarize_session(
max_retries = 3
for attempt in range(max_retries):
try:
from agent.auxiliary_client import get_auxiliary_extra_body
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
_extra = get_auxiliary_extra_body()
response = await _async_aux_client.chat.completions.create(
model=_SUMMARIZER_MODEL,
@ -180,7 +180,7 @@ async def _summarize_session(
],
**({} if not _extra else {"extra_body": _extra}),
temperature=0.1,
max_tokens=MAX_SUMMARY_TOKENS,
**auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
)
return response.choices[0].message.content.strip()
except Exception as e:

View file

@ -319,7 +319,9 @@ def _transform_sudo_command(command: str) -> str:
# Replace 'sudo' with password-piped version
# The -S flag makes sudo read password from stdin
# The -p '' suppresses the password prompt
return f"echo '{sudo_password}' | sudo -S -p ''"
# Use shlex.quote() to prevent shell injection via password content
import shlex
return f"echo {shlex.quote(sudo_password)} | sudo -S -p ''"
# Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
# This handles: sudo, sudo -flag, etc.
@ -445,6 +447,7 @@ def _get_env_config() -> Dict[str, Any]:
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
"container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
"docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
}
@ -471,6 +474,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
memory = cc.get("container_memory", 5120)
disk = cc.get("container_disk", 51200)
persistent = cc.get("container_persistent", True)
volumes = cc.get("docker_volumes", [])
if env_type == "local":
return _LocalEnvironment(cwd=cwd, timeout=timeout)
@ -480,6 +484,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
image=image, cwd=cwd, timeout=timeout,
cpu=cpu, memory=memory, disk=disk,
persistent_filesystem=persistent, task_id=task_id,
volumes=volumes,
)
elif env_type == "singularity":
@ -593,7 +598,7 @@ def _cleanup_thread_worker():
config = _get_env_config()
_cleanup_inactive_envs(config["lifetime_seconds"])
except Exception as e:
logger.warning("Error in cleanup thread: %s", e)
logger.warning("Error in cleanup thread: %s", e, exc_info=True)
for _ in range(60):
if not _cleanup_running:
@ -617,7 +622,10 @@ def _stop_cleanup_thread():
global _cleanup_running
_cleanup_running = False
if _cleanup_thread is not None:
_cleanup_thread.join(timeout=5)
try:
_cleanup_thread.join(timeout=5)
except (SystemExit, KeyboardInterrupt):
pass
def get_active_environments_info() -> Dict[str, Any]:
@ -658,7 +666,7 @@ def cleanup_all_environments():
cleanup_vm(task_id)
cleaned += 1
except Exception as e:
logger.error("Error cleaning %s: %s", task_id, e)
logger.error("Error cleaning %s: %s", task_id, e, exc_info=True)
# Also clean any orphaned directories
scratch_dir = _get_scratch_dir()
@ -848,6 +856,7 @@ def terminal_tool(
"container_memory": config.get("container_memory", 5120),
"container_disk": config.get("container_disk", 51200),
"container_persistent": config.get("container_persistent", True),
"docker_volumes": config.get("docker_volumes", []),
}
new_env = _create_environment(
@ -1068,6 +1077,10 @@ def check_terminal_requirements() -> bool:
result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
return result.returncode == 0
return False
elif env_type == "ssh":
from tools.environments.ssh import SSHEnvironment
# Check that host and user are configured
return bool(config.get("ssh_host")) and bool(config.get("ssh_user"))
elif env_type == "modal":
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
# Check for modal token

View file

@ -50,10 +50,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
- "transcript" (str): The transcribed text (empty on failure)
- "error" (str, optional): Error message if success is False
"""
# Use VOICE_TOOLS_OPENAI_KEY to avoid interference with the OpenAI SDK's
# auto-detection of OPENAI_API_KEY (which would break OpenRouter calls).
# Falls back to OPENAI_API_KEY for backward compatibility.
api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY") or os.getenv("OPENAI_API_KEY")
api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY")
if not api_key:
return {
"success": False,

View file

@ -210,7 +210,7 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
Returns:
Path to the saved audio file.
"""
api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY") or os.getenv("OPENAI_API_KEY", "")
api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY", "")
if not api_key:
raise ValueError("VOICE_TOOLS_OPENAI_KEY not set. Get one at https://platform.openai.com/api-keys")
@ -392,7 +392,7 @@ def check_tts_requirements() -> bool:
return True
if _HAS_ELEVENLABS and os.getenv("ELEVENLABS_API_KEY"):
return True
if _HAS_OPENAI and (os.getenv("VOICE_TOOLS_OPENAI_KEY") or os.getenv("OPENAI_API_KEY")):
if _HAS_OPENAI and os.getenv("VOICE_TOOLS_OPENAI_KEY"):
return True
return False
@ -409,7 +409,7 @@ if __name__ == "__main__":
print(f" ElevenLabs: {'✅ installed' if _HAS_ELEVENLABS else '❌ not installed (pip install elevenlabs)'}")
print(f" API Key: {'✅ set' if os.getenv('ELEVENLABS_API_KEY') else '❌ not set'}")
print(f" OpenAI: {'✅ installed' if _HAS_OPENAI else '❌ not installed'}")
print(f" API Key: {'✅ set' if (os.getenv('VOICE_TOOLS_OPENAI_KEY') or os.getenv('OPENAI_API_KEY')) else '❌ not set'}")
print(f" API Key: {'✅ set' if os.getenv('VOICE_TOOLS_OPENAI_KEY') else '❌ not set (VOICE_TOOLS_OPENAI_KEY)'}")
print(f" ffmpeg: {'✅ found' if _has_ffmpeg() else '❌ not found (needed for Telegram Opus)'}")
print(f"\n Output dir: {DEFAULT_OUTPUT_DIR}")

View file

@ -314,13 +314,13 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model)
# Call the vision API
from agent.auxiliary_client import get_auxiliary_extra_body
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
_extra = get_auxiliary_extra_body()
response = await _aux_async_client.chat.completions.create(
model=model,
messages=messages,
temperature=0.1,
max_tokens=2000,
**auxiliary_max_tokens_param(2000),
**({} if not _extra else {"extra_body": _extra}),
)

View file

@ -242,7 +242,7 @@ Create a markdown summary that captures all key information in a well-organized,
if _aux_async_client is None:
logger.warning("No auxiliary model available for web content processing")
return None
from agent.auxiliary_client import get_auxiliary_extra_body
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
_extra = get_auxiliary_extra_body()
response = await _aux_async_client.chat.completions.create(
model=model,
@ -251,7 +251,7 @@ Create a markdown summary that captures all key information in a well-organized,
{"role": "user", "content": user_prompt}
],
temperature=0.1,
max_tokens=max_tokens,
**auxiliary_max_tokens_param(max_tokens),
**({} if not _extra else {"extra_body": _extra}),
)
return response.choices[0].message.content.strip()
@ -365,7 +365,7 @@ Create a single, unified markdown summary."""
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
from agent.auxiliary_client import get_auxiliary_extra_body
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
_extra = get_auxiliary_extra_body()
response = await _aux_async_client.chat.completions.create(
model=model,
@ -374,7 +374,7 @@ Create a single, unified markdown summary."""
{"role": "user", "content": synthesis_prompt}
],
temperature=0.1,
max_tokens=4000,
**auxiliary_max_tokens_param(4000),
**({} if not _extra else {"extra_body": _extra}),
)
final_summary = response.choices[0].message.content.strip()
@ -1240,7 +1240,7 @@ WEB_SEARCH_SCHEMA = {
WEB_EXTRACT_SCHEMA = {
"name": "web_extract",
"description": "Extract content from web page URLs. Returns page content in markdown format. Pages under 5000 chars return full markdown; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. If a URL fails or times out, use the browser tool to access it instead.",
"description": "Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs (arxiv papers, documents, etc.) — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. If a URL fails or times out, use the browser tool to access it instead.",
"parameters": {
"type": "object",
"properties": {