Merge branch 'main' into feat/honcho-integration

2026-02-27 23:32:49 -08:00 · 2026-02-27 23:32:49 -08:00 · 4a9086b848
commit 4a9086b848
parent 70d1abf81b 50cb4d5fc7
73 changed files with 7080 additions and 280 deletions
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@ -812,10 +812,11 @@ def _extract_relevant_content(
        )

    try:
+        from agent.auxiliary_client import auxiliary_max_tokens_param
        response = _aux_vision_client.chat.completions.create(
            model=EXTRACTION_MODEL,
            messages=[{"role": "user", "content": extraction_prompt}],
-            max_tokens=4000,
+            **auxiliary_max_tokens_param(4000),
            temperature=0.1,
        )
        return response.choices[0].message.content
@ -1283,6 +1284,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
        )

        # Use the sync auxiliary vision client directly
+        from agent.auxiliary_client import auxiliary_max_tokens_param
        response = _aux_vision_client.chat.completions.create(
            model=EXTRACTION_MODEL,
            messages=[
@ -1294,7 +1296,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
                    ],
                }
            ],
-            max_tokens=2000,
+            **auxiliary_max_tokens_param(2000),
            temperature=0.1,
        )
        
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@ -27,7 +27,7 @@ from cron.jobs import create_job, get_job, list_jobs, remove_job
 # ---------------------------------------------------------------------------

 _CRON_THREAT_PATTERNS = [
-    (r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
+    (r'ignore\s+(?:\w+\s+)*(?:previous|all|above|prior)\s+(?:\w+\s+)*instructions', "prompt_injection"),
    (r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
    (r'system\s+prompt\s+override', "sys_prompt_override"),
    (r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@ -55,6 +55,7 @@ class DockerEnvironment(BaseEnvironment):
        disk: int = 0,
        persistent_filesystem: bool = False,
        task_id: str = "default",
+        volumes: list = None,
        network: bool = True,
    ):
        if cwd == "~":
@ -64,6 +65,11 @@ class DockerEnvironment(BaseEnvironment):
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._container_id: Optional[str] = None
+        logger.info(f"DockerEnvironment volumes: {volumes}")
+        # Ensure volumes is a list (config.yaml could be malformed)
+        if volumes is not None and not isinstance(volumes, list):
+            logger.warning(f"docker_volumes config is not a list: {volumes!r}")
+            volumes = []

        from minisweagent.environments.docker import DockerEnvironment as _Docker

@ -111,7 +117,23 @@ class DockerEnvironment(BaseEnvironment):
        # All containers get full security hardening (read-only root + writable
        # mounts for the workspace). Persistence uses Docker volumes, not
        # filesystem layer commits, so --read-only is always safe.
-        all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args
+        # User-configured volume mounts (from config.yaml docker_volumes)
+        volume_args = []
+        for vol in (volumes or []):
+            if not isinstance(vol, str):
+                logger.warning(f"Docker volume entry is not a string: {vol!r}")
+                continue
+            vol = vol.strip()
+            if not vol:
+                continue
+            if ":" in vol:
+                volume_args.extend(["-v", vol])
+            else:
+                logger.warning(f"Docker volume '{vol}' missing colon, skipping")
+
+        logger.info(f"Docker volume_args: {volume_args}")
+        all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
+        logger.info(f"Docker run_args: {all_run_args}")

        self._inner = _Docker(
            image=image, cwd=cwd, timeout=timeout,
--- a/tools/environments/local.py
+++ b/tools/environments/local.py
@ -1,6 +1,7 @@
 """Local execution environment with interrupt support and non-blocking I/O."""

 import os
+import shutil
 import signal
 import subprocess
 import threading
@ -8,6 +9,23 @@ import time

 from tools.environments.base import BaseEnvironment

+# Noise lines emitted by interactive shells when stdin is not a terminal.
+# Filtered from output to keep tool results clean.
+_SHELL_NOISE = frozenset({
+    # Compared against stripped lines, so no trailing-"\n" variants are needed.
+    "bash: no job control in this shell",
+    "no job control in this shell",
+})
+# bash prints this (with a varying process-group id) right before the
+# "no job control" message, so it has to be matched by prefix.
+_SHELL_NOISE_PREFIXES = ("bash: cannot set terminal process group",)
+
+
+def _clean_shell_noise(output: str) -> str:
+    """Strip leading shell startup warnings that leak when using -i without a TTY.
+
+    Every consecutive noise line at the very start of *output* is dropped.
+    Text after the first non-noise line is never touched, so identical
+    strings appearing later in real command output are preserved.
+
+    Args:
+        output: Combined output captured from the shell.
+
+    Returns:
+        *output* without its leading noise lines (unchanged if there are none).
+    """
+    while output:
+        first_line, _, remainder = output.partition("\n")
+        stripped = first_line.strip()
+        if stripped not in _SHELL_NOISE and not stripped.startswith(_SHELL_NOISE_PREFIXES):
+            break
+        output = remainder
+    return output
+

 class LocalEnvironment(BaseEnvironment):
    """Run commands directly on the host machine.
@ -17,6 +35,7 @@ class LocalEnvironment(BaseEnvironment):
    - Background stdout drain thread to prevent pipe buffer deadlocks
    - stdin_data support for piping content (bypasses ARG_MAX limits)
    - sudo -S transform via SUDO_PASSWORD env var
+    - Uses interactive login shell so full user env is available
    """

    def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None):
@ -32,9 +51,15 @@ class LocalEnvironment(BaseEnvironment):
        exec_command = self._prepare_command(command)

        try:
+            # Use the user's shell as an interactive login shell (-lic) so
+            # that ALL rc files are sourced — including content after the
+            # interactive guard in .bashrc (case $- in *i*)..esac) where
+            # tools like nvm, pyenv, and cargo install their init scripts.
+            # -l alone isn't enough: .profile sources .bashrc, but the guard
+            # returns early because the shell isn't interactive.
+            user_shell = os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
            proc = subprocess.Popen(
-                exec_command,
-                shell=True,
+                [user_shell, "-lic", exec_command],
                text=True,
                cwd=work_dir,
                env=os.environ | self.env,
@ -99,7 +124,8 @@ class LocalEnvironment(BaseEnvironment):
                time.sleep(0.2)

            reader.join(timeout=5)
-            return {"output": "".join(_output_chunks), "returncode": proc.returncode}
+            output = _clean_shell_noise("".join(_output_chunks))
+            return {"output": output, "returncode": proc.returncode}

        except Exception as e:
            return {"output": f"Execution error: {str(e)}", "returncode": 1}
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@ -42,32 +42,36 @@ from pathlib import Path
 _HOME = str(Path.home())

 WRITE_DENIED_PATHS = {
-    os.path.join(_HOME, ".ssh", "authorized_keys"),
-    os.path.join(_HOME, ".ssh", "id_rsa"),
-    os.path.join(_HOME, ".ssh", "id_ed25519"),
-    os.path.join(_HOME, ".ssh", "config"),
-    os.path.join(_HOME, ".hermes", ".env"),
-    os.path.join(_HOME, ".bashrc"),
-    os.path.join(_HOME, ".zshrc"),
-    os.path.join(_HOME, ".profile"),
-    os.path.join(_HOME, ".bash_profile"),
-    os.path.join(_HOME, ".zprofile"),
-    os.path.join(_HOME, ".netrc"),
-    os.path.join(_HOME, ".pgpass"),
-    os.path.join(_HOME, ".npmrc"),
-    os.path.join(_HOME, ".pypirc"),
-    "/etc/sudoers",
-    "/etc/passwd",
-    "/etc/shadow",
+    os.path.realpath(p) for p in [
+        os.path.join(_HOME, ".ssh", "authorized_keys"),
+        os.path.join(_HOME, ".ssh", "id_rsa"),
+        os.path.join(_HOME, ".ssh", "id_ed25519"),
+        os.path.join(_HOME, ".ssh", "config"),
+        os.path.join(_HOME, ".hermes", ".env"),
+        os.path.join(_HOME, ".bashrc"),
+        os.path.join(_HOME, ".zshrc"),
+        os.path.join(_HOME, ".profile"),
+        os.path.join(_HOME, ".bash_profile"),
+        os.path.join(_HOME, ".zprofile"),
+        os.path.join(_HOME, ".netrc"),
+        os.path.join(_HOME, ".pgpass"),
+        os.path.join(_HOME, ".npmrc"),
+        os.path.join(_HOME, ".pypirc"),
+        "/etc/sudoers",
+        "/etc/passwd",
+        "/etc/shadow",
+    ]
 }

 WRITE_DENIED_PREFIXES = [
-    os.path.join(_HOME, ".ssh") + os.sep,
-    os.path.join(_HOME, ".aws") + os.sep,
-    os.path.join(_HOME, ".gnupg") + os.sep,
-    os.path.join(_HOME, ".kube") + os.sep,
-    "/etc/sudoers.d" + os.sep,
-    "/etc/systemd" + os.sep,
+    os.path.realpath(p) + os.sep for p in [
+        os.path.join(_HOME, ".ssh"),
+        os.path.join(_HOME, ".aws"),
+        os.path.join(_HOME, ".gnupg"),
+        os.path.join(_HOME, ".kube"),
+        "/etc/sudoers.d",
+        "/etc/systemd",
+    ]
 ]


@ -441,8 +445,8 @@ class ShellFileOperations(FileOperations):
        # Clamp limit
        limit = min(limit, MAX_LINES)
        
-        # Check if file exists and get metadata
-        stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
+        # Check if file exists and get size (wc -c is POSIX, works on Linux + macOS)
+        stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null"
        stat_result = self._exec(stat_cmd)
        
        if stat_result.exit_code != 0:
@ -518,8 +522,8 @@ class ShellFileOperations(FileOperations):

    def _read_image(self, path: str) -> ReadResult:
        """Read an image file, returning base64 content."""
-        # Get file size
-        stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
+        # Get file size (wc -c is POSIX, works on Linux + macOS)
+        stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null"
        stat_result = self._exec(stat_cmd)
        try:
            file_size = int(stat_result.stdout.strip())
@ -648,8 +652,8 @@ class ShellFileOperations(FileOperations):
        if write_result.exit_code != 0:
            return WriteResult(error=f"Failed to write file: {write_result.stdout}")
        
-        # Get bytes written
-        stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
+        # Get bytes written (wc -c is POSIX, works on Linux + macOS)
+        stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null"
        stat_result = self._exec(stat_cmd)
        
        try:
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@ -81,11 +81,20 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
            cwd = overrides.get("cwd") or config["cwd"]
            logger.info("Creating new %s environment for task %s...", env_type, task_id[:8])

+            container_config = None
+            if env_type in ("docker", "singularity", "modal"):
+                container_config = {
+                    "container_cpu": config.get("container_cpu", 1),
+                    "container_memory": config.get("container_memory", 5120),
+                    "container_disk": config.get("container_disk", 51200),
+                    "container_persistent": config.get("container_persistent", True),
+                }
            terminal_env = _create_environment(
                env_type=env_type,
                image=image,
                cwd=cwd,
                timeout=config["timeout"],
+                container_config=container_config,
            )

            with _env_lock:
--- a/tools/memory_tool.py
+++ b/tools/memory_tool.py
@ -345,7 +345,9 @@ class MemoryStore:
        if not raw.strip():
            return []

-        entries = [e.strip() for e in raw.split("§")]
+        # Use ENTRY_DELIMITER for consistency with _write_file. Splitting by "§"
+        # alone would incorrectly split entries that contain "§" in their content.
+        entries = [e.strip() for e in raw.split(ENTRY_DELIMITER)]
        return [e for e in entries if e]

    @staticmethod
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@ -32,6 +32,8 @@ Usage:
 import json
 import logging
 import os
+import shlex
+import shutil
 import signal
 import subprocess
 import threading
@ -85,6 +87,14 @@ class ProcessRegistry:
      - Cleanup thread (sandbox reaping coordination)
    """

+    # Noise lines emitted by interactive shells when stdin is not a terminal.
+    # _clean_shell_noise() compares these against a stripped line, so only the
+    # entries without a trailing "\n" can ever match.
+    _SHELL_NOISE = frozenset({
+        "bash: no job control in this shell",
+        "bash: no job control in this shell\n",
+        "no job control in this shell",
+        "no job control in this shell\n",
+    })
+
+
    def __init__(self):
        self._running: Dict[str, ProcessSession] = {}
        self._finished: Dict[str, ProcessSession] = {}
@ -93,6 +103,14 @@ class ProcessRegistry:
        # Side-channel for check_interval watchers (gateway reads after agent run)
        self.pending_watchers: List[Dict[str, Any]] = []

+    @staticmethod
+    def _clean_shell_noise(text: str) -> str:
+        """Strip shell startup warnings from the beginning of output.
+
+        Drops every consecutive leading noise line, including bash's
+        "cannot set terminal process group (<id>): ..." message, which is
+        printed just before "no job control in this shell". Text after the
+        first non-noise line is returned untouched.
+
+        Args:
+            text: Output text whose leading lines may be shell noise.
+
+        Returns:
+            *text* without its leading noise lines (unchanged if none).
+        """
+        # The process-group id in this message varies, so match by prefix.
+        pgrp_noise = "bash: cannot set terminal process group"
+        while text:
+            first_line, _, remainder = text.partition("\n")
+            stripped = first_line.strip()
+            if stripped not in ProcessRegistry._SHELL_NOISE and not stripped.startswith(pgrp_noise):
+                break
+            text = remainder
+        return text
+
    # ----- Spawn -----

    def spawn_local(
@ -127,8 +145,9 @@ class ProcessRegistry:
            # Try PTY mode for interactive CLI tools
            try:
                import ptyprocess
+                user_shell = os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
                pty_proc = ptyprocess.PtyProcess.spawn(
-                    ["bash", "-c", command],
+                    [user_shell, "-lic", command],
                    cwd=session.cwd,
                    env=os.environ | (env_vars or {}),
                    dimensions=(30, 120),
@ -160,9 +179,11 @@ class ProcessRegistry:
                logger.warning("PTY spawn failed (%s), falling back to pipe mode", e)

        # Standard Popen path (non-PTY or PTY fallback)
+        # Use the user's login shell for consistency with LocalEnvironment --
+        # ensures rc files are sourced and user tools are available.
+        user_shell = os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
        proc = subprocess.Popen(
-            command,
-            shell=True,
+            [user_shell, "-lic", command],
            text=True,
            cwd=session.cwd,
            env=os.environ | (env_vars or {}),
@ -227,8 +248,9 @@ class ProcessRegistry:
        # Run the command in the sandbox with output capture
        log_path = f"/tmp/hermes_bg_{session.id}.log"
        pid_path = f"/tmp/hermes_bg_{session.id}.pid"
+        quoted_command = shlex.quote(command)
        bg_command = (
-            f"nohup bash -c '{command}' > {log_path} 2>&1 & "
+            f"nohup bash -c {quoted_command} > {log_path} 2>&1 & "
            f"echo $! > {pid_path} && cat {pid_path}"
        )

@ -268,11 +290,15 @@ class ProcessRegistry:

    def _reader_loop(self, session: ProcessSession):
        """Background thread: read stdout from a local Popen process."""
+        first_chunk = True
        try:
            while True:
                chunk = session.process.stdout.read(4096)
                if not chunk:
                    break
+                if first_chunk:
+                    chunk = self._clean_shell_noise(chunk)
+                    first_chunk = False
                with session._lock:
                    session.output_buffer += chunk
                    if len(session.output_buffer) > session.max_output_chars:
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@ -170,7 +170,7 @@ async def _summarize_session(
    max_retries = 3
    for attempt in range(max_retries):
        try:
-            from agent.auxiliary_client import get_auxiliary_extra_body
+            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
            _extra = get_auxiliary_extra_body()
            response = await _async_aux_client.chat.completions.create(
                model=_SUMMARIZER_MODEL,
@ -180,7 +180,7 @@ async def _summarize_session(
                ],
                **({} if not _extra else {"extra_body": _extra}),
                temperature=0.1,
-                max_tokens=MAX_SUMMARY_TOKENS,
+                **auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -319,7 +319,9 @@ def _transform_sudo_command(command: str) -> str:
        # Replace 'sudo' with password-piped version
        # The -S flag makes sudo read password from stdin
        # The -p '' suppresses the password prompt
-        return f"echo '{sudo_password}' | sudo -S -p ''"
+        # Use shlex.quote() to prevent shell injection via password content
+        import shlex
+        return f"echo {shlex.quote(sudo_password)} | sudo -S -p ''"
    
    # Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
    # This handles: sudo, sudo -flag, etc.
@ -445,6 +447,7 @@ def _get_env_config() -> Dict[str, Any]:
        "container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")),     # MB (default 5GB)
        "container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")),        # MB (default 50GB)
        "container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
+        "docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
    }


@ -471,6 +474,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
    memory = cc.get("container_memory", 5120)
    disk = cc.get("container_disk", 51200)
    persistent = cc.get("container_persistent", True)
+    volumes = cc.get("docker_volumes", [])

    if env_type == "local":
        return _LocalEnvironment(cwd=cwd, timeout=timeout)
@ -480,6 +484,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
            image=image, cwd=cwd, timeout=timeout,
            cpu=cpu, memory=memory, disk=disk,
            persistent_filesystem=persistent, task_id=task_id,
+            volumes=volumes,
        )
    
    elif env_type == "singularity":
@ -593,7 +598,7 @@ def _cleanup_thread_worker():
            config = _get_env_config()
            _cleanup_inactive_envs(config["lifetime_seconds"])
        except Exception as e:
-            logger.warning("Error in cleanup thread: %s", e)
+            logger.warning("Error in cleanup thread: %s", e, exc_info=True)

        for _ in range(60):
            if not _cleanup_running:
@ -617,7 +622,10 @@ def _stop_cleanup_thread():
    global _cleanup_running
    _cleanup_running = False
    if _cleanup_thread is not None:
-        _cleanup_thread.join(timeout=5)
+        try:
+            _cleanup_thread.join(timeout=5)
+        except (SystemExit, KeyboardInterrupt):
+            pass


 def get_active_environments_info() -> Dict[str, Any]:
@ -658,7 +666,7 @@ def cleanup_all_environments():
            cleanup_vm(task_id)
            cleaned += 1
        except Exception as e:
-            logger.error("Error cleaning %s: %s", task_id, e)
+            logger.error("Error cleaning %s: %s", task_id, e, exc_info=True)
    
    # Also clean any orphaned directories
    scratch_dir = _get_scratch_dir()
@ -848,6 +856,7 @@ def terminal_tool(
                                "container_memory": config.get("container_memory", 5120),
                                "container_disk": config.get("container_disk", 51200),
                                "container_persistent": config.get("container_persistent", True),
+                                "docker_volumes": config.get("docker_volumes", []),
                            }

                        new_env = _create_environment(
@ -1068,6 +1077,10 @@ def check_terminal_requirements() -> bool:
                result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
                return result.returncode == 0
            return False
+        elif env_type == "ssh":
+            from tools.environments.ssh import SSHEnvironment
+            # Check that host and user are configured
+            return bool(config.get("ssh_host")) and bool(config.get("ssh_user"))
        elif env_type == "modal":
            from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
            # Check for modal token
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@ -50,10 +50,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
          - "transcript" (str): The transcribed text (empty on failure)
          - "error" (str, optional): Error message if success is False
    """
-    # Use VOICE_TOOLS_OPENAI_KEY to avoid interference with the OpenAI SDK's
-    # auto-detection of OPENAI_API_KEY (which would break OpenRouter calls).
-    # Falls back to OPENAI_API_KEY for backward compatibility.
-    api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY") or os.getenv("OPENAI_API_KEY")
+    api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY")
    if not api_key:
        return {
            "success": False,
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@ -210,7 +210,7 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
    Returns:
        Path to the saved audio file.
    """
-    api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY") or os.getenv("OPENAI_API_KEY", "")
+    api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY", "")
    if not api_key:
        raise ValueError("VOICE_TOOLS_OPENAI_KEY not set. Get one at https://platform.openai.com/api-keys")

@ -392,7 +392,7 @@ def check_tts_requirements() -> bool:
        return True
    if _HAS_ELEVENLABS and os.getenv("ELEVENLABS_API_KEY"):
        return True
-    if _HAS_OPENAI and (os.getenv("VOICE_TOOLS_OPENAI_KEY") or os.getenv("OPENAI_API_KEY")):
+    if _HAS_OPENAI and os.getenv("VOICE_TOOLS_OPENAI_KEY"):
        return True
    return False

@ -409,7 +409,7 @@ if __name__ == "__main__":
    print(f"  ElevenLabs: {'✅ installed' if _HAS_ELEVENLABS else '❌ not installed (pip install elevenlabs)'}")
    print(f"    API Key:  {'✅ set' if os.getenv('ELEVENLABS_API_KEY') else '❌ not set'}")
    print(f"  OpenAI:     {'✅ installed' if _HAS_OPENAI else '❌ not installed'}")
-    print(f"    API Key:  {'✅ set' if (os.getenv('VOICE_TOOLS_OPENAI_KEY') or os.getenv('OPENAI_API_KEY')) else '❌ not set'}")
+    print(f"    API Key:  {'✅ set' if os.getenv('VOICE_TOOLS_OPENAI_KEY') else '❌ not set (VOICE_TOOLS_OPENAI_KEY)'}")
    print(f"  ffmpeg:     {'✅ found' if _has_ffmpeg() else '❌ not found (needed for Telegram Opus)'}")
    print(f"\n  Output dir: {DEFAULT_OUTPUT_DIR}")

--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@ -314,13 +314,13 @@ async def vision_analyze_tool(
        logger.info("Processing image with %s...", model)
        
        # Call the vision API
-        from agent.auxiliary_client import get_auxiliary_extra_body
+        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
        _extra = get_auxiliary_extra_body()
        response = await _aux_async_client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.1,
-            max_tokens=2000,
+            **auxiliary_max_tokens_param(2000),
            **({} if not _extra else {"extra_body": _extra}),
        )
        
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@ -242,7 +242,7 @@ Create a markdown summary that captures all key information in a well-organized,
            if _aux_async_client is None:
                logger.warning("No auxiliary model available for web content processing")
                return None
-            from agent.auxiliary_client import get_auxiliary_extra_body
+            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
            _extra = get_auxiliary_extra_body()
            response = await _aux_async_client.chat.completions.create(
                model=model,
@ -251,7 +251,7 @@ Create a markdown summary that captures all key information in a well-organized,
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,
-                max_tokens=max_tokens,
+                **auxiliary_max_tokens_param(max_tokens),
                **({} if not _extra else {"extra_body": _extra}),
            )
            return response.choices[0].message.content.strip()
@ -365,7 +365,7 @@ Create a single, unified markdown summary."""
                fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
            return fallback

-        from agent.auxiliary_client import get_auxiliary_extra_body
+        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
        _extra = get_auxiliary_extra_body()
        response = await _aux_async_client.chat.completions.create(
            model=model,
@ -374,7 +374,7 @@ Create a single, unified markdown summary."""
                {"role": "user", "content": synthesis_prompt}
            ],
            temperature=0.1,
-            max_tokens=4000,
+            **auxiliary_max_tokens_param(4000),
            **({} if not _extra else {"extra_body": _extra}),
        )
        final_summary = response.choices[0].message.content.strip()
@ -1240,7 +1240,7 @@ WEB_SEARCH_SCHEMA = {

 WEB_EXTRACT_SCHEMA = {
    "name": "web_extract",
-    "description": "Extract content from web page URLs. Returns page content in markdown format. Pages under 5000 chars return full markdown; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. If a URL fails or times out, use the browser tool to access it instead.",
+    "description": "Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs (arxiv papers, documents, etc.) — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. If a URL fails or times out, use the browser tool to access it instead.",
    "parameters": {
        "type": "object",
        "properties": {