Merge remote-tracking branch 'origin/main' into feat/honcho-async-memory
Made-with: Cursor

# Conflicts:
#	cli.py
#	tests/test_run_agent.py

commit a0b0dbe6b2
138 changed files with 17829 additions and 1109 deletions

@@ -250,6 +250,10 @@ def check_dangerous_command(command: str, env_type: str,
     if env_type in ("docker", "singularity", "modal", "daytona"):
         return {"approved": True, "message": None}
 
+    # --yolo: bypass all approval prompts
+    if os.getenv("HERMES_YOLO_MODE"):
+        return {"approved": True, "message": None}
+
     is_dangerous, pattern_key, description = detect_dangerous_command(command)
     if not is_dangerous:
         return {"approved": True, "message": None}

@@ -95,21 +95,34 @@ def _run_git(
 ) -> tuple:
     """Run a git command against the shadow repo. Returns (ok, stdout, stderr)."""
     env = _git_env(shadow_repo, working_dir)
+    cmd = ["git"] + list(args)
     try:
         result = subprocess.run(
-            ["git"] + args,
+            cmd,
             capture_output=True,
             text=True,
             timeout=timeout,
             env=env,
             cwd=str(Path(working_dir).resolve()),
         )
-        return result.returncode == 0, result.stdout.strip(), result.stderr.strip()
+        ok = result.returncode == 0
+        stdout = result.stdout.strip()
+        stderr = result.stderr.strip()
+        if not ok:
+            logger.error(
+                "Git command failed: %s (rc=%d) stderr=%s",
+                " ".join(cmd), result.returncode, stderr,
+            )
+        return ok, stdout, stderr
     except subprocess.TimeoutExpired:
-        return False, "", f"git timed out after {timeout}s: git {' '.join(args)}"
+        msg = f"git timed out after {timeout}s: {' '.join(cmd)}"
+        logger.error(msg, exc_info=True)
+        return False, "", msg
+    except FileNotFoundError:
+        logger.error("Git executable not found: %s", " ".join(cmd), exc_info=True)
+        return False, "", "git not found"
+    except Exception as exc:
+        logger.error("Unexpected git error running %s: %s", " ".join(cmd), exc, exc_info=True)
+        return False, "", str(exc)
@@ -287,7 +300,7 @@ class CheckpointManager:
             ["cat-file", "-t", commit_hash], shadow, abs_dir,
         )
         if not ok:
-            return {"success": False, "error": f"Checkpoint '{commit_hash}' not found"}
+            return {"success": False, "error": f"Checkpoint '{commit_hash}' not found", "debug": err or None}
 
         # Take a checkpoint of current state before restoring (so you can undo the undo)
         self._take(abs_dir, f"pre-rollback snapshot (restoring to {commit_hash[:8]})")
@@ -299,7 +312,7 @@ class CheckpointManager:
         )
 
         if not ok:
-            return {"success": False, "error": f"Restore failed: {err}"}
+            return {"success": False, "error": "Restore failed", "debug": err or None}
 
         # Get info about what was restored
         ok2, reason_out, _ = _run_git(
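
One consequence of the widened error handling above: a missing git binary, a timeout, and an unexpected exception all surface through the same (ok, stdout, stderr) tuple, so callers never need their own try/except. A minimal sketch of a consumer; the arguments shadow and abs_dir are borrowed from the neighboring hunks and the rev-parse call is invented, not a verbatim call site:

    ok, out, err = _run_git(["rev-parse", "HEAD"], shadow, abs_dir)
    if not ok:
        # _run_git has already logged the failure; just surface a summary
        return {"success": False, "error": "Could not resolve HEAD", "debug": err or None}
    head_commit = out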

@@ -458,11 +458,17 @@ def execute_code(
     # --- Poll loop: watch for exit, timeout, and interrupt ---
     deadline = time.monotonic() + timeout
     stdout_chunks: list = []
     stderr_chunks: list = []
 
-    # Background readers to avoid pipe buffer deadlocks
+    # Background readers to avoid pipe buffer deadlocks.
+    # For stdout we use a head+tail strategy: keep the first HEAD_BYTES
+    # and a rolling window of the last TAIL_BYTES so the final print()
+    # output is never lost. Stderr keeps head-only (errors appear early).
+    _STDOUT_HEAD_BYTES = int(MAX_STDOUT_BYTES * 0.4)  # 40% head
+    _STDOUT_TAIL_BYTES = MAX_STDOUT_BYTES - _STDOUT_HEAD_BYTES  # 60% tail
 
     def _drain(pipe, chunks, max_bytes):
         """Simple head-only drain (used for stderr)."""
         total = 0
         try:
             while True:
@@ -476,8 +482,48 @@ def execute_code(
         except (ValueError, OSError) as e:
             logger.debug("Error reading process output: %s", e, exc_info=True)
 
+    stdout_total_bytes = [0]  # mutable ref for total bytes seen
+
+    def _drain_head_tail(pipe, head_chunks, tail_chunks, head_bytes, tail_bytes, total_ref):
+        """Drain stdout keeping both head and tail data."""
+        head_collected = 0
+        from collections import deque
+        tail_buf = deque()
+        tail_collected = 0
+        try:
+            while True:
+                data = pipe.read(4096)
+                if not data:
+                    break
+                total_ref[0] += len(data)
+                # Fill head buffer first
+                if head_collected < head_bytes:
+                    keep = min(len(data), head_bytes - head_collected)
+                    head_chunks.append(data[:keep])
+                    head_collected += keep
+                    data = data[keep:]  # remaining goes to tail
+                    if not data:
+                        continue
+                # Everything past head goes into rolling tail buffer
+                tail_buf.append(data)
+                tail_collected += len(data)
+                # Evict old tail data to stay within tail_bytes budget
+                while tail_collected > tail_bytes and tail_buf:
+                    oldest = tail_buf.popleft()
+                    tail_collected -= len(oldest)
+        except (ValueError, OSError):
+            pass
+        # Transfer final tail to output list
+        tail_chunks.extend(tail_buf)
+
+    stdout_head_chunks: list = []
+    stdout_tail_chunks: list = []
+
     stdout_reader = threading.Thread(
-        target=_drain, args=(proc.stdout, stdout_chunks, MAX_STDOUT_BYTES), daemon=True
+        target=_drain_head_tail,
+        args=(proc.stdout, stdout_head_chunks, stdout_tail_chunks,
+              _STDOUT_HEAD_BYTES, _STDOUT_TAIL_BYTES, stdout_total_bytes),
+        daemon=True
     )
     stderr_reader = threading.Thread(
         target=_drain, args=(proc.stderr, stderr_chunks, MAX_STDERR_BYTES), daemon=True
@@ -501,12 +547,21 @@ def execute_code(
     stdout_reader.join(timeout=3)
     stderr_reader.join(timeout=3)
 
-    stdout_text = b"".join(stdout_chunks).decode("utf-8", errors="replace")
+    stdout_head = b"".join(stdout_head_chunks).decode("utf-8", errors="replace")
+    stdout_tail = b"".join(stdout_tail_chunks).decode("utf-8", errors="replace")
     stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace")
 
-    # Truncation notice
-    if len(stdout_text) >= MAX_STDOUT_BYTES:
-        stdout_text = stdout_text[:MAX_STDOUT_BYTES] + "\n[output truncated at 50KB]"
+    # Assemble stdout with head+tail truncation
+    total_stdout = stdout_total_bytes[0]
+    if total_stdout > MAX_STDOUT_BYTES and stdout_tail:
+        omitted = total_stdout - len(stdout_head) - len(stdout_tail)
+        truncated_notice = (
+            f"\n\n... [OUTPUT TRUNCATED - {omitted:,} chars omitted "
+            f"out of {total_stdout:,} total] ...\n\n"
+        )
+        stdout_text = stdout_head + truncated_notice + stdout_tail
+    else:
+        stdout_text = stdout_head + stdout_tail
 
     exit_code = proc.returncode if proc.returncode is not None else -1
     duration = round(time.monotonic() - exec_start, 2)
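
Because the head+tail strategy is the core of this change, here is a self-contained sketch of the same algorithm at toy scale. Everything below is illustrative: the budgets are shrunk, io.BytesIO stands in for proc.stdout, and the 40/60 split mirrors _STDOUT_HEAD_BYTES/_STDOUT_TAIL_BYTES:

    import io
    from collections import deque

    MAX_BYTES = 100
    HEAD = int(MAX_BYTES * 0.4)   # 40-byte head budget
    TAIL = MAX_BYTES - HEAD       # 60-byte rolling tail budget

    pipe = io.BytesIO(b"".join(b"line %03d\n" % i for i in range(50)))  # ~450 bytes
    head, tail_buf = [], deque()
    head_n = tail_n = total = 0
    while True:
        data = pipe.read(16)
        if not data:
            break
        total += len(data)
        if head_n < HEAD:                      # fill the head budget first
            keep = min(len(data), HEAD - head_n)
            head.append(data[:keep])
            head_n += keep
            data = data[keep:]
            if not data:
                continue
        tail_buf.append(data)                  # everything else rolls through the tail
        tail_n += len(data)
        while tail_n > TAIL and tail_buf:      # evict oldest chunks over budget
            tail_n -= len(tail_buf.popleft())

    omitted = total - head_n - tail_n
    print(b"".join(head).decode())                      # first 40 bytes of output
    print(f"... [{omitted} bytes omitted of {total}] ...")
    print(b"".join(tail_buf).decode())                  # most recent ~60 bytes, never lost

The design choice this demonstrates: a pure head-only cap (the old code) discards exactly the bytes a model most needs, the final print() results, while the rolling deque keeps memory bounded no matter how much the child process writes.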

@@ -166,10 +166,20 @@ def _run_single_child(
     max_iterations: int,
     parent_agent,
     task_count: int = 1,
+    # Credential overrides from delegation config (provider:model resolution)
+    override_provider: Optional[str] = None,
+    override_base_url: Optional[str] = None,
+    override_api_key: Optional[str] = None,
+    override_api_mode: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Spawn and run a single child agent. Called from within a thread.
     Returns a structured result dict.
+
+    When override_* params are set (from delegation config), the child uses
+    those credentials instead of inheriting from the parent. This enables
+    routing subagents to a different provider:model pair (e.g. cheap/fast
+    model on OpenRouter while the parent runs on Nous Portal).
     """
     from run_agent import AIAgent
@@ -199,12 +209,19 @@ def _run_single_child(
     # count toward the session-wide limit.
     shared_budget = getattr(parent_agent, "iteration_budget", None)
 
+    # Resolve effective credentials: config override > parent inherit
+    effective_model = model or parent_agent.model
+    effective_provider = override_provider or getattr(parent_agent, "provider", None)
+    effective_base_url = override_base_url or parent_agent.base_url
+    effective_api_key = override_api_key or parent_api_key
+    effective_api_mode = override_api_mode or getattr(parent_agent, "api_mode", None)
+
     child = AIAgent(
-        base_url=parent_agent.base_url,
-        api_key=parent_api_key,
-        model=model or parent_agent.model,
-        provider=getattr(parent_agent, "provider", None),
-        api_mode=getattr(parent_agent, "api_mode", None),
+        base_url=effective_base_url,
+        api_key=effective_api_key,
+        model=effective_model,
+        provider=effective_provider,
+        api_mode=effective_api_mode,
         max_iterations=max_iterations,
         max_tokens=getattr(parent_agent, "max_tokens", None),
         reasoning_config=getattr(parent_agent, "reasoning_config", None),
@@ -327,6 +344,16 @@ def delegate_task(
     default_max_iter = cfg.get("max_iterations", DEFAULT_MAX_ITERATIONS)
     effective_max_iter = max_iterations or default_max_iter
 
+    # Resolve delegation credentials (provider:model pair).
+    # When delegation.provider is configured, this resolves the full credential
+    # bundle (base_url, api_key, api_mode) via the same runtime provider system
+    # used by CLI/gateway startup. When unconfigured, returns None values so
+    # children inherit from the parent.
+    try:
+        creds = _resolve_delegation_credentials(cfg, parent_agent)
+    except ValueError as exc:
+        return json.dumps({"error": str(exc)})
+
     # Normalize to task list
     if tasks and isinstance(tasks, list):
         task_list = tasks[:MAX_CONCURRENT_CHILDREN]
@@ -358,10 +385,14 @@ def delegate_task(
                 goal=t["goal"],
                 context=t.get("context"),
                 toolsets=t.get("toolsets") or toolsets,
-                model=None,
+                model=creds["model"],
                 max_iterations=effective_max_iter,
                 parent_agent=parent_agent,
                 task_count=1,
+                override_provider=creds["provider"],
+                override_base_url=creds["base_url"],
+                override_api_key=creds["api_key"],
+                override_api_mode=creds["api_mode"],
             )
             results.append(result)
         else:
@@ -383,10 +414,14 @@ def delegate_task(
                 goal=t["goal"],
                 context=t.get("context"),
                 toolsets=t.get("toolsets") or toolsets,
-                model=None,
+                model=creds["model"],
                 max_iterations=effective_max_iter,
                 parent_agent=parent_agent,
                 task_count=n_tasks,
+                override_provider=creds["provider"],
+                override_base_url=creds["base_url"],
+                override_api_key=creds["api_key"],
+                override_api_mode=creds["api_mode"],
             )
             futures[future] = i
 
@@ -444,11 +479,78 @@ def delegate_task(
     }, ensure_ascii=False)
 
 
+def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
+    """Resolve credentials for subagent delegation.
+
+    If ``delegation.provider`` is configured, resolves the full credential
+    bundle (base_url, api_key, api_mode, provider) via the runtime provider
+    system — the same path used by CLI/gateway startup. This lets subagents
+    run on a completely different provider:model pair.
+
+    If no provider is configured, returns None values so the child inherits
+    everything from the parent agent.
+
+    Raises ValueError with a user-friendly message on credential failure.
+    """
+    configured_model = cfg.get("model") or None
+    configured_provider = cfg.get("provider") or None
+
+    if not configured_provider:
+        # No provider override — child inherits everything from parent
+        return {
+            "model": configured_model,
+            "provider": None,
+            "base_url": None,
+            "api_key": None,
+            "api_mode": None,
+        }
+
+    # Provider is configured — resolve full credentials
+    try:
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        runtime = resolve_runtime_provider(requested=configured_provider)
+    except Exception as exc:
+        raise ValueError(
+            f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
+            f"Check that the provider is configured (API key set, valid provider name). "
+            f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
+        ) from exc
+
+    api_key = runtime.get("api_key", "")
+    if not api_key:
+        raise ValueError(
+            f"Delegation provider '{configured_provider}' resolved but has no API key. "
+            f"Set the appropriate environment variable or run 'hermes login'."
+        )
+
+    return {
+        "model": configured_model,
+        "provider": runtime.get("provider"),
+        "base_url": runtime.get("base_url"),
+        "api_key": api_key,
+        "api_mode": runtime.get("api_mode"),
+    }
+
+
 def _load_config() -> dict:
-    """Load delegation config from CLI_CONFIG if available."""
+    """Load delegation config from CLI_CONFIG or persistent config.
+
+    Checks the runtime config (cli.py CLI_CONFIG) first, then falls back
+    to the persistent config (hermes_cli/config.py load_config()) so that
+    ``delegation.model`` / ``delegation.provider`` are picked up regardless
+    of the entry point (CLI, gateway, cron).
+    """
     try:
         from cli import CLI_CONFIG
-        return CLI_CONFIG.get("delegation", {})
+        cfg = CLI_CONFIG.get("delegation", {})
+        if cfg:
+            return cfg
     except Exception:
         pass
+    try:
+        from hermes_cli.config import load_config
+        full = load_config()
+        return full.get("delegation", {})
+    except Exception:
+        return {}
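
To make the two resolution paths concrete, a sketch of what _resolve_delegation_credentials returns in each case. The provider string matches one of the names listed in the error message above; the model string is a made-up placeholder:

    # No delegation.provider configured: all-None bundle, child inherits parent creds
    _resolve_delegation_credentials({}, parent_agent)
    # -> {"model": None, "provider": None, "base_url": None, "api_key": None, "api_mode": None}

    # Provider configured: full bundle resolved via resolve_runtime_provider()
    _resolve_delegation_credentials({"provider": "openrouter", "model": "some/cheap-model"}, parent_agent)
    # -> {"model": "some/cheap-model", "provider": ..., "base_url": ..., "api_key": ..., "api_mode": ...}
    # Raises ValueError if the provider cannot be resolved or resolves without an API key.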

@@ -7,6 +7,7 @@ persistence via bind mounts.
 
 import logging
 import os
+import shutil
 import subprocess
 import sys
 import threading
@@ -19,6 +20,44 @@ from tools.interrupt import is_interrupted
 
 logger = logging.getLogger(__name__)
 
 
+# Common Docker Desktop install paths checked when 'docker' is not in PATH.
+# macOS Intel: /usr/local/bin, macOS Apple Silicon (Homebrew): /opt/homebrew/bin,
+# Docker Desktop app bundle: /Applications/Docker.app/Contents/Resources/bin
+_DOCKER_SEARCH_PATHS = [
+    "/usr/local/bin/docker",
+    "/opt/homebrew/bin/docker",
+    "/Applications/Docker.app/Contents/Resources/bin/docker",
+]
+
+_docker_executable: Optional[str] = None  # resolved once, cached
+
+
+def find_docker() -> Optional[str]:
+    """Locate the docker CLI binary.
+
+    Checks ``shutil.which`` first (respects PATH), then probes well-known
+    install locations on macOS where Docker Desktop may not be in PATH
+    (e.g. when running as a gateway service via launchd).
+
+    Returns the absolute path, or ``None`` if docker cannot be found.
+    """
+    global _docker_executable
+    if _docker_executable is not None:
+        return _docker_executable
+
+    found = shutil.which("docker")
+    if found:
+        _docker_executable = found
+        return found
+
+    for path in _DOCKER_SEARCH_PATHS:
+        if os.path.isfile(path) and os.access(path, os.X_OK):
+            _docker_executable = path
+            logger.info("Found docker at non-PATH location: %s", path)
+            return path
+
+    return None
+
+
 # Security flags applied to every container.
 # The container itself is the security boundary (isolated from host).
@@ -145,9 +184,14 @@ class DockerEnvironment(BaseEnvironment):
         all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
         logger.info(f"Docker run_args: {all_run_args}")
 
+        # Resolve the docker executable once so it works even when
+        # /usr/local/bin is not in PATH (common on macOS gateway/service).
+        docker_exe = find_docker() or "docker"
+
         self._inner = _Docker(
             image=image, cwd=cwd, timeout=timeout,
             run_args=all_run_args,
+            executable=docker_exe,
         )
         self._container_id = self._inner.container_id
@@ -162,8 +206,9 @@ class DockerEnvironment(BaseEnvironment):
     if _storage_opt_ok is not None:
         return _storage_opt_ok
     try:
+        docker = find_docker() or "docker"
         result = subprocess.run(
-            ["docker", "info", "--format", "{{.Driver}}"],
+            [docker, "info", "--format", "{{.Driver}}"],
            capture_output=True, text=True, timeout=10,
         )
         driver = result.stdout.strip().lower()
@@ -173,14 +218,14 @@ class DockerEnvironment(BaseEnvironment):
         # overlay2 only supports storage-opt on XFS with pquota.
         # Probe by attempting a dry-ish run — the fastest reliable check.
         probe = subprocess.run(
-            ["docker", "create", "--storage-opt", "size=1m", "hello-world"],
+            [docker, "create", "--storage-opt", "size=1m", "hello-world"],
             capture_output=True, text=True, timeout=15,
         )
         if probe.returncode == 0:
             # Clean up the created container
             container_id = probe.stdout.strip()
             if container_id:
-                subprocess.run(["docker", "rm", container_id],
+                subprocess.run([docker, "rm", container_id],
                                capture_output=True, timeout=5)
             _storage_opt_ok = True
         else:

@@ -50,7 +50,7 @@ class ModalEnvironment(BaseEnvironment):
     def __init__(
         self,
         image: str,
-        cwd: str = "~",
+        cwd: str = "/root",
         timeout: int = 60,
         modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
         persistent_filesystem: bool = True,
@@ -95,6 +95,7 @@ class ModalEnvironment(BaseEnvironment):
             startup_timeout=180.0,
             runtime_timeout=3600.0,
             modal_sandbox_kwargs=sandbox_kwargs,
+            install_pipx=True,  # Required: installs pipx + swe-rex runtime (swerex-remote)
         )
 
     def execute(self, command: str, cwd: str = "", *,

@@ -14,6 +14,14 @@ logger = logging.getLogger(__name__)
 _file_ops_lock = threading.Lock()
 _file_ops_cache: dict = {}
 
+# Track files read per task to detect re-read loops after context compression.
+# Per task_id we store:
+#   "last_key": the key of the most recent read/search call (or None)
+#   "consecutive": how many times that exact call has been repeated in a row
+#   "read_history": set of (path, offset, limit) tuples for get_read_files_summary
+_read_tracker_lock = threading.Lock()
+_read_tracker: dict = {}
+
 
 def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
     """Get or create ShellFileOperations for a terminal environment.
@@ -132,11 +140,97 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
         result = file_ops.read_file(path, offset, limit)
         if result.content:
             result.content = redact_sensitive_text(result.content)
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        result_dict = result.to_dict()
+
+        # Track reads to detect *consecutive* re-read loops.
+        # The counter resets whenever any other tool is called in between,
+        # so only truly back-to-back identical reads trigger warnings/blocks.
+        read_key = ("read", path, offset, limit)
+        with _read_tracker_lock:
+            task_data = _read_tracker.setdefault(task_id, {
+                "last_key": None, "consecutive": 0, "read_history": set(),
+            })
+            task_data["read_history"].add((path, offset, limit))
+            if task_data["last_key"] == read_key:
+                task_data["consecutive"] += 1
+            else:
+                task_data["last_key"] = read_key
+                task_data["consecutive"] = 1
+            count = task_data["consecutive"]
+
+        if count >= 4:
+            # Hard block: stop returning content to break the loop
+            return json.dumps({
+                "error": (
+                    f"BLOCKED: You have read this exact file region {count} times in a row. "
+                    "The content has NOT changed. You already have this information. "
+                    "STOP re-reading and proceed with your task."
+                ),
+                "path": path,
+                "already_read": count,
+            }, ensure_ascii=False)
+        elif count >= 3:
+            result_dict["_warning"] = (
+                f"You have read this exact file region {count} times consecutively. "
+                "The content has not changed since your last read. Use the information you already have. "
+                "If you are stuck in a loop, stop reading and proceed with writing or responding."
+            )
+
+        return json.dumps(result_dict, ensure_ascii=False)
     except Exception as e:
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
 
+def get_read_files_summary(task_id: str = "default") -> list:
+    """Return a list of files read in this session for the given task.
+
+    Used by context compression to preserve file-read history across
+    compression boundaries.
+    """
+    with _read_tracker_lock:
+        task_data = _read_tracker.get(task_id, {})
+        read_history = task_data.get("read_history", set())
+        seen_paths: dict = {}
+        for (path, offset, limit) in read_history:
+            if path not in seen_paths:
+                seen_paths[path] = []
+            seen_paths[path].append(f"lines {offset}-{offset + limit - 1}")
+        return [
+            {"path": p, "regions": regions}
+            for p, regions in sorted(seen_paths.items())
+        ]
+
+
+def clear_read_tracker(task_id: str = None):
+    """Clear the read tracker.
+
+    Call with a task_id to clear just that task, or without to clear all.
+    Should be called when a session is destroyed to prevent memory leaks
+    in long-running gateway processes.
+    """
+    with _read_tracker_lock:
+        if task_id:
+            _read_tracker.pop(task_id, None)
+        else:
+            _read_tracker.clear()
+
+
+def notify_other_tool_call(task_id: str = "default"):
+    """Reset consecutive read/search counter for a task.
+
+    Called by the tool dispatcher (model_tools.py) whenever a tool OTHER
+    than read_file / search_files is executed. This ensures we only warn
+    or block on *truly consecutive* repeated reads — if the agent does
+    anything else in between (write, patch, terminal, etc.) the counter
+    resets and the next read is treated as fresh.
+    """
+    with _read_tracker_lock:
+        task_data = _read_tracker.get(task_id)
+        if task_data:
+            task_data["last_key"] = None
+            task_data["consecutive"] = 0
+
+
 def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
     """Write content to a file."""
     try:
@@ -144,7 +238,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
         result = file_ops.write_file(path, content)
         return json.dumps(result.to_dict(), ensure_ascii=False)
     except Exception as e:
-        print(f"[FileTools] write_file error: {type(e).__name__}: {e}", flush=True)
+        logger.error("write_file error: %s: %s", type(e).__name__, e)
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
 
@@ -185,6 +279,30 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
                 task_id: str = "default") -> str:
     """Search for content or files."""
     try:
+        # Track searches to detect *consecutive* repeated search loops.
+        search_key = ("search", pattern, target, str(path), file_glob or "")
+        with _read_tracker_lock:
+            task_data = _read_tracker.setdefault(task_id, {
+                "last_key": None, "consecutive": 0, "read_history": set(),
+            })
+            if task_data["last_key"] == search_key:
+                task_data["consecutive"] += 1
+            else:
+                task_data["last_key"] = search_key
+                task_data["consecutive"] = 1
+            count = task_data["consecutive"]
+
+        if count >= 4:
+            return json.dumps({
+                "error": (
+                    f"BLOCKED: You have run this exact search {count} times in a row. "
+                    "The results have NOT changed. You already have this information. "
+                    "STOP re-searching and proceed with your task."
+                ),
+                "pattern": pattern,
+                "already_searched": count,
+            }, ensure_ascii=False)
+
         file_ops = _get_file_ops(task_id)
         result = file_ops.search(
             pattern=pattern, path=path, target=target, file_glob=file_glob,
@@ -195,6 +313,13 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
             if hasattr(m, 'content') and m.content:
                 m.content = redact_sensitive_text(m.content)
         result_dict = result.to_dict()
+
+        if count >= 3:
+            result_dict["_warning"] = (
+                f"You have run this exact search {count} times consecutively. "
+                "The results have not changed. Use the information you already have."
+            )
+
         result_json = json.dumps(result_dict, ensure_ascii=False)
         # Hint when results were truncated — explicit next offset is clearer
         # than relying on the model to infer it from total_count vs match count.
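
The warn-at-3 / block-at-4 thresholds and the reset rule combine like this. The path and task id are made up for illustration; offset/limit default to 1/500, so these calls all share one tracking key:

    read_file_tool("/tmp/app.py", task_id="t1")  # consecutive = 1, normal result
    read_file_tool("/tmp/app.py", task_id="t1")  # consecutive = 2, normal result
    read_file_tool("/tmp/app.py", task_id="t1")  # consecutive = 3 -> "_warning" attached
    read_file_tool("/tmp/app.py", task_id="t1")  # consecutive = 4 -> BLOCKED error, no content
    notify_other_tool_call("t1")                 # dispatcher saw some other tool run
    read_file_tool("/tmp/app.py", task_id="t1")  # counter reset -> consecutive = 1 again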

@@ -538,6 +538,14 @@ class SamplingHandler:
                 f"Sampling LLM call failed: {_sanitize_error(str(exc))}"
             )
 
+        # Guard against empty choices (content filtering, provider errors)
+        if not getattr(response, "choices", None):
+            self.metrics["errors"] += 1
+            return self._error(
+                f"LLM returned empty response (no choices) for server "
+                f"'{self.server_name}'"
+            )
+
         # Track metrics
         choice = response.choices[0]
         self.metrics["requests"] += 1
@@ -1323,29 +1331,23 @@ def discover_mcp_tools() -> List[str]:
 
     async def _discover_one(name: str, cfg: dict) -> List[str]:
         """Connect to a single server and return its registered tool names."""
-        transport_desc = cfg.get("url", f'{cfg.get("command", "?")} {" ".join(cfg.get("args", [])[:2])}')
-        try:
-            registered = await _discover_and_register_server(name, cfg)
-            transport_type = "HTTP" if "url" in cfg else "stdio"
-            return registered
-        except Exception as exc:
-            logger.warning(
-                "Failed to connect to MCP server '%s': %s",
-                name, exc,
-            )
-            return []
+        return await _discover_and_register_server(name, cfg)
 
     async def _discover_all():
         nonlocal failed_count
+        server_names = list(new_servers.keys())
         # Connect to all servers in PARALLEL
         results = await asyncio.gather(
             *(_discover_one(name, cfg) for name, cfg in new_servers.items()),
             return_exceptions=True,
         )
-        for result in results:
+        for name, result in zip(server_names, results):
             if isinstance(result, Exception):
                 failed_count += 1
-                logger.warning("MCP discovery error: %s", result)
+                logger.warning(
+                    "Failed to connect to MCP server '%s': %s",
+                    name, result,
+                )
             elif isinstance(result, list):
                 all_tools.extend(result)
             else:
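
The rewritten loop relies on a documented guarantee: asyncio.gather(..., return_exceptions=True) returns results in the same order as its awaitables, and dicts preserve insertion order, which is what lets zip(server_names, results) attribute each failure to the right server. A standalone sketch of the pattern:

    import asyncio

    async def probe(name: str) -> list:
        if name == "bad":
            raise ConnectionError("connection refused")
        return [f"{name}.some_tool"]

    async def main():
        names = ["good", "bad"]
        results = await asyncio.gather(*(probe(n) for n in names), return_exceptions=True)
        for name, result in zip(names, results):   # order matches `names`
            if isinstance(result, Exception):
                print(f"{name} failed: {result}")
            else:
                print(f"{name} registered: {result}")

    asyncio.run(main())
    # good registered: ['good.some_tool']
    # bad failed: connection refused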

@@ -323,7 +323,10 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
     # Step 1: Start the Atropos API server (run-api)
     print(f"[{run_id}] Starting Atropos API server (run-api)...")
 
-    api_log_file = open(api_log, "w")
+    # File must stay open while the subprocess runs; we store the handle
+    # on run_state so _stop_training_run() can close it when done.
+    api_log_file = open(api_log, "w")  # closed by _stop_training_run
+    run_state.api_log_file = api_log_file
     run_state.api_process = subprocess.Popen(
         ["run-api"],
         stdout=api_log_file,
@@ -337,6 +340,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
     if run_state.api_process.poll() is not None:
         run_state.status = "failed"
         run_state.error_message = f"API server exited with code {run_state.api_process.returncode}. Check {api_log}"
+        _stop_training_run(run_state)
         return
 
     print(f"[{run_id}] Atropos API server started")
@@ -344,7 +348,8 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
     # Step 2: Start the Tinker trainer
     print(f"[{run_id}] Starting Tinker trainer: launch_training.py --config {config_path}")
 
-    trainer_log_file = open(trainer_log, "w")
+    trainer_log_file = open(trainer_log, "w")  # closed by _stop_training_run
+    run_state.trainer_log_file = trainer_log_file
     run_state.trainer_process = subprocess.Popen(
         [sys.executable, "launch_training.py", "--config", str(config_path)],
         stdout=trainer_log_file,
@@ -360,8 +365,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
     if run_state.trainer_process.poll() is not None:
         run_state.status = "failed"
         run_state.error_message = f"Trainer exited with code {run_state.trainer_process.returncode}. Check {trainer_log}"
-        if run_state.api_process:
-            run_state.api_process.terminate()
+        _stop_training_run(run_state)
         return
 
     print(f"[{run_id}] Trainer started, inference server on port 8001")
@@ -380,11 +384,13 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
     if not env_info:
         run_state.status = "failed"
         run_state.error_message = f"Environment '{run_state.environment}' not found"
+        _stop_training_run(run_state)
         return
 
     print(f"[{run_id}] Starting environment: {env_info.file_path} serve")
 
-    env_log_file = open(env_log, "w")
+    env_log_file = open(env_log, "w")  # closed by _stop_training_run
+    run_state.env_log_file = env_log_file
     run_state.env_process = subprocess.Popen(
         [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
         stdout=env_log_file,
@@ -398,10 +404,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
     if run_state.env_process.poll() is not None:
         run_state.status = "failed"
         run_state.error_message = f"Environment exited with code {run_state.env_process.returncode}. Check {env_log}"
-        if run_state.trainer_process:
-            run_state.trainer_process.terminate()
-        if run_state.api_process:
-            run_state.api_process.terminate()
+        _stop_training_run(run_state)
         return
 
     run_state.status = "running"
@@ -480,6 +483,16 @@ def _stop_training_run(run_state: RunState):
     if run_state.status == "running":
         run_state.status = "stopped"
 
+    # Close log file handles that were opened for subprocess stdout.
+    for attr in ("env_log_file", "trainer_log_file", "api_log_file"):
+        fh = getattr(run_state, attr, None)
+        if fh is not None:
+            try:
+                fh.close()
+            except Exception:
+                pass
+            setattr(run_state, attr, None)
+
 
 # ============================================================================
 # Environment Discovery Tools

@@ -8,10 +8,13 @@ human-friendly channel names to IDs. Works in both CLI and gateway contexts.
 import json
 import logging
 import os
+import re
 import time
 
 logger = logging.getLogger(__name__)
 
+_TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$")
+
 
 SEND_MESSAGE_SCHEMA = {
     "name": "send_message",
@@ -33,7 +36,7 @@ SEND_MESSAGE_SCHEMA = {
         },
         "target": {
             "type": "string",
-            "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'"
+            "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or Telegram topic 'telegram:chat_id:thread_id'. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'"
         },
         "message": {
             "type": "string",
@@ -73,23 +76,30 @@ def _handle_send(args):
 
     parts = target.split(":", 1)
     platform_name = parts[0].strip().lower()
-    chat_id = parts[1].strip() if len(parts) > 1 else None
+    target_ref = parts[1].strip() if len(parts) > 1 else None
+    chat_id = None
+    thread_id = None
+
+    if target_ref:
+        chat_id, thread_id, is_explicit = _parse_target_ref(platform_name, target_ref)
+    else:
+        is_explicit = False
 
     # Resolve human-friendly channel names to numeric IDs
-    if chat_id and not chat_id.lstrip("-").isdigit():
+    if target_ref and not is_explicit:
         try:
             from gateway.channel_directory import resolve_channel_name
-            resolved = resolve_channel_name(platform_name, chat_id)
+            resolved = resolve_channel_name(platform_name, target_ref)
             if resolved:
-                chat_id = resolved
+                chat_id, thread_id, _ = _parse_target_ref(platform_name, resolved)
             else:
                 return json.dumps({
-                    "error": f"Could not resolve '{chat_id}' on {platform_name}. "
+                    "error": f"Could not resolve '{target_ref}' on {platform_name}. "
                              f"Use send_message(action='list') to see available targets."
                 })
         except Exception:
             return json.dumps({
-                "error": f"Could not resolve '{chat_id}' on {platform_name}. "
+                "error": f"Could not resolve '{target_ref}' on {platform_name}. "
                          f"Try using a numeric channel ID instead."
             })
@@ -109,6 +119,7 @@ def _handle_send(args):
         "slack": Platform.SLACK,
         "whatsapp": Platform.WHATSAPP,
         "signal": Platform.SIGNAL,
+        "email": Platform.EMAIL,
     }
     platform = platform_map.get(platform_name)
     if not platform:
@@ -134,7 +145,7 @@ def _handle_send(args):
 
     try:
         from model_tools import _run_async
-        result = _run_async(_send_to_platform(platform, pconfig, chat_id, message))
+        result = _run_async(_send_to_platform(platform, pconfig, chat_id, message, thread_id=thread_id))
         if used_home_channel and isinstance(result, dict) and result.get("success"):
             result["note"] = f"Sent to {platform_name} home channel (chat_id: {chat_id})"
@@ -143,7 +154,7 @@ def _handle_send(args):
         try:
             from gateway.mirror import mirror_to_session
             source_label = os.getenv("HERMES_SESSION_PLATFORM", "cli")
-            if mirror_to_session(platform_name, chat_id, message, source_label=source_label):
+            if mirror_to_session(platform_name, chat_id, message, source_label=source_label, thread_id=thread_id):
                 result["mirrored"] = True
         except Exception:
             pass
@@ -153,26 +164,42 @@ def _handle_send(args):
         return json.dumps({"error": f"Send failed: {e}"})
 
 
-async def _send_to_platform(platform, pconfig, chat_id, message):
+def _parse_target_ref(platform_name: str, target_ref: str):
+    """Parse a tool target into chat_id/thread_id and whether it is explicit."""
+    if platform_name == "telegram":
+        match = _TELEGRAM_TOPIC_TARGET_RE.fullmatch(target_ref)
+        if match:
+            return match.group(1), match.group(2), True
+    if target_ref.lstrip("-").isdigit():
+        return target_ref, None, True
+    return None, None, False
+
+
+async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None):
     """Route a message to the appropriate platform sender."""
     from gateway.config import Platform
     if platform == Platform.TELEGRAM:
-        return await _send_telegram(pconfig.token, chat_id, message)
+        return await _send_telegram(pconfig.token, chat_id, message, thread_id=thread_id)
     elif platform == Platform.DISCORD:
         return await _send_discord(pconfig.token, chat_id, message)
     elif platform == Platform.SLACK:
         return await _send_slack(pconfig.token, chat_id, message)
     elif platform == Platform.SIGNAL:
         return await _send_signal(pconfig.extra, chat_id, message)
+    elif platform == Platform.EMAIL:
+        return await _send_email(pconfig.extra, chat_id, message)
     return {"error": f"Direct sending not yet implemented for {platform.value}"}
 
 
-async def _send_telegram(token, chat_id, message):
+async def _send_telegram(token, chat_id, message, thread_id=None):
     """Send via Telegram Bot API (one-shot, no polling needed)."""
     try:
         from telegram import Bot
         bot = Bot(token=token)
-        msg = await bot.send_message(chat_id=int(chat_id), text=message)
+        send_kwargs = {"chat_id": int(chat_id), "text": message}
+        if thread_id is not None:
+            send_kwargs["message_thread_id"] = int(thread_id)
+        msg = await bot.send_message(**send_kwargs)
         return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(msg.message_id)}
     except ImportError:
         return {"error": "python-telegram-bot not installed. Run: pip install python-telegram-bot"}
@@ -259,6 +286,35 @@ async def _send_signal(extra, chat_id, message):
         return {"error": f"Signal send failed: {e}"}
 
 
+async def _send_email(extra, chat_id, message):
+    """Send via SMTP (one-shot, no persistent connection needed)."""
+    import smtplib
+    from email.mime.text import MIMEText
+
+    address = extra.get("address") or os.getenv("EMAIL_ADDRESS", "")
+    password = os.getenv("EMAIL_PASSWORD", "")
+    smtp_host = extra.get("smtp_host") or os.getenv("EMAIL_SMTP_HOST", "")
+    smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587"))
+
+    if not all([address, password, smtp_host]):
+        return {"error": "Email not configured (EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_SMTP_HOST required)"}
+
+    try:
+        msg = MIMEText(message, "plain", "utf-8")
+        msg["From"] = address
+        msg["To"] = chat_id
+        msg["Subject"] = "Hermes Agent"
+
+        server = smtplib.SMTP(smtp_host, smtp_port)
+        server.starttls()
+        server.login(address, password)
+        server.send_message(msg)
+        server.quit()
+        return {"success": True, "platform": "email", "chat_id": chat_id}
+    except Exception as e:
+        return {"error": f"Email send failed: {e}"}
+
+
 def _check_send_message():
     """Gate send_message on gateway running (always available on messaging platforms)."""
     platform = os.getenv("HERMES_SESSION_PLATFORM", "")
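
How _parse_target_ref classifies a few representative targets, reusing the illustrative IDs from the schema description above:

    _parse_target_ref("telegram", "-1001234567890:17585")  # -> ("-1001234567890", "17585", True), topic thread
    _parse_target_ref("telegram", "-1001234567890")        # -> ("-1001234567890", None, True)
    _parse_target_ref("discord", "987654321")              # -> ("987654321", None, True), explicit numeric ID
    _parse_target_ref("discord", "#bot-home")              # -> (None, None, False), falls through to name resolution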

@@ -68,7 +68,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Dict, Any, List, Optional, Tuple
+from typing import Dict, Any, List, Optional, Set, Tuple
 
 import yaml
 
@@ -222,37 +222,81 @@ def _parse_tags(tags_value) -> List[str]:
     return [t.strip().strip('"\'') for t in tags_value.split(',') if t.strip()]
 
 
-def _find_all_skills() -> List[Dict[str, Any]]:
-    """
-    Recursively find all skills in ~/.hermes/skills/.
-
-    Returns metadata for progressive disclosure (tier 1):
-    - name, description, category
-
-    Returns:
-        List of skill metadata dicts
-    """
+def _get_disabled_skill_names() -> Set[str]:
+    """Load disabled skill names from config (once per call).
+
+    Resolves platform from ``HERMES_PLATFORM`` env var, falls back to
+    the global disabled list.
+    """
+    import os
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        skills_cfg = config.get("skills", {})
+        resolved_platform = os.getenv("HERMES_PLATFORM")
+        if resolved_platform:
+            platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
+            if platform_disabled is not None:
+                return set(platform_disabled)
+        return set(skills_cfg.get("disabled", []))
+    except Exception:
+        return set()
+
+
+def _is_skill_disabled(name: str, platform: str = None) -> bool:
+    """Check if a skill is disabled in config."""
+    import os
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        skills_cfg = config.get("skills", {})
+        resolved_platform = platform or os.getenv("HERMES_PLATFORM")
+        if resolved_platform:
+            platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
+            if platform_disabled is not None:
+                return name in platform_disabled
+        return name in skills_cfg.get("disabled", [])
+    except Exception:
+        return False
+
+
+def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]:
+    """Recursively find all skills in ~/.hermes/skills/.
+
+    Args:
+        skip_disabled: If True, return ALL skills regardless of disabled
+            state (used by ``hermes skills`` config UI). Default False
+            filters out disabled skills.
+
+    Returns:
+        List of skill metadata dicts (name, description, category).
+    """
     skills = []
 
     if not SKILLS_DIR.exists():
         return skills
 
+    # Load disabled set once (not per-skill)
+    disabled = set() if skip_disabled else _get_disabled_skill_names()
+
     for skill_md in SKILLS_DIR.rglob("SKILL.md"):
         if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
             continue
 
         skill_dir = skill_md.parent
 
         try:
             content = skill_md.read_text(encoding='utf-8')
             frontmatter, body = _parse_frontmatter(content)
 
             # Skip skills incompatible with the current OS platform
             if not skill_matches_platform(frontmatter):
                 continue
 
             name = frontmatter.get('name', skill_dir.name)[:MAX_NAME_LENGTH]
 
+            if name in disabled:
+                continue
+
             description = frontmatter.get('description', '')
             if not description:
                 for line in body.strip().split('\n'):
@@ -260,25 +304,25 @@ def _find_all_skills() -> List[Dict[str, Any]]:
                     if line and not line.startswith('#'):
                         description = line
                         break
 
             if len(description) > MAX_DESCRIPTION_LENGTH:
                 description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
 
             category = _get_category_from_path(skill_md)
 
             skills.append({
                 "name": name,
                 "description": description,
                 "category": category,
             })
 
         except (UnicodeDecodeError, PermissionError) as e:
             logger.warning("Failed to read skill file %s: %s", skill_md, e)
             continue
         except Exception as e:
             logger.warning("Error parsing skill %s: %s", skill_md, e, exc_info=True)
             continue
 
     return skills
|
@ -434,6 +434,23 @@ def clear_task_env_overrides(task_id: str):
|
|||
_task_env_overrides.pop(task_id, None)
|
||||
|
||||
# Configuration from environment variables
|
||||
|
||||
def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"):
|
||||
"""Parse an environment variable with *converter*, raising a clear error on bad values.
|
||||
|
||||
Without this wrapper, a single malformed env var (e.g. TERMINAL_TIMEOUT=5m)
|
||||
causes an unhandled ValueError that kills every terminal command.
|
||||
"""
|
||||
raw = os.getenv(name, default)
|
||||
try:
|
||||
return converter(raw)
|
||||
except (ValueError, json.JSONDecodeError):
|
||||
raise ValueError(
|
||||
f"Invalid value for {name}: {raw!r} (expected {type_label}). "
|
||||
f"Check ~/.hermes/.env or environment variables."
|
||||
)
|
||||
|
||||
|
||||
def _get_env_config() -> Dict[str, Any]:
|
||||
"""Get terminal environment configuration from environment variables."""
|
||||
# Default image with Python and Node.js for maximum compatibility
|
||||
|
|
@ -446,7 +463,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
if env_type == "local":
|
||||
default_cwd = os.getcwd()
|
||||
else:
|
||||
default_cwd = "~"
|
||||
default_cwd = "/root"
|
||||
|
||||
# Read TERMINAL_CWD but sanity-check it for container backends.
|
||||
# If the CWD looks like a host-local path that can't exist inside a
|
||||
|
|
@ -470,19 +487,19 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
"modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
|
||||
"daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
|
||||
"cwd": cwd,
|
||||
"timeout": int(os.getenv("TERMINAL_TIMEOUT", "180")),
|
||||
"lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
|
||||
"timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"),
|
||||
"lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"),
|
||||
# SSH-specific config
|
||||
"ssh_host": os.getenv("TERMINAL_SSH_HOST", ""),
|
||||
"ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
|
||||
"ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
|
||||
"ssh_port": _parse_env_var("TERMINAL_SSH_PORT", "22"),
|
||||
"ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
|
||||
# Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
|
||||
"container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
|
||||
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
|
||||
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
|
||||
"container_cpu": _parse_env_var("TERMINAL_CONTAINER_CPU", "1", float, "number"),
|
||||
"container_memory": _parse_env_var("TERMINAL_CONTAINER_MEMORY", "5120"), # MB (default 5GB)
|
||||
"container_disk": _parse_env_var("TERMINAL_CONTAINER_DISK", "51200"), # MB (default 50GB)
|
||||
"container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
|
||||
"docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
|
||||
"docker_volumes": _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON"),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -536,7 +553,12 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
if memory > 0:
|
||||
sandbox_kwargs["memory"] = memory
|
||||
if disk > 0:
|
||||
sandbox_kwargs["ephemeral_disk"] = disk
|
||||
try:
|
||||
import inspect, modal
|
||||
if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters:
|
||||
sandbox_kwargs["ephemeral_disk"] = disk
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return _ModalEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
|
|
@ -1112,9 +1134,14 @@ def check_terminal_requirements() -> bool:
|
|||
return True
|
||||
elif env_type == "docker":
|
||||
from minisweagent.environments.docker import DockerEnvironment
|
||||
# Check if docker is available
|
||||
# Check if docker is available (use find_docker for macOS PATH issues)
|
||||
from tools.environments.docker import find_docker
|
||||
import subprocess
|
||||
result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
|
||||
docker = find_docker()
|
||||
if not docker:
|
||||
logger.error("Docker executable not found in PATH or common install locations")
|
||||
return False
|
||||
result = subprocess.run([docker, "version"], capture_output=True, timeout=5)
|
||||
return result.returncode == 0
|
||||
elif env_type == "singularity":
|
||||
from minisweagent.environments.singularity import SingularityEnvironment
|
||||
|
|
|
|||
|
|
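
What the _parse_env_var guard buys in practice, with illustrative values:

    os.environ["TERMINAL_TIMEOUT"] = "5m"
    _parse_env_var("TERMINAL_TIMEOUT", "180")
    # ValueError: Invalid value for TERMINAL_TIMEOUT: '5m' (expected integer).
    # Check ~/.hermes/.env or environment variables.

    os.environ["TERMINAL_DOCKER_VOLUMES"] = '["/host/data:/data"]'
    _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON")
    # -> ["/host/data:/data"]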

@@ -105,8 +105,17 @@ class TodoStore:
             "cancelled": "[~]",
         }
 
-        lines = ["[Your task list was preserved across context compression]"]
-        for item in self._items:
+        # Only inject pending/in_progress items — completed/cancelled ones
+        # cause the model to re-do finished work after compression.
+        active_items = [
+            item for item in self._items
+            if item["status"] in ("pending", "in_progress")
+        ]
+        if not active_items:
+            return None
+
+        lines = ["[Your active task list was preserved across context compression]"]
+        for item in active_items:
             marker = markers.get(item["status"], "[?]")
             lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")

@@ -259,6 +259,7 @@ async def vision_analyze_tool(
 
     # Check auxiliary vision client availability
     if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
+        logger.error("Vision analysis unavailable: no auxiliary vision model configured")
         return json.dumps({
             "success": False,
             "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "