Merge branch 'main' into codex/align-codex-provider-conventions-mainrepo

2026-02-28 18:13:38 -08:00 · 2026-02-28 18:13:38 -08:00 · 5a79e423fe
commit 5a79e423fe
parent 32070e6bc0 7f7643cf63
96 changed files with 10884 additions and 447 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -43,16 +43,55 @@ if _env_path.exists():
 load_dotenv()

 # Bridge config.yaml values into the environment so os.getenv() picks them up.
-# Values already set in the environment (from .env or shell) take precedence.
+# Top-level scalars are only a fallback; the nested terminal, compression and
+# agent.max_turns settings are authoritative and override .env.
 _config_path = _hermes_home / 'config.yaml'
 if _config_path.exists():
    try:
        import yaml as _yaml
        with open(_config_path) as _f:
            _cfg = _yaml.safe_load(_f) or {}
+        # Top-level simple values (fallback only — don't override .env)
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
                os.environ[_key] = str(_val)
+        # Terminal config is nested — bridge to TERMINAL_* env vars.
+        # config.yaml overrides .env for these since it's the documented config path.
+        # Keys whose YAML value is null are skipped in every nested section:
+        # str(None) would otherwise export the literal string "None"
+        # (e.g. TERMINAL_CWD=None) and clobber a usable .env value.
+        _terminal_cfg = _cfg.get("terminal", {})
+        if _terminal_cfg and isinstance(_terminal_cfg, dict):
+            _terminal_env_map = {
+                "backend": "TERMINAL_ENV",
+                "cwd": "TERMINAL_CWD",
+                "timeout": "TERMINAL_TIMEOUT",
+                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
+                "docker_image": "TERMINAL_DOCKER_IMAGE",
+                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
+                "modal_image": "TERMINAL_MODAL_IMAGE",
+                "ssh_host": "TERMINAL_SSH_HOST",
+                "ssh_user": "TERMINAL_SSH_USER",
+                "ssh_port": "TERMINAL_SSH_PORT",
+                "ssh_key": "TERMINAL_SSH_KEY",
+                "container_cpu": "TERMINAL_CONTAINER_CPU",
+                "container_memory": "TERMINAL_CONTAINER_MEMORY",
+                "container_disk": "TERMINAL_CONTAINER_DISK",
+                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
+            }
+            for _cfg_key, _env_var in _terminal_env_map.items():
+                if _terminal_cfg.get(_cfg_key) is not None:
+                    os.environ[_env_var] = str(_terminal_cfg[_cfg_key])
+        # Context-compression settings, same override semantics as terminal.
+        _compression_cfg = _cfg.get("compression", {})
+        if _compression_cfg and isinstance(_compression_cfg, dict):
+            _compression_env_map = {
+                "enabled": "CONTEXT_COMPRESSION_ENABLED",
+                "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
+                "summary_model": "CONTEXT_COMPRESSION_MODEL",
+            }
+            for _cfg_key, _env_var in _compression_env_map.items():
+                if _compression_cfg.get(_cfg_key) is not None:
+                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
+        # agent.max_turns is exposed as HERMES_MAX_ITERATIONS.
+        _agent_cfg = _cfg.get("agent", {})
+        if _agent_cfg and isinstance(_agent_cfg, dict):
+            if _agent_cfg.get("max_turns") is not None:
+                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
    except Exception:
        pass  # Non-fatal; gateway can still run with .env values

@ -131,6 +170,7 @@ class GatewayRunner:
        self.session_store = SessionStore(
            self.config.sessions_dir, self.config,
            has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
+            on_auto_reset=self._flush_memories_before_reset,
        )
        self.delivery_router = DeliveryRouter(self.config)
        self._running = False
@ -145,6 +185,14 @@ class GatewayRunner:
        # Key: session_key, Value: {"command": str, "pattern_key": str}
        self._pending_approvals: Dict[str, Dict[str, str]] = {}
        
+        # Initialize session database for session_search tool support
+        self._session_db = None
+        try:
+            from hermes_state import SessionDB
+            self._session_db = SessionDB()
+        except Exception as e:
+            logger.debug("SQLite session store not available: %s", e)
+        
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
@ -153,6 +201,66 @@ class GatewayRunner:
        from gateway.hooks import HookRegistry
        self.hooks = HookRegistry()
    
+    def _flush_memories_before_reset(self, old_entry):
+        """Give the agent one last turn to persist memories/skills before an auto-reset.
+
+        This method is handed to SessionStore as ``on_auto_reset`` and is meant
+        to run synchronously ahead of an expired session being torn down.  The
+        stored transcript is replayed to a throwaway agent restricted to the
+        memory and skills toolsets, followed by an explicit instruction to
+        save anything worth keeping.
+
+        Nothing happens when the transcript has fewer than four entries or no
+        API key is configured.  Any failure is logged at debug level and
+        swallowed.
+        """
+        try:
+            transcript = self.session_store.load_transcript(old_entry.session_id)
+            # Too little conversation to justify a save pass.
+            if not transcript or len(transcript) < 4:
+                return
+
+            from run_agent import AIAgent
+
+            env = os.getenv
+            api_key = env("OPENAI_API_KEY") or env("OPENROUTER_API_KEY", "")
+            base_url = env("OPENAI_BASE_URL") or env("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
+            model_name = env("HERMES_MODEL") or env("LLM_MODEL", "anthropic/claude-opus-4.6")
+            if not api_key:
+                return
+
+            # Short-lived agent: few iterations, quiet, memory + skills tools only.
+            saver = AIAgent(
+                model=model_name,
+                api_key=api_key,
+                base_url=base_url,
+                max_iterations=8,
+                quiet_mode=True,
+                enabled_toolsets=["memory", "skills"],
+                session_id=old_entry.session_id,
+            )
+
+            # Replay only user/assistant turns that actually carry content.
+            replay = []
+            for entry in transcript:
+                role = entry.get("role")
+                text = entry.get("content")
+                if role in ("user", "assistant") and text:
+                    replay.append({"role": role, "content": text})
+
+            # Final instruction asking the agent to decide what to keep.
+            save_request = (
+                "[System: This session is about to be automatically reset due to "
+                "inactivity or a scheduled daily reset. The conversation context "
+                "will be cleared after this turn.\n\n"
+                "Review the conversation above and:\n"
+                "1. Save any important facts, preferences, or decisions to memory "
+                "(user profile or your notes) that would be useful in future sessions.\n"
+                "2. If you discovered a reusable workflow or solved a non-trivial "
+                "problem, consider saving it as a skill.\n"
+                "3. If nothing is worth saving, that's fine — just skip.\n\n"
+                "Do NOT respond to the user. Just use the memory and skill_manage "
+                "tools if needed, then stop.]"
+            )
+
+            saver.run_conversation(user_message=save_request, conversation_history=replay)
+            logger.info("Pre-reset save completed for session %s", old_entry.session_id)
+        except Exception as e:
+            logger.debug("Pre-reset save failed for session %s: %s", old_entry.session_id, e)
+    
+    
    @staticmethod
    def _load_prefill_messages() -> List[Dict[str, Any]]:
        """Load ephemeral prefill messages from config or env var.
@ -523,6 +631,18 @@ class GatewayRunner:
        
        # Check for commands
        command = event.get_command()
+        
+        # Emit command:* hook for any recognized slash command
+        _known_commands = {"new", "reset", "help", "status", "stop", "model",
+                          "personality", "retry", "undo", "sethome", "set-home"}
+        if command and command in _known_commands:
+            await self.hooks.emit(f"command:{command}", {
+                "platform": source.platform.value if source.platform else "",
+                "user_id": source.user_id,
+                "command": command,
+                "args": event.get_command_args().strip(),
+            })
+        
        if command in ["new", "reset"]:
            return await self._handle_reset_command(event)
        
@ -550,8 +670,28 @@ class GatewayRunner:
        if command in ["sethome", "set-home"]:
            return await self._handle_set_home_command(event)
        
+        # Skill slash commands: /skill-name loads the skill and sends to agent
+        if command:
+            try:
+                from agent.skill_commands import get_skill_commands, build_skill_invocation_message
+                skill_cmds = get_skill_commands()
+                cmd_key = f"/{command}"
+                if cmd_key in skill_cmds:
+                    user_instruction = event.get_command_args().strip()
+                    msg = build_skill_invocation_message(cmd_key, user_instruction)
+                    if msg:
+                        event.text = msg
+                        # Fall through to normal message processing with skill content
+            except Exception as e:
+                logger.debug("Skill command check failed (non-fatal): %s", e)
+        
        # Check for pending exec approval responses
-        session_key_preview = f"agent:main:{source.platform.value}:{source.chat_type}:{source.chat_id}" if source.chat_type != "dm" else f"agent:main:{source.platform.value}:dm"
+        if source.chat_type != "dm":
+            session_key_preview = f"agent:main:{source.platform.value}:{source.chat_type}:{source.chat_id}"
+        elif source.platform and source.platform.value == "whatsapp" and source.chat_id:
+            session_key_preview = f"agent:main:{source.platform.value}:dm:{source.chat_id}"
+        else:
+            session_key_preview = f"agent:main:{source.platform.value}:dm"
        if session_key_preview in self._pending_approvals:
            user_text = event.text.strip().lower()
            if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
@ -573,6 +713,19 @@ class GatewayRunner:
        session_entry = self.session_store.get_or_create_session(source)
        session_key = session_entry.session_key
        
+        # Emit session:start for new or auto-reset sessions
+        _is_new_session = (
+            session_entry.created_at == session_entry.updated_at
+            or getattr(session_entry, "was_auto_reset", False)
+        )
+        if _is_new_session:
+            await self.hooks.emit("session:start", {
+                "platform": source.platform.value if source.platform else "",
+                "user_id": source.user_id,
+                "session_id": session_entry.session_id,
+                "session_key": session_key,
+            })
+        
        # Build session context
        context = build_session_context(source, self.config, session_entry)
        
@ -665,7 +818,39 @@ class GatewayRunner:
                message_text = await self._enrich_message_with_transcription(
                    message_text, audio_paths
                )
-        
+
+        # -----------------------------------------------------------------
+        # Enrich document messages with context notes for the agent
+        # -----------------------------------------------------------------
+        if event.media_urls and event.message_type == MessageType.DOCUMENT:
+            for i, path in enumerate(event.media_urls):
+                mtype = event.media_types[i] if i < len(event.media_types) else ""
+                if not (mtype.startswith("application/") or mtype.startswith("text/")):
+                    continue
+                # Extract display filename by stripping the doc_{uuid12}_ prefix
+                import os as _os
+                basename = _os.path.basename(path)
+                # Format: doc_<12hex>_<original_filename>
+                parts = basename.split("_", 2)
+                display_name = parts[2] if len(parts) >= 3 else basename
+                # Sanitize to prevent prompt injection via filenames
+                import re as _re
+                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
+
+                if mtype.startswith("text/"):
+                    context_note = (
+                        f"[The user sent a text document: '{display_name}'. "
+                        f"Its content has been included below. "
+                        f"The file is also saved at: {path}]"
+                    )
+                else:
+                    context_note = (
+                        f"[The user sent a document: '{display_name}'. "
+                        f"The file is saved at: {path}. "
+                        f"Ask the user what they'd like you to do with it.]"
+                    )
+                message_text = f"{context_note}\n\n{message_text}"
+
        try:
            # Emit agent:start hook
            hook_ctx = {
@ -874,51 +1059,105 @@ class GatewayRunner:
    
    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands."""
-        return (
-            "📖 **Hermes Commands**\n"
-            "\n"
-            "`/new` — Start a new conversation\n"
-            "`/reset` — Reset conversation history\n"
-            "`/status` — Show session info\n"
-            "`/stop` — Interrupt the running agent\n"
-            "`/model [name]` — Show or change the model\n"
-            "`/personality [name]` — Set a personality\n"
-            "`/retry` — Retry your last message\n"
-            "`/undo` — Remove the last exchange\n"
-            "`/sethome` — Set this chat as the home channel\n"
-            "`/help` — Show this message"
-        )
+        # Built-in commands are always listed; installed skill commands are
+        # appended after them when the skill registry can be loaded.
+        lines = [
+            "📖 **Hermes Commands**\n",
+            "`/new` — Start a new conversation",
+            "`/reset` — Reset conversation history",
+            "`/status` — Show session info",
+            "`/stop` — Interrupt the running agent",
+            "`/model [name]` — Show or change the model",
+            "`/personality [name]` — Set a personality",
+            "`/retry` — Retry your last message",
+            "`/undo` — Remove the last exchange",
+            "`/sethome` — Set this chat as the home channel",
+            "`/help` — Show this message",
+        ]
+        try:
+            from agent.skill_commands import get_skill_commands
+            skill_cmds = get_skill_commands()
+            if skill_cmds:
+                # Build the whole section before touching `lines`, so a
+                # malformed entry cannot leave a half-written skill list
+                # (header plus some commands) in the reply.
+                skill_lines = [f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):"]
+                for cmd in sorted(skill_cmds):
+                    skill_lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
+                lines.extend(skill_lines)
+        except Exception as e:
+            # Skills are optional; /help must still answer with the built-ins.
+            logger.debug("Skill command listing failed (non-fatal): %s", e)
+        return "\n".join(lines)
    
    async def _handle_model_command(self, event: MessageEvent) -> str:
        """Handle /model command - show or change the current model."""
+        import yaml
+
        args = event.get_command_args().strip()
-        current = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6")
-        
+        config_path = _hermes_home / 'config.yaml'
+
+        # Resolve current model the same way the agent init does:
+        # env vars first, then config.yaml always overrides.
+        current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+        try:
+            if config_path.exists():
+                with open(config_path) as f:
+                    cfg = yaml.safe_load(f) or {}
+                model_cfg = cfg.get("model", {})
+                # An empty or null entry must not replace the resolved model,
+                # otherwise the reply would show `None` or an empty name.
+                if isinstance(model_cfg, str):
+                    current = model_cfg or current
+                elif isinstance(model_cfg, dict):
+                    current = model_cfg.get("default") or current
+        except Exception as e:
+            # Best effort: keep the env-derived value when config.yaml is unreadable.
+            logger.debug("Could not read model from config.yaml: %s", e)
+
        if not args:
            return f"🤖 **Current model:** `{current}`\n\nTo change: `/model provider/model-name`"
-        
+
+        if "/" not in args:
+            return (
+                f"🤖 Invalid model format: `{args}`\n\n"
+                f"Use `provider/model-name` format, e.g.:\n"
+                f"• `anthropic/claude-sonnet-4`\n"
+                f"• `google/gemini-2.5-pro`\n"
+                f"• `openai/gpt-4o`"
+            )
+
+        # Write to config.yaml (source of truth), same pattern as CLI save_config_value.
+        try:
+            user_config = {}
+            if config_path.exists():
+                with open(config_path) as f:
+                    user_config = yaml.safe_load(f) or {}
+            if "model" not in user_config or not isinstance(user_config["model"], dict):
+                user_config["model"] = {}
+            user_config["model"]["default"] = args
+            # Serialise before opening for write: mode 'w' truncates the file
+            # immediately, so a dump failure would otherwise wipe config.yaml.
+            serialized = yaml.dump(user_config, default_flow_style=False, sort_keys=False)
+            with open(config_path, 'w') as f:
+                f.write(serialized)
+        except Exception as e:
+            return f"⚠️ Failed to save model change: {e}"
+
+        # Also set env var so code reading it before the next agent init sees the update.
        os.environ["HERMES_MODEL"] = args
+
        return f"🤖 Model changed to `{args}`\n_(takes effect on next message)_"
    
    async def _handle_personality_command(self, event: MessageEvent) -> str:
        """Handle /personality command - list or set a personality."""
+        import yaml
+
        args = event.get_command_args().strip().lower()
-        
+        config_path = _hermes_home / 'config.yaml'
+
        try:
-            import yaml
-            config_path = _hermes_home / 'config.yaml'
            if config_path.exists():
                with open(config_path, 'r') as f:
                    config = yaml.safe_load(f) or {}
                personalities = config.get("agent", {}).get("personalities", {})
            else:
+                config = {}
                personalities = {}
        except Exception:
+            config = {}
            personalities = {}
-        
+
        if not personalities:
            return "No personalities configured in `~/.hermes/config.yaml`"
-        
+
        if not args:
            lines = ["🎭 **Available Personalities**\n"]
            for name, prompt in personalities.items():
@ -926,11 +1165,25 @@ class GatewayRunner:
                lines.append(f"• `{name}` — {preview}")
            lines.append(f"\nUsage: `/personality <name>`")
            return "\n".join(lines)
-        
+
        if args in personalities:
-            os.environ["HERMES_PERSONALITY"] = personalities[args]
+            new_prompt = personalities[args]
+
+            # Write to config.yaml, same pattern as CLI save_config_value.
+            try:
+                if "agent" not in config or not isinstance(config.get("agent"), dict):
+                    config["agent"] = {}
+                config["agent"]["system_prompt"] = new_prompt
+                with open(config_path, 'w') as f:
+                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+            except Exception as e:
+                return f"⚠️ Failed to save personality change: {e}"
+
+            # Update in-memory so it takes effect on the very next message.
+            self._ephemeral_system_prompt = new_prompt
+
            return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"
-        
+
        available = ", ".join(f"`{n}`" for n in personalities.keys())
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
@ -1291,9 +1544,24 @@ class GatewayRunner:
            default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
            enabled_toolsets = [default_toolset]
        
-        # Check if tool progress notifications are enabled
-        tool_progress_enabled = os.getenv("HERMES_TOOL_PROGRESS", "true").lower() in ("1", "true", "yes")
-        progress_mode = os.getenv("HERMES_TOOL_PROGRESS_MODE", "all")  # "all" or "new" (only new tools)
+        # Tool progress mode from config.yaml: "all", "new", "verbose", "off"
+        # Falls back to env vars for backward compatibility
+        _progress_cfg = {}
+        try:
+            _tp_cfg_path = _hermes_home / "config.yaml"
+            if _tp_cfg_path.exists():
+                import yaml as _tp_yaml
+                with open(_tp_cfg_path) as _tp_f:
+                    _tp_data = _tp_yaml.safe_load(_tp_f) or {}
+                _progress_cfg = _tp_data.get("display", {})
+        except Exception:
+            pass
+        # A bare `display:` key loads as None and a scalar has no .get();
+        # normalise anything that is not a mapping to an empty one.
+        if not isinstance(_progress_cfg, dict):
+            _progress_cfg = {}
+        _tp_mode = _progress_cfg.get("tool_progress")
+        # PyYAML resolves YAML 1.1 booleans, so an unquoted `off`/`no` arrives
+        # as False and `on`/`yes` as True instead of as strings.
+        if _tp_mode is False:
+            _tp_mode = "off"
+        elif _tp_mode is True:
+            _tp_mode = "all"
+        progress_mode = (
+            _tp_mode
+            or os.getenv("HERMES_TOOL_PROGRESS_MODE")
+            or "all"
+        )
+        # Legacy on/off switch: only consulted when config.yaml does not set a mode.
+        _tp_legacy_enabled = os.getenv("HERMES_TOOL_PROGRESS", "true").lower() in ("1", "true", "yes")
+        tool_progress_enabled = progress_mode != "off" and (bool(_tp_mode) or _tp_legacy_enabled)
        
        # Queue for progress messages (thread-safe)
        progress_queue = queue.Queue() if tool_progress_enabled else None
@ -1394,6 +1662,25 @@ class GatewayRunner:
        result_holder = [None]  # Mutable container for the result
        tools_holder = [None]   # Mutable container for the tool definitions
        
+        # Bridge sync step_callback → async hooks.emit for agent:step events
+        _loop_for_step = asyncio.get_event_loop()
+        _hooks_ref = self.hooks
+
+        def _step_callback_sync(iteration: int, tool_names: list) -> None:
+            """Schedule an ``agent:step`` hook emission on the captured event loop.
+
+            Synchronous entry point for the agent's step callback: the
+            coroutine is handed to ``_loop_for_step`` via
+            ``run_coroutine_threadsafe`` and this function returns without
+            waiting for it.  Failures are logged at debug level and never
+            propagate to the caller.
+
+            Args:
+                iteration: Step counter forwarded to the hook payload.
+                tool_names: Tool names forwarded to the hook payload.
+            """
+            def _log_emit_failure(_fut) -> None:
+                # The coroutine runs later on the loop, so its exception can
+                # only be seen through the returned future.
+                if _fut.cancelled():
+                    return
+                _err = _fut.exception()
+                if _err is not None:
+                    logger.debug("agent:step hook error: %s", _err)
+
+            try:
+                asyncio.run_coroutine_threadsafe(
+                    _hooks_ref.emit("agent:step", {
+                        "platform": source.platform.value if source.platform else "",
+                        "user_id": source.user_id,
+                        "session_id": session_id,
+                        "iteration": iteration,
+                        "tool_names": tool_names,
+                    }),
+                    _loop_for_step,
+                ).add_done_callback(_log_emit_failure)
+            except Exception as _e:
+                # Covers payload construction and scheduling errors (e.g. closed loop).
+                logger.debug("agent:step hook error: %s", _e)
+
+
        def run_sync():
            # Pass session_key to process registry via env var so background
            # processes can be mapped back to this gateway session
@ -1451,13 +1738,17 @@ class GatewayRunner:
                **runtime_kwargs,
                max_iterations=max_iterations,
                quiet_mode=True,
+                verbose_logging=False,
                enabled_toolsets=enabled_toolsets,
                ephemeral_system_prompt=combined_ephemeral or None,
                prefill_messages=self._prefill_messages or None,
                reasoning_config=self._reasoning_config,
                session_id=session_id,
                tool_progress_callback=progress_callback if tool_progress_enabled else None,
+                step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
                platform=platform_key,
+                honcho_session_key=session_key,
+                session_db=self._session_db,
            )
            
            # Store agent reference for interrupt support
@ -1507,6 +1798,19 @@ class GatewayRunner:
                            content = f"[Delivered from {mirror_src}] {content}"
                        agent_history.append({"role": role, "content": content})
            
+            # Collect MEDIA paths already in history so we can exclude them
+            # from the current turn's extraction. This is compression-safe:
+            # even if the message list shrinks, we know which paths are old.
+            _history_media_paths: set = set()
+            for _hm in agent_history:
+                if _hm.get("role") in ("tool", "function"):
+                    _hc = _hm.get("content", "")
+                    if "MEDIA:" in _hc:
+                        for _match in re.finditer(r'MEDIA:(\S+)', _hc):
+                            _p = _match.group(1).strip().rstrip('",}')
+                            if _p:
+                                _history_media_paths.add(_p)
+            
            result = agent.run_conversation(message, conversation_history=agent_history)
            result_holder[0] = result
            
@ -1527,22 +1831,25 @@ class GatewayRunner:
            # doesn't include them.  We collect unique tags from tool results and
            # append any that aren't already present in the final response, so the
            # adapter's extract_media() can find and deliver the files exactly once.
+            #
+            # Uses path-based deduplication against _history_media_paths (collected
+            # before run_conversation) instead of index slicing. This is safe even
+            # when context compression shrinks the message list. (Fixes #160)
            if "MEDIA:" not in final_response:
                media_tags = []
                has_voice_directive = False
                for msg in result.get("messages", []):
-                    if msg.get("role") == "tool" or msg.get("role") == "function":
+                    if msg.get("role") in ("tool", "function"):
                        content = msg.get("content", "")
                        if "MEDIA:" in content:
                            for match in re.finditer(r'MEDIA:(\S+)', content):
                                path = match.group(1).strip().rstrip('",}')
-                                if path:
+                                if path and path not in _history_media_paths:
                                    media_tags.append(f"MEDIA:{path}")
                            if "[[audio_as_voice]]" in content:
                                has_voice_directive = True
                
                if media_tags:
-                    # Deduplicate while preserving order
                    seen = set()
                    unique_tags = []
                    for tag in media_tags:
@ -1668,10 +1975,10 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int
    needing a separate `hermes cron daemon` or system cron entry.

    Also refreshes the channel directory every 5 minutes and prunes the
-    image/audio cache once per hour.
+    image/audio/document cache once per hour.
    """
    from cron.scheduler import tick as cron_tick
-    from gateway.platforms.base import cleanup_image_cache
+    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache

    IMAGE_CACHE_EVERY = 60   # ticks — once per hour at default 60s interval
    CHANNEL_DIR_EVERY = 5    # ticks — every 5 minutes
@ -1700,6 +2007,12 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int
                    logger.info("Image cache cleanup: removed %d stale file(s)", removed)
            except Exception as e:
                logger.debug("Image cache cleanup error: %s", e)
+            try:
+                removed = cleanup_document_cache(max_age_hours=24)
+                if removed:
+                    logger.info("Document cache cleanup: removed %d stale file(s)", removed)
+            except Exception as e:
+                logger.debug("Document cache cleanup error: %s", e)

        stop_event.wait(timeout=interval)
    logger.info("Cron ticker stopped")