From 52ba940c9b04077c235fbfa1888d390d5c208ca0 Mon Sep 17 00:00:00 2001 From: Verne <1783491278@qq.com> Date: Wed, 11 Mar 2026 22:12:11 +0800 Subject: [PATCH] feat(gateway): add reasoning hot reload Add a /reasoning command across gateway adapters so users can inspect or change reasoning effort without editing config by hand. Reload reasoning settings from config.yaml before each agent run, including background tasks, so the next message picks up the new value consistently. --- gateway/platforms/discord.py | 11 ++ gateway/platforms/telegram.py | 1 + gateway/run.py | 49 ++++-- tests/gateway/test_reasoning_command.py | 220 ++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 19 deletions(-) create mode 100644 tests/gateway/test_reasoning_command.py diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index e68e5292..e05a421e 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -580,6 +580,17 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: logger.debug("Discord followup failed: %s", e) + @tree.command(name="reasoning", description="Show or change reasoning effort") + @discord.app_commands.describe(effort="Reasoning effort: xhigh, high, medium, low, minimal, or none.") + async def slash_reasoning(interaction: discord.Interaction, effort: str = ""): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, f"/reasoning {effort}".strip()) + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + @tree.command(name="personality", description="Set a personality") @discord.app_commands.describe(name="Personality name. Leave empty to list available.") async def slash_personality(interaction: discord.Interaction, name: str = ""): diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 5243d302..06f423c6 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -159,6 +159,7 @@ class TelegramAdapter(BasePlatformAdapter): BotCommand("new", "Start a new conversation"), BotCommand("reset", "Reset conversation history"), BotCommand("model", "Show or change the model"), + BotCommand("reasoning", "Show or change reasoning effort"), BotCommand("personality", "Set a personality"), BotCommand("retry", "Retry your last message"), BotCommand("undo", "Remove the last exchange"), diff --git a/gateway/run.py b/gateway/run.py index 1b7a2ed6..5bac7da5 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -468,23 +468,25 @@ class GatewayRunner: @staticmethod def _load_reasoning_config() -> dict | None: - """Load reasoning effort from config or env var. - - Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort - in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none". - Returns None to use default (medium). + """Load reasoning effort from config with env fallback. + + Checks agent.reasoning_effort in config.yaml first, then + HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high", + "medium", "low", "minimal", "none". Returns None to use default + (medium). """ - effort = os.getenv("HERMES_REASONING_EFFORT", "") + effort = "" + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip() + except Exception: + pass if not effort: - try: - import yaml as _y - cfg_path = _hermes_home / "config.yaml" - if cfg_path.exists(): - with open(cfg_path, encoding="utf-8") as _f: - cfg = _y.safe_load(_f) or {} - effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip() - except Exception: - pass + effort = os.getenv("HERMES_REASONING_EFFORT", "") if not effort: return None effort = effort.lower().strip() @@ -933,7 +935,7 @@ class GatewayRunner: command = event.get_command() # Emit command:* hook for any recognized slash command - _known_commands = {"new", "reset", "help", "status", "stop", "model", + _known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning", "personality", "retry", "undo", "sethome", "set-home", "compress", "usage", "insights", "reload-mcp", "reload_mcp", "update", "title", "resume", "provider", "rollback", @@ -960,7 +962,10 @@ class GatewayRunner: if command == "model": return await self._handle_model_command(event) - + + if command == "reasoning": + return await self._handle_reasoning_command(event) + if command == "provider": return await self._handle_provider_command(event) @@ -2203,6 +2208,8 @@ class GatewayRunner: pr = self._provider_routing max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) + reasoning_config = self._load_reasoning_config() + self._reasoning_config = reasoning_config def run_sync(): agent = AIAgent( @@ -2212,7 +2219,7 @@ class GatewayRunner: quiet_mode=True, verbose_logging=False, enabled_toolsets=enabled_toolsets, - reasoning_config=self._reasoning_config, + reasoning_config=reasoning_config, providers_allowed=pr.get("only"), providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), @@ -2310,6 +2317,8 @@ class GatewayRunner: args = event.get_command_args().strip().lower() config_path = _hermes_home / "config.yaml" + self._reasoning_config = self._load_reasoning_config() + self._show_reasoning = self._load_show_reasoning() def _save_config_key(key_path: str, value): """Save a dot-separated key to config.yaml.""" @@ -3368,6 +3377,8 @@ class GatewayRunner: pr = self._provider_routing honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key) + reasoning_config = self._load_reasoning_config() + self._reasoning_config = reasoning_config agent = AIAgent( model=model, **runtime_kwargs, @@ -3377,7 +3388,7 @@ class GatewayRunner: enabled_toolsets=enabled_toolsets, ephemeral_system_prompt=combined_ephemeral or None, prefill_messages=self._prefill_messages or None, - reasoning_config=self._reasoning_config, + reasoning_config=reasoning_config, providers_allowed=pr.get("only"), providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), diff --git a/tests/gateway/test_reasoning_command.py b/tests/gateway/test_reasoning_command.py new file mode 100644 index 00000000..745094fe --- /dev/null +++ b/tests/gateway/test_reasoning_command.py @@ -0,0 +1,220 @@ +"""Tests for gateway /reasoning command and hot reload behavior.""" + +import asyncio +import inspect +import sys +import types +from unittest.mock import AsyncMock, MagicMock + +import pytest +import yaml + +import gateway.run as gateway_run +from gateway.config import Platform +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _make_event(text="/reasoning", platform=Platform.TELEGRAM, user_id="12345", chat_id="67890"): + """Build a MessageEvent for testing.""" + source = SessionSource( + platform=platform, + user_id=user_id, + chat_id=chat_id, + user_name="testuser", + ) + return MessageEvent(text=text, source=source) + + +def _make_runner(): + """Create a bare GatewayRunner without calling __init__.""" + runner = object.__new__(gateway_run.GatewayRunner) + runner.adapters = {} + runner._ephemeral_system_prompt = "" + runner._prefill_messages = [] + runner._reasoning_config = None + runner._show_reasoning = False + runner._provider_routing = {} + runner._fallback_model = None + runner._running_agents = {} + runner.hooks = MagicMock() + runner.hooks.emit = AsyncMock() + runner.hooks.loaded_hooks = [] + runner._session_db = None + runner._get_or_create_gateway_honcho = lambda session_key: (None, None) + return runner + + +class _CapturingAgent: + """Fake agent that records init kwargs for assertions.""" + + last_init = None + + def __init__(self, *args, **kwargs): + type(self).last_init = dict(kwargs) + self.tools = [] + + def run_conversation(self, user_message: str, conversation_history=None, task_id=None): + return { + "final_response": "ok", + "messages": [], + "api_calls": 1, + } + + +class TestReasoningCommand: + @pytest.mark.asyncio + async def test_reasoning_in_help_output(self): + runner = _make_runner() + event = _make_event(text="/help") + + result = await runner._handle_help_command(event) + + assert "/reasoning [level|show|hide]" in result + + def test_reasoning_is_known_command(self): + source = inspect.getsource(gateway_run.GatewayRunner._handle_message) + assert '"reasoning"' in source + + @pytest.mark.asyncio + async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text( + "agent:\n reasoning_effort: none\ndisplay:\n show_reasoning: true\n", + encoding="utf-8", + ) + + monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) + monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False) + + runner = _make_runner() + runner._reasoning_config = {"enabled": True, "effort": "xhigh"} + runner._show_reasoning = False + + result = await runner._handle_reasoning_command(_make_event("/reasoning")) + + assert "**Effort:** `none (disabled)`" in result + assert "**Display:** on ✓" in result + assert runner._reasoning_config == {"enabled": False} + assert runner._show_reasoning is True + + @pytest.mark.asyncio + async def test_handle_reasoning_command_updates_config_and_cache(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8") + + monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) + monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False) + + runner = _make_runner() + runner._reasoning_config = {"enabled": True, "effort": "medium"} + + result = await runner._handle_reasoning_command(_make_event("/reasoning low")) + + saved = yaml.safe_load(config_path.read_text(encoding="utf-8")) + assert saved["agent"]["reasoning_effort"] == "low" + assert runner._reasoning_config == {"enabled": True, "effort": "low"} + assert "takes effect on next message" in result + + def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8") + + monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) + monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env") + monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None) + monkeypatch.setattr( + gateway_run, + "_resolve_runtime_agent_kwargs", + lambda: { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "test-key", + }, + ) + monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False) + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = _CapturingAgent + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + + _CapturingAgent.last_init = None + runner = _make_runner() + runner._reasoning_config = {"enabled": True, "effort": "xhigh"} + + source = SessionSource( + platform=Platform.LOCAL, + chat_id="cli", + chat_name="CLI", + chat_type="dm", + user_id="user-1", + ) + + result = asyncio.run( + runner._run_agent( + message="ping", + context_prompt="", + history=[], + source=source, + session_id="session-1", + session_key="agent:main:local:dm", + ) + ) + + assert result["final_response"] == "ok" + assert _CapturingAgent.last_init is not None + assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"} + + def test_run_agent_prefers_config_over_stale_reasoning_env(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: none\n", encoding="utf-8") + + monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) + monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env") + monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None) + monkeypatch.setattr( + gateway_run, + "_resolve_runtime_agent_kwargs", + lambda: { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "test-key", + }, + ) + monkeypatch.setenv("HERMES_REASONING_EFFORT", "low") + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = _CapturingAgent + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + + _CapturingAgent.last_init = None + runner = _make_runner() + + source = SessionSource( + platform=Platform.LOCAL, + chat_id="cli", + chat_name="CLI", + chat_type="dm", + user_id="user-1", + ) + + result = asyncio.run( + runner._run_agent( + message="ping", + context_prompt="", + history=[], + source=source, + session_id="session-1", + session_key="agent:main:local:dm", + ) + ) + + assert result["final_response"] == "ok" + assert _CapturingAgent.last_init is not None + assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": False}