Merge pull request #1275 from NousResearch/hermes/hermes-f48b210a
feat(gateway): salvage reasoning hot reload from #938
This commit is contained in:
commit
486cb772b8
4 changed files with 262 additions and 19 deletions
|
|
@ -580,6 +580,17 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="reasoning", description="Show or change reasoning effort")
@discord.app_commands.describe(effort="Reasoning effort: xhigh, high, medium, low, minimal, or none.")
async def slash_reasoning(interaction: discord.Interaction, effort: str = ""):
    # Slash-command entry point for /reasoning: defer the interaction
    # ephemerally, replay it as the text command "/reasoning <effort>"
    # through handle_message(), then send a short ephemeral confirmation.
    await interaction.response.defer(ephemeral=True)

    # With the default empty effort the trailing space is stripped, leaving
    # the bare "/reasoning" command text.
    command_text = f"/reasoning {effort}".strip()
    slash_event = self._build_slash_event(interaction, command_text)
    await self.handle_message(slash_event)

    # The confirmation is best-effort: a failed followup is only logged at
    # debug level and never propagates to the caller.
    try:
        await interaction.followup.send("Done~", ephemeral=True)
    except Exception as e:
        logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="personality", description="Set a personality")
|
||||
@discord.app_commands.describe(name="Personality name. Leave empty to list available.")
|
||||
async def slash_personality(interaction: discord.Interaction, name: str = ""):
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
BotCommand("new", "Start a new conversation"),
|
||||
BotCommand("reset", "Reset conversation history"),
|
||||
BotCommand("model", "Show or change the model"),
|
||||
BotCommand("reasoning", "Show or change reasoning effort"),
|
||||
BotCommand("personality", "Set a personality"),
|
||||
BotCommand("retry", "Retry your last message"),
|
||||
BotCommand("undo", "Remove the last exchange"),
|
||||
|
|
|
|||
|
|
@ -468,23 +468,25 @@ class GatewayRunner:
|
|||
|
||||
@staticmethod
|
||||
def _load_reasoning_config() -> dict | None:
|
||||
"""Load reasoning effort from config or env var.
|
||||
|
||||
Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
|
||||
in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
|
||||
Returns None to use default (medium).
|
||||
"""Load reasoning effort from config with env fallback.
|
||||
|
||||
Checks agent.reasoning_effort in config.yaml first, then
|
||||
HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high",
|
||||
"medium", "low", "minimal", "none". Returns None to use default
|
||||
(medium).
|
||||
"""
|
||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||
effort = ""
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
if not effort:
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||
if not effort:
|
||||
return None
|
||||
effort = effort.lower().strip()
|
||||
|
|
@ -933,7 +935,7 @@ class GatewayRunner:
|
|||
command = event.get_command()
|
||||
|
||||
# Emit command:* hook for any recognized slash command
|
||||
_known_commands = {"new", "reset", "help", "status", "stop", "model",
|
||||
_known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
|
||||
"personality", "retry", "undo", "sethome", "set-home",
|
||||
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
||||
"update", "title", "resume", "provider", "rollback",
|
||||
|
|
@ -960,7 +962,10 @@ class GatewayRunner:
|
|||
|
||||
if command == "model":
|
||||
return await self._handle_model_command(event)
|
||||
|
||||
|
||||
if command == "reasoning":
|
||||
return await self._handle_reasoning_command(event)
|
||||
|
||||
if command == "provider":
|
||||
return await self._handle_provider_command(event)
|
||||
|
||||
|
|
@ -2203,6 +2208,8 @@ class GatewayRunner:
|
|||
|
||||
pr = self._provider_routing
|
||||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||||
reasoning_config = self._load_reasoning_config()
|
||||
self._reasoning_config = reasoning_config
|
||||
|
||||
def run_sync():
|
||||
agent = AIAgent(
|
||||
|
|
@ -2212,7 +2219,7 @@ class GatewayRunner:
|
|||
quiet_mode=True,
|
||||
verbose_logging=False,
|
||||
enabled_toolsets=enabled_toolsets,
|
||||
reasoning_config=self._reasoning_config,
|
||||
reasoning_config=reasoning_config,
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
|
|
@ -2310,6 +2317,8 @@ class GatewayRunner:
|
|||
|
||||
args = event.get_command_args().strip().lower()
|
||||
config_path = _hermes_home / "config.yaml"
|
||||
self._reasoning_config = self._load_reasoning_config()
|
||||
self._show_reasoning = self._load_show_reasoning()
|
||||
|
||||
def _save_config_key(key_path: str, value):
|
||||
"""Save a dot-separated key to config.yaml."""
|
||||
|
|
@ -3368,6 +3377,8 @@ class GatewayRunner:
|
|||
|
||||
pr = self._provider_routing
|
||||
honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
|
||||
reasoning_config = self._load_reasoning_config()
|
||||
self._reasoning_config = reasoning_config
|
||||
agent = AIAgent(
|
||||
model=model,
|
||||
**runtime_kwargs,
|
||||
|
|
@ -3377,7 +3388,7 @@ class GatewayRunner:
|
|||
enabled_toolsets=enabled_toolsets,
|
||||
ephemeral_system_prompt=combined_ephemeral or None,
|
||||
prefill_messages=self._prefill_messages or None,
|
||||
reasoning_config=self._reasoning_config,
|
||||
reasoning_config=reasoning_config,
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
|
|
|
|||
220
tests/gateway/test_reasoning_command.py
Normal file
220
tests/gateway/test_reasoning_command.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Tests for gateway /reasoning command and hot reload behavior."""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
import gateway.run as gateway_run
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _make_event(text="/reasoning", platform=Platform.TELEGRAM, user_id="12345", chat_id="67890"):
    """Return a MessageEvent carrying *text* from a synthetic session source.

    The source uses the given platform, user id and chat id, and always has
    the user name "testuser".
    """
    origin = SessionSource(platform=platform, user_id=user_id, chat_id=chat_id, user_name="testuser")
    return MessageEvent(text=text, source=origin)
|
||||
|
||||
|
||||
def _make_runner():
    """Build a GatewayRunner without running __init__ and set test defaults by hand.

    The instance is allocated with object.__new__, so only the attributes
    listed below exist on it.
    """
    runner = object.__new__(gateway_run.GatewayRunner)

    # Hook registry stub: emit() is awaitable and no hooks are loaded.
    hooks = MagicMock()
    hooks.emit = AsyncMock()
    hooks.loaded_hooks = []

    defaults = (
        ("adapters", {}),
        ("_ephemeral_system_prompt", ""),
        ("_prefill_messages", []),
        ("_reasoning_config", None),
        ("_show_reasoning", False),
        ("_provider_routing", {}),
        ("_fallback_model", None),
        ("_running_agents", {}),
        ("hooks", hooks),
        ("_session_db", None),
        # Stubbed lookup: always answers with a (None, None) pair.
        ("_get_or_create_gateway_honcho", lambda session_key: (None, None)),
    )
    for attr_name, attr_value in defaults:
        setattr(runner, attr_name, attr_value)
    return runner
|
||||
|
||||
|
||||
class _CapturingAgent:
    """Stand-in agent that remembers the keyword arguments of its latest construction."""

    # Copy of the kwargs from the most recent constructor call; None until
    # an instance has been created (tests reset it to None before each run).
    last_init = None

    def __init__(self, *args, **kwargs):
        # Positional arguments are accepted but not recorded. The kwargs are
        # stored on the class so a test can read them without holding a
        # reference to the instance.
        self.tools = []
        type(self).last_init = {**kwargs}

    def run_conversation(self, user_message: str, conversation_history=None, task_id=None):
        """Return a fixed result dict, ignoring every argument."""
        canned = dict(final_response="ok", messages=[], api_calls=1)
        return canned
|
||||
|
||||
|
||||
class TestReasoningCommand:
    """Tests for the gateway ``/reasoning`` command and per-message config reload.

    The temp-config setup shared by several tests and the ``_run_agent``
    harness shared by the last two tests live in the private helpers below,
    so each test body contains only the values it varies and its assertions.
    """

    # Value the stubbed gateway_run._resolve_runtime_agent_kwargs returns
    # (as a fresh copy on every call).
    _RUNTIME_KWARGS = {
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": "test-key",
    }

    @staticmethod
    def _install_config(tmp_path, monkeypatch, yaml_text):
        """Write *yaml_text* to a temporary config.yaml and point the gateway at it.

        Creates ``tmp_path / "hermes"``, writes ``config.yaml`` inside it and
        patches ``gateway_run._hermes_home`` to that directory.

        Returns:
            Path of the written ``config.yaml``.
        """
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir()
        config_path = hermes_home / "config.yaml"
        config_path.write_text(yaml_text, encoding="utf-8")
        monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
        return config_path

    def _run_agent_once(self, tmp_path, monkeypatch, yaml_text, *, env_effort=None, stale_config=None):
        """Run ``GatewayRunner._run_agent`` once against a temporary config.

        Args:
            tmp_path: pytest temporary directory.
            monkeypatch: pytest monkeypatch fixture.
            yaml_text: contents written to the temporary ``config.yaml``.
            env_effort: value for ``HERMES_REASONING_EFFORT``; when None the
                variable is removed from the environment instead.
            stale_config: when not None, assigned to the runner's
                ``_reasoning_config`` before the run to simulate a cached value.

        Returns:
            The result returned by ``_run_agent``. The kwargs the agent was
            constructed with are available as ``_CapturingAgent.last_init``.
        """
        config_path = self._install_config(tmp_path, monkeypatch, yaml_text)
        monkeypatch.setattr(gateway_run, "_env_path", config_path.parent / ".env")
        monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
        monkeypatch.setattr(
            gateway_run,
            "_resolve_runtime_agent_kwargs",
            lambda: dict(self._RUNTIME_KWARGS),
        )
        if env_effort is None:
            monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False)
        else:
            monkeypatch.setenv("HERMES_REASONING_EFFORT", env_effort)

        # Register a fake ``run_agent`` module whose AIAgent is the capturing
        # stub, so the constructor kwargs can be inspected after the run.
        fake_run_agent = types.ModuleType("run_agent")
        fake_run_agent.AIAgent = _CapturingAgent
        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

        _CapturingAgent.last_init = None
        runner = _make_runner()
        if stale_config is not None:
            runner._reasoning_config = stale_config

        source = SessionSource(
            platform=Platform.LOCAL,
            chat_id="cli",
            chat_name="CLI",
            chat_type="dm",
            user_id="user-1",
        )
        return asyncio.run(
            runner._run_agent(
                message="ping",
                context_prompt="",
                history=[],
                source=source,
                session_id="session-1",
                session_key="agent:main:local:dm",
            )
        )

    @pytest.mark.asyncio
    async def test_reasoning_in_help_output(self):
        """The /help text advertises the /reasoning command and its arguments."""
        runner = _make_runner()
        event = _make_event(text="/help")

        result = await runner._handle_help_command(event)

        assert "/reasoning [level|show|hide]" in result

    def test_reasoning_is_known_command(self):
        """The source of _handle_message mentions the "reasoning" command name."""
        source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
        assert '"reasoning"' in source

    @pytest.mark.asyncio
    async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch):
        """A bare /reasoning reports config.yaml values and replaces stale cached state."""
        self._install_config(
            tmp_path,
            monkeypatch,
            "agent:\n reasoning_effort: none\ndisplay:\n show_reasoning: true\n",
        )
        monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False)

        runner = _make_runner()
        # Cached values that disagree with the file on purpose.
        runner._reasoning_config = {"enabled": True, "effort": "xhigh"}
        runner._show_reasoning = False

        result = await runner._handle_reasoning_command(_make_event("/reasoning"))

        assert "**Effort:** `none (disabled)`" in result
        assert "**Display:** on ✓" in result
        assert runner._reasoning_config == {"enabled": False}
        assert runner._show_reasoning is True

    @pytest.mark.asyncio
    async def test_handle_reasoning_command_updates_config_and_cache(self, tmp_path, monkeypatch):
        """/reasoning low persists the level to config.yaml and updates the cached config."""
        config_path = self._install_config(
            tmp_path,
            monkeypatch,
            "agent:\n reasoning_effort: medium\n",
        )
        monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False)

        runner = _make_runner()
        runner._reasoning_config = {"enabled": True, "effort": "medium"}

        result = await runner._handle_reasoning_command(_make_event("/reasoning low"))

        saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
        assert saved["agent"]["reasoning_effort"] == "low"
        assert runner._reasoning_config == {"enabled": True, "effort": "low"}
        assert "takes effect on next message" in result

    def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch):
        """_run_agent passes the effort from config.yaml, not the runner's cached value."""
        result = self._run_agent_once(
            tmp_path,
            monkeypatch,
            "agent:\n reasoning_effort: low\n",
            stale_config={"enabled": True, "effort": "xhigh"},
        )

        assert result["final_response"] == "ok"
        assert _CapturingAgent.last_init is not None
        assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"}

    def test_run_agent_prefers_config_over_stale_reasoning_env(self, tmp_path, monkeypatch):
        """config.yaml wins over a conflicting HERMES_REASONING_EFFORT value."""
        result = self._run_agent_once(
            tmp_path,
            monkeypatch,
            "agent:\n reasoning_effort: none\n",
            env_effort="low",
        )

        assert result["final_response"] == "ok"
        assert _CapturingAgent.last_init is not None
        assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": False}
|
||||
Loading…
Add table
Add a link
Reference in a new issue