Merge pull request #1275 from NousResearch/hermes/hermes-f48b210a
feat(gateway): salvage reasoning hot reload from #938
This commit is contained in:
commit
486cb772b8
4 changed files with 262 additions and 19 deletions
|
|
@ -580,6 +580,17 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Discord followup failed: %s", e)
|
logger.debug("Discord followup failed: %s", e)
|
||||||
|
|
||||||
|
@tree.command(name="reasoning", description="Show or change reasoning effort")
@discord.app_commands.describe(effort="Reasoning effort: xhigh, high, medium, low, minimal, or none.")
async def slash_reasoning(interaction: discord.Interaction, effort: str = ""):
    """Handle the Discord ``/reasoning`` slash command.

    The interaction is re-expressed as a textual ``/reasoning <effort>``
    command and passed to ``self.handle_message``. An empty ``effort``
    yields the bare ``/reasoning`` command because the text is stripped.

    NOTE(review): ``self``, ``tree`` and ``logger`` come from the enclosing
    scope, which is not visible in this hunk -- confirm this is registered
    next to the other slash commands.
    """
    # Acknowledge first; the response is marked ephemeral.
    await interaction.response.defer(ephemeral=True)
    event = self._build_slash_event(interaction, f"/reasoning {effort}".strip())
    await self.handle_message(event)
    try:
        await interaction.followup.send("Done~", ephemeral=True)
    except Exception as e:
        # Best-effort confirmation: a failed follow-up is only logged.
        logger.debug("Discord followup failed: %s", e)
|
||||||
|
|
||||||
@tree.command(name="personality", description="Set a personality")
|
@tree.command(name="personality", description="Set a personality")
|
||||||
@discord.app_commands.describe(name="Personality name. Leave empty to list available.")
|
@discord.app_commands.describe(name="Personality name. Leave empty to list available.")
|
||||||
async def slash_personality(interaction: discord.Interaction, name: str = ""):
|
async def slash_personality(interaction: discord.Interaction, name: str = ""):
|
||||||
|
|
|
||||||
|
|
@ -159,6 +159,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||||
BotCommand("new", "Start a new conversation"),
|
BotCommand("new", "Start a new conversation"),
|
||||||
BotCommand("reset", "Reset conversation history"),
|
BotCommand("reset", "Reset conversation history"),
|
||||||
BotCommand("model", "Show or change the model"),
|
BotCommand("model", "Show or change the model"),
|
||||||
|
BotCommand("reasoning", "Show or change reasoning effort"),
|
||||||
BotCommand("personality", "Set a personality"),
|
BotCommand("personality", "Set a personality"),
|
||||||
BotCommand("retry", "Retry your last message"),
|
BotCommand("retry", "Retry your last message"),
|
||||||
BotCommand("undo", "Remove the last exchange"),
|
BotCommand("undo", "Remove the last exchange"),
|
||||||
|
|
|
||||||
|
|
@ -468,23 +468,25 @@ class GatewayRunner:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _load_reasoning_config() -> dict | None:
|
def _load_reasoning_config() -> dict | None:
|
||||||
"""Load reasoning effort from config or env var.
|
"""Load reasoning effort from config with env fallback.
|
||||||
|
|
||||||
Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
|
Checks agent.reasoning_effort in config.yaml first, then
|
||||||
in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
|
HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high",
|
||||||
Returns None to use default (medium).
|
"medium", "low", "minimal", "none". Returns None to use default
|
||||||
|
(medium).
|
||||||
"""
|
"""
|
||||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
effort = ""
|
||||||
|
try:
|
||||||
|
import yaml as _y
|
||||||
|
cfg_path = _hermes_home / "config.yaml"
|
||||||
|
if cfg_path.exists():
|
||||||
|
with open(cfg_path, encoding="utf-8") as _f:
|
||||||
|
cfg = _y.safe_load(_f) or {}
|
||||||
|
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
if not effort:
|
if not effort:
|
||||||
try:
|
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||||
import yaml as _y
|
|
||||||
cfg_path = _hermes_home / "config.yaml"
|
|
||||||
if cfg_path.exists():
|
|
||||||
with open(cfg_path, encoding="utf-8") as _f:
|
|
||||||
cfg = _y.safe_load(_f) or {}
|
|
||||||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
if not effort:
|
if not effort:
|
||||||
return None
|
return None
|
||||||
effort = effort.lower().strip()
|
effort = effort.lower().strip()
|
||||||
|
|
@ -933,7 +935,7 @@ class GatewayRunner:
|
||||||
command = event.get_command()
|
command = event.get_command()
|
||||||
|
|
||||||
# Emit command:* hook for any recognized slash command
|
# Emit command:* hook for any recognized slash command
|
||||||
_known_commands = {"new", "reset", "help", "status", "stop", "model",
|
_known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
|
||||||
"personality", "retry", "undo", "sethome", "set-home",
|
"personality", "retry", "undo", "sethome", "set-home",
|
||||||
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
||||||
"update", "title", "resume", "provider", "rollback",
|
"update", "title", "resume", "provider", "rollback",
|
||||||
|
|
@ -960,7 +962,10 @@ class GatewayRunner:
|
||||||
|
|
||||||
if command == "model":
|
if command == "model":
|
||||||
return await self._handle_model_command(event)
|
return await self._handle_model_command(event)
|
||||||
|
|
||||||
|
if command == "reasoning":
|
||||||
|
return await self._handle_reasoning_command(event)
|
||||||
|
|
||||||
if command == "provider":
|
if command == "provider":
|
||||||
return await self._handle_provider_command(event)
|
return await self._handle_provider_command(event)
|
||||||
|
|
||||||
|
|
@ -2203,6 +2208,8 @@ class GatewayRunner:
|
||||||
|
|
||||||
pr = self._provider_routing
|
pr = self._provider_routing
|
||||||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||||||
|
reasoning_config = self._load_reasoning_config()
|
||||||
|
self._reasoning_config = reasoning_config
|
||||||
|
|
||||||
def run_sync():
|
def run_sync():
|
||||||
agent = AIAgent(
|
agent = AIAgent(
|
||||||
|
|
@ -2212,7 +2219,7 @@ class GatewayRunner:
|
||||||
quiet_mode=True,
|
quiet_mode=True,
|
||||||
verbose_logging=False,
|
verbose_logging=False,
|
||||||
enabled_toolsets=enabled_toolsets,
|
enabled_toolsets=enabled_toolsets,
|
||||||
reasoning_config=self._reasoning_config,
|
reasoning_config=reasoning_config,
|
||||||
providers_allowed=pr.get("only"),
|
providers_allowed=pr.get("only"),
|
||||||
providers_ignored=pr.get("ignore"),
|
providers_ignored=pr.get("ignore"),
|
||||||
providers_order=pr.get("order"),
|
providers_order=pr.get("order"),
|
||||||
|
|
@ -2310,6 +2317,8 @@ class GatewayRunner:
|
||||||
|
|
||||||
args = event.get_command_args().strip().lower()
|
args = event.get_command_args().strip().lower()
|
||||||
config_path = _hermes_home / "config.yaml"
|
config_path = _hermes_home / "config.yaml"
|
||||||
|
self._reasoning_config = self._load_reasoning_config()
|
||||||
|
self._show_reasoning = self._load_show_reasoning()
|
||||||
|
|
||||||
def _save_config_key(key_path: str, value):
|
def _save_config_key(key_path: str, value):
|
||||||
"""Save a dot-separated key to config.yaml."""
|
"""Save a dot-separated key to config.yaml."""
|
||||||
|
|
@ -3368,6 +3377,8 @@ class GatewayRunner:
|
||||||
|
|
||||||
pr = self._provider_routing
|
pr = self._provider_routing
|
||||||
honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
|
honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
|
||||||
|
reasoning_config = self._load_reasoning_config()
|
||||||
|
self._reasoning_config = reasoning_config
|
||||||
agent = AIAgent(
|
agent = AIAgent(
|
||||||
model=model,
|
model=model,
|
||||||
**runtime_kwargs,
|
**runtime_kwargs,
|
||||||
|
|
@ -3377,7 +3388,7 @@ class GatewayRunner:
|
||||||
enabled_toolsets=enabled_toolsets,
|
enabled_toolsets=enabled_toolsets,
|
||||||
ephemeral_system_prompt=combined_ephemeral or None,
|
ephemeral_system_prompt=combined_ephemeral or None,
|
||||||
prefill_messages=self._prefill_messages or None,
|
prefill_messages=self._prefill_messages or None,
|
||||||
reasoning_config=self._reasoning_config,
|
reasoning_config=reasoning_config,
|
||||||
providers_allowed=pr.get("only"),
|
providers_allowed=pr.get("only"),
|
||||||
providers_ignored=pr.get("ignore"),
|
providers_ignored=pr.get("ignore"),
|
||||||
providers_order=pr.get("order"),
|
providers_order=pr.get("order"),
|
||||||
|
|
|
||||||
220
tests/gateway/test_reasoning_command.py
Normal file
220
tests/gateway/test_reasoning_command.py
Normal file
|
|
@ -0,0 +1,220 @@
|
||||||
|
"""Tests for gateway /reasoning command and hot reload behavior."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import inspect
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
import gateway.run as gateway_run
|
||||||
|
from gateway.config import Platform
|
||||||
|
from gateway.platforms.base import MessageEvent
|
||||||
|
from gateway.session import SessionSource
|
||||||
|
|
||||||
|
|
||||||
|
def _make_event(text="/reasoning", platform=Platform.TELEGRAM, user_id="12345", chat_id="67890"):
    """Build a MessageEvent for testing.

    Args:
        text: Raw message text, e.g. ``"/reasoning low"``.
        platform: Platform the message is attributed to.
        user_id: Sender id placed on the session source.
        chat_id: Chat id placed on the session source.

    Returns:
        A ``MessageEvent`` whose source carries the fixed user name
        ``"testuser"``.
    """
    source = SessionSource(
        platform=platform,
        user_id=user_id,
        chat_id=chat_id,
        user_name="testuser",
    )
    return MessageEvent(text=text, source=source)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_runner():
    """Create a bare GatewayRunner without calling __init__.

    ``object.__new__`` is used so the real constructor does not run. The
    attributes assigned below are set by hand to inert defaults, with hooks
    replaced by mocks and the Honcho lookup stubbed to ``(None, None)``.
    """
    runner = object.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    runner._ephemeral_system_prompt = ""
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._show_reasoning = False
    runner._provider_routing = {}
    runner._fallback_model = None
    runner._running_agents = {}
    # Hooks are mocked; ``emit`` is an AsyncMock so it can be awaited.
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None
    runner._get_or_create_gateway_honcho = lambda session_key: (None, None)
    return runner
|
||||||
|
|
||||||
|
|
||||||
|
class _CapturingAgent:
|
||||||
|
"""Fake agent that records init kwargs for assertions."""
|
||||||
|
|
||||||
|
last_init = None
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
type(self).last_init = dict(kwargs)
|
||||||
|
self.tools = []
|
||||||
|
|
||||||
|
def run_conversation(self, user_message: str, conversation_history=None, task_id=None):
|
||||||
|
return {
|
||||||
|
"final_response": "ok",
|
||||||
|
"messages": [],
|
||||||
|
"api_calls": 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestReasoningCommand:
    """Tests for the gateway ``/reasoning`` command and per-message reload."""

    @staticmethod
    def _write_config(tmp_path, text):
        """Create ``<tmp_path>/hermes/config.yaml`` containing *text*.

        Returns:
            ``(hermes_home, config_path)`` as path objects.
        """
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir()
        config_path = hermes_home / "config.yaml"
        config_path.write_text(text, encoding="utf-8")
        return hermes_home, config_path

    @staticmethod
    def _isolate_run_agent(monkeypatch, hermes_home):
        """Patch ``gateway_run`` so ``_run_agent`` builds a ``_CapturingAgent``.

        Redirects the module's home and env paths to *hermes_home*, stubs
        ``load_dotenv`` and the runtime-kwargs resolver, installs a fake
        ``run_agent`` module in ``sys.modules``, and resets the captured
        constructor kwargs.
        """
        monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
        monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env")
        monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
        monkeypatch.setattr(
            gateway_run,
            "_resolve_runtime_agent_kwargs",
            lambda: {
                "provider": "openrouter",
                "api_mode": "chat_completions",
                "base_url": "https://openrouter.ai/api/v1",
                "api_key": "test-key",
            },
        )
        fake_run_agent = types.ModuleType("run_agent")
        fake_run_agent.AIAgent = _CapturingAgent
        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
        _CapturingAgent.last_init = None

    @staticmethod
    def _run_ping(runner):
        """Run ``runner._run_agent`` once with a fixed local DM message."""
        source = SessionSource(
            platform=Platform.LOCAL,
            chat_id="cli",
            chat_name="CLI",
            chat_type="dm",
            user_id="user-1",
        )
        return asyncio.run(
            runner._run_agent(
                message="ping",
                context_prompt="",
                history=[],
                source=source,
                session_id="session-1",
                session_key="agent:main:local:dm",
            )
        )

    @pytest.mark.asyncio
    async def test_reasoning_in_help_output(self):
        runner = _make_runner()
        event = _make_event(text="/help")

        result = await runner._handle_help_command(event)

        assert "/reasoning [level|show|hide]" in result

    def test_reasoning_is_known_command(self):
        source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
        assert '"reasoning"' in source

    @pytest.mark.asyncio
    async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch):
        hermes_home, _config_path = self._write_config(
            tmp_path,
            "agent:\n  reasoning_effort: none\ndisplay:\n  show_reasoning: true\n",
        )

        monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
        monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False)

        # Seed cached values that disagree with the file, so the assertions
        # below only pass if the command re-reads config.yaml.
        runner = _make_runner()
        runner._reasoning_config = {"enabled": True, "effort": "xhigh"}
        runner._show_reasoning = False

        result = await runner._handle_reasoning_command(_make_event("/reasoning"))

        assert "**Effort:** `none (disabled)`" in result
        assert "**Display:** on ✓" in result
        assert runner._reasoning_config == {"enabled": False}
        assert runner._show_reasoning is True

    @pytest.mark.asyncio
    async def test_handle_reasoning_command_updates_config_and_cache(self, tmp_path, monkeypatch):
        hermes_home, config_path = self._write_config(
            tmp_path,
            "agent:\n  reasoning_effort: medium\n",
        )

        monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
        monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False)

        runner = _make_runner()
        runner._reasoning_config = {"enabled": True, "effort": "medium"}

        result = await runner._handle_reasoning_command(_make_event("/reasoning low"))

        saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
        assert saved["agent"]["reasoning_effort"] == "low"
        assert runner._reasoning_config == {"enabled": True, "effort": "low"}
        assert "takes effect on next message" in result

    def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch):
        hermes_home, _config_path = self._write_config(
            tmp_path,
            "agent:\n  reasoning_effort: low\n",
        )
        self._isolate_run_agent(monkeypatch, hermes_home)
        monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False)

        # The cached value is stale on purpose; the agent must receive the
        # value from the file instead.
        runner = _make_runner()
        runner._reasoning_config = {"enabled": True, "effort": "xhigh"}

        result = self._run_ping(runner)

        assert result["final_response"] == "ok"
        assert _CapturingAgent.last_init is not None
        assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"}

    def test_run_agent_prefers_config_over_stale_reasoning_env(self, tmp_path, monkeypatch):
        hermes_home, _config_path = self._write_config(
            tmp_path,
            "agent:\n  reasoning_effort: none\n",
        )
        self._isolate_run_agent(monkeypatch, hermes_home)
        # The env var disagrees with the file; the file is expected to win.
        monkeypatch.setenv("HERMES_REASONING_EFFORT", "low")

        runner = _make_runner()

        result = self._run_ping(runner)

        assert result["final_response"] == "ok"
        assert _CapturingAgent.last_init is not None
        assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": False}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue