fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
This commit is contained in:
Teknium 2026-03-12 05:38:19 -07:00 committed by GitHub
parent a370ab8391
commit e782b92bca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 221 additions and 10 deletions

View file

@ -228,6 +228,7 @@ class GatewayRunner:
self._prefill_messages = self._load_prefill_messages()
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
self._reasoning_config = self._load_reasoning_config()
self._show_reasoning = self._load_show_reasoning()
self._provider_routing = self._load_provider_routing()
self._fallback_model = self._load_fallback_model()
@ -421,6 +422,20 @@ class GatewayRunner:
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
return None
@staticmethod
def _load_show_reasoning() -> bool:
"""Load show_reasoning toggle from config.yaml display section."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return bool(cfg.get("display", {}).get("show_reasoning", False))
except Exception:
pass
return False
@staticmethod
def _load_background_notifications_mode() -> str:
"""Load background process notification mode from config or env var.
@ -846,7 +861,7 @@ class GatewayRunner:
"personality", "retry", "undo", "sethome", "set-home",
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
"update", "title", "resume", "provider", "rollback",
"background"}
"background", "reasoning"}
if command and command in _known_commands:
await self.hooks.emit(f"command:{command}", {
"platform": source.platform.value if source.platform else "",
@ -911,6 +926,9 @@ class GatewayRunner:
if command == "background":
return await self._handle_background_command(event)
if command == "reasoning":
return await self._handle_reasoning_command(event)
# User-defined quick commands (bypass agent loop, no LLM call)
if command:
@ -1352,7 +1370,20 @@ class GatewayRunner:
response = agent_result.get("final_response", "")
agent_messages = agent_result.get("messages", [])
# Prepend reasoning/thinking if display is enabled
if getattr(self, "_show_reasoning", False) and response:
last_reasoning = agent_result.get("last_reasoning")
if last_reasoning:
# Collapse long reasoning to keep messages readable
lines = last_reasoning.strip().splitlines()
if len(lines) > 15:
display_reasoning = "\n".join(lines[:15])
display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
else:
display_reasoning = last_reasoning.strip()
response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
# Emit agent:end hook
await self.hooks.emit("agent:end", {
**hook_ctx,
@ -1543,6 +1574,7 @@ class GatewayRunner:
"`/resume [name]` — Resume a previously-named session",
"`/usage` — Show token usage for this session",
"`/insights [days]` — Show usage insights and analytics",
"`/reasoning [level|show|hide]` — Set reasoning effort or toggle display",
"`/rollback [number]` — List or restore filesystem checkpoints",
"`/background <prompt>` — Run a prompt in a separate background session",
"`/reload-mcp` — Reload MCP servers from config",
@ -2170,6 +2202,88 @@ class GatewayRunner:
except Exception:
pass
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
"""Handle /reasoning command — manage reasoning effort and display toggle.
Usage:
/reasoning Show current effort level and display state
/reasoning <level> Set reasoning effort (none, low, medium, high, xhigh)
/reasoning show|on Show model reasoning in responses
/reasoning hide|off Hide model reasoning from responses
"""
import yaml
args = event.get_command_args().strip().lower()
config_path = _hermes_home / "config.yaml"
def _save_config_key(key_path: str, value):
"""Save a dot-separated key to config.yaml."""
try:
user_config = {}
if config_path.exists():
with open(config_path, encoding="utf-8") as f:
user_config = yaml.safe_load(f) or {}
keys = key_path.split(".")
current = user_config
for k in keys[:-1]:
if k not in current or not isinstance(current[k], dict):
current[k] = {}
current = current[k]
current[keys[-1]] = value
with open(config_path, "w", encoding="utf-8") as f:
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
return True
except Exception as e:
logger.error("Failed to save config key %s: %s", key_path, e)
return False
if not args:
# Show current state
rc = self._reasoning_config
if rc is None:
level = "medium (default)"
elif rc.get("enabled") is False:
level = "none (disabled)"
else:
level = rc.get("effort", "medium")
display_state = "on ✓" if self._show_reasoning else "off"
return (
"🧠 **Reasoning Settings**\n\n"
f"**Effort:** `{level}`\n"
f"**Display:** {display_state}\n\n"
"_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
)
# Display toggle
if args in ("show", "on"):
self._show_reasoning = True
_save_config_key("display.show_reasoning", True)
return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
if args in ("hide", "off"):
self._show_reasoning = False
_save_config_key("display.show_reasoning", False)
return "🧠 ✓ Reasoning display: **OFF**"
# Effort level change
effort = args.strip()
if effort == "none":
parsed = {"enabled": False}
elif effort in ("xhigh", "high", "medium", "low", "minimal"):
parsed = {"enabled": True, "effort": effort}
else:
return (
f"⚠️ Unknown argument: `{effort}`\n\n"
"**Valid levels:** none, low, minimal, medium, high, xhigh\n"
"**Display:** show, hide"
)
self._reasoning_config = parsed
if _save_config_key("agent.reasoning_effort", effort):
return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
else:
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
async def _handle_compress_command(self, event: MessageEvent) -> str:
"""Handle /compress command -- manually compress conversation context."""
source = event.source
@ -3273,6 +3387,7 @@ class GatewayRunner:
return {
"final_response": final_response,
"last_reasoning": result.get("last_reasoning"),
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
"api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
"tools": tools_holder[0] or [],