feat: change default reasoning effort from xhigh to medium

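Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
the default via the HERMES_REASONING_EFFORT environment variable, the
agent.reasoning_effort setting in config.yaml, or the CLI flag.
Updates code, tests, example config, and docs.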
This commit is contained in:
teknium1 2026-03-07 10:14:19 -08:00
parent 23e84de830
commit b84f9e410c
9 changed files with 25 additions and 24 deletions

View file

@ -330,7 +330,7 @@ class GatewayRunner:
Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
Returns None to use default (xhigh).
Returns None to use default (medium).
"""
effort = os.getenv("HERMES_REASONING_EFFORT", "")
if not effort:
@ -351,7 +351,7 @@ class GatewayRunner:
valid = ("xhigh", "high", "medium", "low", "minimal")
if effort in valid:
return {"enabled": True, "effort": effort}
logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort)
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
return None
@staticmethod