feat: default reasoning effort from xhigh to medium
Reduces token usage and latency for most tasks by defaulting to medium reasoning effort instead of xhigh. Users can still override via config or CLI flag. Updates code, tests, example config, and docs.
This commit is contained in:
parent
23e84de830
commit
b84f9e410c
9 changed files with 25 additions and 24 deletions
|
|
@ -330,7 +330,7 @@ class GatewayRunner:
|
|||
|
||||
Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
|
||||
in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
|
||||
Returns None to use default (xhigh).
|
||||
Returns None to use default (medium).
|
||||
"""
|
||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||
if not effort:
|
||||
|
|
@ -351,7 +351,7 @@ class GatewayRunner:
|
|||
valid = ("xhigh", "high", "medium", "low", "minimal")
|
||||
if effort in valid:
|
||||
return {"enabled": True, "effort": effort}
|
||||
logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort)
|
||||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue