diff --git a/gateway/run.py b/gateway/run.py index 103f8813..940dcdf0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1125,10 +1125,16 @@ class GatewayRunner: get_model_context_length, ) - # Read model + compression config from config.yaml — same - # source of truth the agent itself uses. + # Read model + compression config from config.yaml. + # NOTE: hygiene threshold is intentionally HIGHER than the agent's + # own compressor (0.85 vs 0.50). Hygiene is a safety net for + # sessions that grew too large between turns — it fires pre-agent + # to prevent API failures. The agent's own compressor handles + # normal context management during its tool loop with accurate + # real token counts. Having hygiene at 0.50 caused premature + # compression on every turn in long gateway sessions. _hyg_model = "anthropic/claude-sonnet-4.6" - _hyg_threshold_pct = 0.50 + _hyg_threshold_pct = 0.85 _hyg_compression_enabled = True try: _hyg_cfg_path = _hermes_home / "config.yaml" @@ -1144,22 +1150,18 @@ class GatewayRunner: elif isinstance(_model_cfg, dict): _hyg_model = _model_cfg.get("default", _hyg_model) - # Read compression settings + # Read compression settings — only use enabled flag. + # The threshold is intentionally separate from the agent's + # compression.threshold (hygiene runs higher). _comp_cfg = _hyg_data.get("compression", {}) if isinstance(_comp_cfg, dict): - _hyg_threshold_pct = float( - _comp_cfg.get("threshold", _hyg_threshold_pct) - ) _hyg_compression_enabled = str( _comp_cfg.get("enabled", True) ).lower() in ("true", "1", "yes") except Exception: pass - # Also check env overrides (same as run_agent.py) - _hyg_threshold_pct = float( - os.getenv("CONTEXT_COMPRESSION_THRESHOLD", str(_hyg_threshold_pct)) - ) + # Check env override for disabling compression entirely if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"): _hyg_compression_enabled = False