From 1bb8ed4495a29ba52163f125a4edbffa6b1bee5d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:51:50 -0700 Subject: [PATCH] chore: lower default compression threshold from 85% to 50% (#1096) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: ClawHub skill install — use /download ZIP endpoint The ClawHub API v1 version endpoint only returns file metadata (path, size, sha256, contentType) without inline content or download URLs. Our code was looking for inline content in the metadata, which never existed, causing all ClawHub installs to fail with: 'no inline/raw file content was available' Fix: Use the /api/v1/download endpoint (same as the official clawhub CLI) to download skills as ZIP bundles and extract files in-memory. Changes: - Add _download_zip() method that downloads and extracts ZIP bundles - Retry on 429 rate limiting with Retry-After header support - Path sanitization and binary file filtering for security - Keep _extract_files() as a fallback for inline/raw content - Also fix nested file lookup (version_data.version.files) * chore: lower default compression threshold from 85% to 50% Triggers context compression earlier — at 50% of the model's context window instead of 85%. Updated in all five files where the default is defined: context_compressor.py, cli.py, run_agent.py, config.py, and gateway/run.py. 
--- agent/context_compressor.py | 2 +- cli.py | 2 +- gateway/run.py | 2 +- hermes_cli/config.py | 4 ++-- run_agent.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index a0ca0c99..b2dff9c8 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -28,7 +28,7 @@ class ContextCompressor: def __init__( self, model: str, - threshold_percent: float = 0.85, + threshold_percent: float = 0.50, protect_first_n: int = 3, protect_last_n: int = 4, summary_target_tokens: int = 2500, diff --git a/cli.py b/cli.py index 1e418177..04794230 100755 --- a/cli.py +++ b/cli.py @@ -175,7 +175,7 @@ def load_cli_config() -> Dict[str, Any]: }, "compression": { "enabled": True, # Auto-compress when approaching context limit - "threshold": 0.85, # Compress at 85% of model's context limit + "threshold": 0.50, # Compress at 50% of model's context limit "summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries }, "agent": { diff --git a/gateway/run.py b/gateway/run.py index 8c068559..07490b77 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1057,7 +1057,7 @@ class GatewayRunner: # Read model + compression config from config.yaml — same # source of truth the agent itself uses. 
_hyg_model = "anthropic/claude-sonnet-4.6" - _hyg_threshold_pct = 0.85 + _hyg_threshold_pct = 0.50 _hyg_compression_enabled = True try: _hyg_cfg_path = _hermes_home / "config.yaml" diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 39501e93..c05ebd5a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -121,7 +121,7 @@ DEFAULT_CONFIG = { "compression": { "enabled": True, - "threshold": 0.85, + "threshold": 0.50, "summary_model": "google/gemini-3-flash-preview", "summary_provider": "auto", }, @@ -1119,7 +1119,7 @@ def show_config(): enabled = compression.get('enabled', True) print(f" Enabled: {'yes' if enabled else 'no'}") if enabled: - print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%") + print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%") print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}") comp_provider = compression.get('summary_provider', 'auto') if comp_provider != 'auto': diff --git a/run_agent.py b/run_agent.py index a07cc472..2f082571 100644 --- a/run_agent.py +++ b/run_agent.py @@ -664,7 +664,7 @@ class AIAgent: # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit # Configuration via config.yaml (compression section) or environment variables - compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85")) + compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.50")) compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes") compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None