chore: lower default compression threshold from 85% to 50% (#1096)

* fix: ClawHub skill install — use /download ZIP endpoint The ClawHub API v1 version endpoint only returns file metadata (path, size, sha256, contentType) without inline content or download URLs. Our code was looking for inline content in the metadata, which never existed, causing all ClawHub installs to fail with: 'no inline/raw file content was available' Fix: Use the /api/v1/download endpoint (same as the official clawhub CLI) to download skills as ZIP bundles and extract files in-memory. Changes: - Add _download_zip() method that downloads and extracts ZIP bundles - Retry on 429 rate limiting with Retry-After header support - Path sanitization and binary file filtering for security - Keep _extract_files() as a fallback for inline/raw content - Also fix nested file lookup (version_data.version.files) * chore: lower default compression threshold from 85% to 50% Triggers context compression earlier — at 50% of the model's context window instead of 85%. Updated in all five files where the default is defined: context_compressor.py, cli.py, run_agent.py, config.py, and gateway/run.py.
2026-03-12 15:51:50 -07:00 · 2026-03-12 15:51:50 -07:00 · 1bb8ed4495
commit 1bb8ed4495
parent e004c094ea
5 changed files with 6 additions and 6 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -28,7 +28,7 @@ class ContextCompressor:
    def __init__(
        self,
        model: str,
-        threshold_percent: float = 0.85,
+        threshold_percent: float = 0.50,
        protect_first_n: int = 3,
        protect_last_n: int = 4,
        summary_target_tokens: int = 2500,
--- a/cli.py
+++ b/cli.py
@@ -175,7 +175,7 @@ def load_cli_config() -> Dict[str, Any]:
        },
        "compression": {
            "enabled": True,      # Auto-compress when approaching context limit
-            "threshold": 0.85,    # Compress at 85% of model's context limit
+            "threshold": 0.50,    # Compress at 50% of model's context limit
            "summary_model": "google/gemini-3-flash-preview",  # Fast/cheap model for summaries
        },
        "agent": {
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1057,7 +1057,7 @@ class GatewayRunner:
            # Read model + compression config from config.yaml — same
            # source of truth the agent itself uses.
            _hyg_model = "anthropic/claude-sonnet-4.6"
-            _hyg_threshold_pct = 0.85
+            _hyg_threshold_pct = 0.50
            _hyg_compression_enabled = True
            try:
                _hyg_cfg_path = _hermes_home / "config.yaml"
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -121,7 +121,7 @@ DEFAULT_CONFIG = {
    
    "compression": {
        "enabled": True,
-        "threshold": 0.85,
+        "threshold": 0.50,
        "summary_model": "google/gemini-3-flash-preview",
        "summary_provider": "auto",
    },
@@ -1119,7 +1119,7 @@ def show_config():
    enabled = compression.get('enabled', True)
    print(f"  Enabled:      {'yes' if enabled else 'no'}")
    if enabled:
-        print(f"  Threshold:    {compression.get('threshold', 0.85) * 100:.0f}%")
+        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
        print(f"  Model:        {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
        comp_provider = compression.get('summary_provider', 'auto')
        if comp_provider != 'auto':
--- a/run_agent.py
+++ b/run_agent.py
@@ -664,7 +664,7 @@ class AIAgent:
        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section) or environment variables
-        compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
+        compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.50"))
        compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
        compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None