From 0698ddb49618646c6a576fe7d8e15d8503604c5a Mon Sep 17 00:00:00 2001 From: Mibayy Date: Sun, 22 Mar 2026 11:20:27 +0000 Subject: [PATCH] fix(compression): remove hardcoded gemini-3-flash-preview as default summary model Closes #2453 The DEFAULT_CONFIG was hardcoding google/gemini-3-flash-preview as the summary_model for context compression. This caused unexpected OpenRouter charges for users who configured a different provider/model, because the compression task would silently fall back to gemini via OpenRouter even when the user's main model was on a different provider. Fix: change summary_model default to empty string. When empty, call_llm() resolves the model through the standard auto-detection chain (auxiliary.compression config -> env vars -> main provider), which correctly uses the user's configured provider and model. Users who want a dedicated cheap model for compression can still explicitly set compression.summary_model in their config.yaml. --- cli.py | 2 +- hermes_cli/config.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cli.py b/cli.py index ac9231b0..931610bf 100644 --- a/cli.py +++ b/cli.py @@ -180,7 +180,7 @@ def load_cli_config() -> Dict[str, Any]: "compression": { "enabled": True, # Auto-compress when approaching context limit "threshold": 0.50, # Compress at 50% of model's context limit - "summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries + "summary_model": "", # Model for summaries (empty = use main model) }, "smart_model_routing": { "enabled": False, diff --git a/hermes_cli/config.py b/hermes_cli/config.py index b2fd27c1..b0e14f2a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -159,7 +159,7 @@ DEFAULT_CONFIG = { "compression": { "enabled": True, "threshold": 0.50, - "summary_model": "google/gemini-3-flash-preview", + "summary_model": "", # empty = use main configured model "summary_provider": "auto", "summary_base_url": None, }, @@ -1659,7 +1659,8 @@ def show_config(): print(f" Enabled: {'yes' if enabled else 'no'}") if enabled: print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%") - print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}") + _sm = compression.get('summary_model', '') or '(main model)' + print(f" Model: {_sm}") comp_provider = compression.get('summary_provider', 'auto') if comp_provider != 'auto': print(f" Provider: {comp_provider}")