refactor: update context compression configuration to use config.yaml and improve model handling
This commit is contained in:
parent
0afe1b707d
commit
6366177118
4 changed files with 19 additions and 4 deletions
|
|
@ -10,7 +10,7 @@
|
||||||
OPENROUTER_API_KEY=
|
OPENROUTER_API_KEY=
|
||||||
|
|
||||||
# Default model to use (OpenRouter format: provider/model)
|
# Default model to use (OpenRouter format: provider/model)
|
||||||
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
|
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
|
||||||
LLM_MODEL=anthropic/claude-opus-4.6
|
LLM_MODEL=anthropic/claude-opus-4.6
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -200,9 +200,10 @@ IMAGE_TOOLS_DEBUG=false
|
||||||
# When conversation approaches model's context limit, middle turns are
|
# When conversation approaches model's context limit, middle turns are
|
||||||
# automatically summarized to free up space.
|
# automatically summarized to free up space.
|
||||||
#
|
#
|
||||||
|
# Context compression is configured in ~/.hermes/config.yaml under the `compression:` section.
|
||||||
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
|
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
|
||||||
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
|
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
|
||||||
# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries
|
# The summary model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# RL TRAINING (Tinker + Atropos)
|
# RL TRAINING (Tinker + Atropos)
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ class ContextCompressor:
|
||||||
protect_last_n: int = 4,
|
protect_last_n: int = 4,
|
||||||
summary_target_tokens: int = 500,
|
summary_target_tokens: int = 500,
|
||||||
quiet_mode: bool = False,
|
quiet_mode: bool = False,
|
||||||
|
summary_model_override: str = None,
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.threshold_percent = threshold_percent
|
self.threshold_percent = threshold_percent
|
||||||
|
|
@ -49,7 +50,8 @@ class ContextCompressor:
|
||||||
self.last_completion_tokens = 0
|
self.last_completion_tokens = 0
|
||||||
self.last_total_tokens = 0
|
self.last_total_tokens = 0
|
||||||
|
|
||||||
self.client, self.summary_model = get_text_auxiliary_client()
|
self.client, default_model = get_text_auxiliary_client()
|
||||||
|
self.summary_model = summary_model_override or default_model
|
||||||
|
|
||||||
def update_from_response(self, usage: Dict[str, Any]):
|
def update_from_response(self, usage: Dict[str, Any]):
|
||||||
"""Update tracked token usage from API response."""
|
"""Update tracked token usage from API response."""
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,16 @@ if _config_path.exists():
|
||||||
for _cfg_key, _env_var in _terminal_env_map.items():
|
for _cfg_key, _env_var in _terminal_env_map.items():
|
||||||
if _cfg_key in _terminal_cfg:
|
if _cfg_key in _terminal_cfg:
|
||||||
os.environ[_env_var] = str(_terminal_cfg[_cfg_key])
|
os.environ[_env_var] = str(_terminal_cfg[_cfg_key])
|
||||||
|
_compression_cfg = _cfg.get("compression", {})
|
||||||
|
if _compression_cfg and isinstance(_compression_cfg, dict):
|
||||||
|
_compression_env_map = {
|
||||||
|
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
||||||
|
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
||||||
|
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
||||||
|
}
|
||||||
|
for _cfg_key, _env_var in _compression_env_map.items():
|
||||||
|
if _cfg_key in _compression_cfg:
|
||||||
|
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # Non-fatal; gateway can still run with .env values
|
pass # Non-fatal; gateway can still run with .env values
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -479,9 +479,10 @@ class AIAgent:
|
||||||
|
|
||||||
# Initialize context compressor for automatic context management
|
# Initialize context compressor for automatic context management
|
||||||
# Compresses conversation when approaching model's context limit
|
# Compresses conversation when approaching model's context limit
|
||||||
# Configuration via environment variables (can be set in .env or cli-config.yaml)
|
# Configuration via config.yaml (compression section) or environment variables
|
||||||
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
|
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
|
||||||
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
|
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
|
||||||
|
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
|
||||||
|
|
||||||
self.context_compressor = ContextCompressor(
|
self.context_compressor = ContextCompressor(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
|
|
@ -489,6 +490,7 @@ class AIAgent:
|
||||||
protect_first_n=3,
|
protect_first_n=3,
|
||||||
protect_last_n=4,
|
protect_last_n=4,
|
||||||
summary_target_tokens=500,
|
summary_target_tokens=500,
|
||||||
|
summary_model_override=compression_summary_model,
|
||||||
quiet_mode=self.quiet_mode,
|
quiet_mode=self.quiet_mode,
|
||||||
)
|
)
|
||||||
self.compression_enabled = compression_enabled
|
self.compression_enabled = compression_enabled
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue