fix: unify gateway session hygiene with agent compression config
The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(when the 200-message threshold fired first) or at otherwise
inconsistent token counts.
Changes:
- Gateway hygiene now reads model name from config.yaml and uses
get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (it was redundant and
caused false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of the model's context length (was a hardcoded 200k tokens)
- Updated tests to verify model-aware thresholds, scaling across
models, and that message count alone no longer triggers compression
For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.
This commit is contained in:
parent
3ffaac00dd
commit
67275641f8
2 changed files with 253 additions and 180 deletions
278
gateway/run.py
278
gateway/run.py
|
|
@ -900,159 +900,187 @@ class GatewayRunner:
|
||||||
# every new message rehydrates an oversized transcript, causing
|
# every new message rehydrates an oversized transcript, causing
|
||||||
# repeated truncation/context failures. Detect this early and
|
# repeated truncation/context failures. Detect this early and
|
||||||
# compress proactively — before the agent even starts. (#628)
|
# compress proactively — before the agent even starts. (#628)
|
||||||
|
#
|
||||||
|
# Thresholds are derived from the SAME compression config the
|
||||||
|
# agent uses (compression.threshold × model context length) so
|
||||||
|
# CLI and messaging platforms behave identically.
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
if history and len(history) >= 4:
|
if history and len(history) >= 4:
|
||||||
from agent.model_metadata import estimate_messages_tokens_rough
|
from agent.model_metadata import (
|
||||||
|
estimate_messages_tokens_rough,
|
||||||
|
get_model_context_length,
|
||||||
|
)
|
||||||
|
|
||||||
# Read thresholds from config.yaml → session_hygiene section
|
# Read model + compression config from config.yaml — same
|
||||||
_hygiene_cfg = {}
|
# source of truth the agent itself uses.
|
||||||
|
_hyg_model = "anthropic/claude-sonnet-4.6"
|
||||||
|
_hyg_threshold_pct = 0.85
|
||||||
|
_hyg_compression_enabled = True
|
||||||
try:
|
try:
|
||||||
_hyg_cfg_path = _hermes_home / "config.yaml"
|
_hyg_cfg_path = _hermes_home / "config.yaml"
|
||||||
if _hyg_cfg_path.exists():
|
if _hyg_cfg_path.exists():
|
||||||
import yaml as _hyg_yaml
|
import yaml as _hyg_yaml
|
||||||
with open(_hyg_cfg_path) as _hyg_f:
|
with open(_hyg_cfg_path) as _hyg_f:
|
||||||
_hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
|
_hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
|
||||||
_hygiene_cfg = _hyg_data.get("session_hygiene", {})
|
|
||||||
if not isinstance(_hygiene_cfg, dict):
|
# Resolve model name (same logic as run_sync)
|
||||||
_hygiene_cfg = {}
|
_model_cfg = _hyg_data.get("model", {})
|
||||||
|
if isinstance(_model_cfg, str):
|
||||||
|
_hyg_model = _model_cfg
|
||||||
|
elif isinstance(_model_cfg, dict):
|
||||||
|
_hyg_model = _model_cfg.get("default", _hyg_model)
|
||||||
|
|
||||||
|
# Read compression settings
|
||||||
|
_comp_cfg = _hyg_data.get("compression", {})
|
||||||
|
if isinstance(_comp_cfg, dict):
|
||||||
|
_hyg_threshold_pct = float(
|
||||||
|
_comp_cfg.get("threshold", _hyg_threshold_pct)
|
||||||
|
)
|
||||||
|
_hyg_compression_enabled = str(
|
||||||
|
_comp_cfg.get("enabled", True)
|
||||||
|
).lower() in ("true", "1", "yes")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
_compress_token_threshold = int(
|
# Also check env overrides (same as run_agent.py)
|
||||||
_hygiene_cfg.get("auto_compress_tokens", 100_000)
|
_hyg_threshold_pct = float(
|
||||||
)
|
os.getenv("CONTEXT_COMPRESSION_THRESHOLD", str(_hyg_threshold_pct))
|
||||||
_compress_msg_threshold = int(
|
|
||||||
_hygiene_cfg.get("auto_compress_messages", 200)
|
|
||||||
)
|
|
||||||
_warn_token_threshold = int(
|
|
||||||
_hygiene_cfg.get("warn_tokens", 200_000)
|
|
||||||
)
|
)
|
||||||
|
if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
|
||||||
|
_hyg_compression_enabled = False
|
||||||
|
|
||||||
_msg_count = len(history)
|
if _hyg_compression_enabled:
|
||||||
_approx_tokens = estimate_messages_tokens_rough(history)
|
_hyg_context_length = get_model_context_length(_hyg_model)
|
||||||
|
_compress_token_threshold = int(
|
||||||
_needs_compress = (
|
_hyg_context_length * _hyg_threshold_pct
|
||||||
_approx_tokens >= _compress_token_threshold
|
|
||||||
or _msg_count >= _compress_msg_threshold
|
|
||||||
)
|
|
||||||
|
|
||||||
if _needs_compress:
|
|
||||||
logger.info(
|
|
||||||
"Session hygiene: %s messages, ~%s tokens — auto-compressing "
|
|
||||||
"(thresholds: %s msgs / %s tokens)",
|
|
||||||
_msg_count, f"{_approx_tokens:,}",
|
|
||||||
_compress_msg_threshold, f"{_compress_token_threshold:,}",
|
|
||||||
)
|
)
|
||||||
|
# Warn if still huge after compression (95% of context)
|
||||||
|
_warn_token_threshold = int(_hyg_context_length * 0.95)
|
||||||
|
|
||||||
|
_msg_count = len(history)
|
||||||
|
_approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
|
|
||||||
|
_needs_compress = _approx_tokens >= _compress_token_threshold
|
||||||
|
|
||||||
|
if _needs_compress:
|
||||||
|
logger.info(
|
||||||
|
"Session hygiene: %s messages, ~%s tokens — auto-compressing "
|
||||||
|
"(threshold: %s%% of %s = %s tokens)",
|
||||||
|
_msg_count, f"{_approx_tokens:,}",
|
||||||
|
int(_hyg_threshold_pct * 100),
|
||||||
|
f"{_hyg_context_length:,}",
|
||||||
|
f"{_compress_token_threshold:,}",
|
||||||
|
)
|
||||||
|
|
||||||
|
_hyg_adapter = self.adapters.get(source.platform)
|
||||||
|
if _hyg_adapter:
|
||||||
|
try:
|
||||||
|
await _hyg_adapter.send(
|
||||||
|
source.chat_id,
|
||||||
|
f"🗜️ Session is large ({_msg_count} messages, "
|
||||||
|
f"~{_approx_tokens:,} tokens). Auto-compressing..."
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
_hyg_adapter = self.adapters.get(source.platform)
|
|
||||||
if _hyg_adapter:
|
|
||||||
try:
|
try:
|
||||||
await _hyg_adapter.send(
|
from run_agent import AIAgent
|
||||||
source.chat_id,
|
|
||||||
f"🗜️ Session is large ({_msg_count} messages, "
|
|
||||||
f"~{_approx_tokens:,} tokens). Auto-compressing..."
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
_hyg_runtime = _resolve_runtime_agent_kwargs()
|
||||||
from run_agent import AIAgent
|
if _hyg_runtime.get("api_key"):
|
||||||
|
_hyg_msgs = [
|
||||||
|
{"role": m.get("role"), "content": m.get("content")}
|
||||||
|
for m in history
|
||||||
|
if m.get("role") in ("user", "assistant")
|
||||||
|
and m.get("content")
|
||||||
|
]
|
||||||
|
|
||||||
_hyg_runtime = _resolve_runtime_agent_kwargs()
|
if len(_hyg_msgs) >= 4:
|
||||||
if _hyg_runtime.get("api_key"):
|
_hyg_agent = AIAgent(
|
||||||
_hyg_msgs = [
|
**_hyg_runtime,
|
||||||
{"role": m.get("role"), "content": m.get("content")}
|
max_iterations=4,
|
||||||
for m in history
|
quiet_mode=True,
|
||||||
if m.get("role") in ("user", "assistant")
|
enabled_toolsets=["memory"],
|
||||||
and m.get("content")
|
session_id=session_entry.session_id,
|
||||||
]
|
|
||||||
|
|
||||||
if len(_hyg_msgs) >= 4:
|
|
||||||
_hyg_agent = AIAgent(
|
|
||||||
**_hyg_runtime,
|
|
||||||
max_iterations=4,
|
|
||||||
quiet_mode=True,
|
|
||||||
enabled_toolsets=["memory"],
|
|
||||||
session_id=session_entry.session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
_compressed, _ = await loop.run_in_executor(
|
|
||||||
None,
|
|
||||||
lambda: _hyg_agent._compress_context(
|
|
||||||
_hyg_msgs, "",
|
|
||||||
approx_tokens=_approx_tokens,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.session_store.rewrite_transcript(
|
|
||||||
session_entry.session_id, _compressed
|
|
||||||
)
|
|
||||||
history = _compressed
|
|
||||||
_new_count = len(_compressed)
|
|
||||||
_new_tokens = estimate_messages_tokens_rough(
|
|
||||||
_compressed
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Session hygiene: compressed %s → %s msgs, "
|
|
||||||
"~%s → ~%s tokens",
|
|
||||||
_msg_count, _new_count,
|
|
||||||
f"{_approx_tokens:,}", f"{_new_tokens:,}",
|
|
||||||
)
|
|
||||||
|
|
||||||
if _hyg_adapter:
|
|
||||||
try:
|
|
||||||
await _hyg_adapter.send(
|
|
||||||
source.chat_id,
|
|
||||||
f"🗜️ Compressed: {_msg_count} → "
|
|
||||||
f"{_new_count} messages, "
|
|
||||||
f"~{_approx_tokens:,} → "
|
|
||||||
f"~{_new_tokens:,} tokens"
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Still too large after compression — warn user
|
|
||||||
if _new_tokens >= _warn_token_threshold:
|
|
||||||
logger.warning(
|
|
||||||
"Session hygiene: still ~%s tokens after "
|
|
||||||
"compression — suggesting /reset",
|
|
||||||
f"{_new_tokens:,}",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
_compressed, _ = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
lambda: _hyg_agent._compress_context(
|
||||||
|
_hyg_msgs, "",
|
||||||
|
approx_tokens=_approx_tokens,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.session_store.rewrite_transcript(
|
||||||
|
session_entry.session_id, _compressed
|
||||||
|
)
|
||||||
|
history = _compressed
|
||||||
|
_new_count = len(_compressed)
|
||||||
|
_new_tokens = estimate_messages_tokens_rough(
|
||||||
|
_compressed
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Session hygiene: compressed %s → %s msgs, "
|
||||||
|
"~%s → ~%s tokens",
|
||||||
|
_msg_count, _new_count,
|
||||||
|
f"{_approx_tokens:,}", f"{_new_tokens:,}",
|
||||||
|
)
|
||||||
|
|
||||||
if _hyg_adapter:
|
if _hyg_adapter:
|
||||||
try:
|
try:
|
||||||
await _hyg_adapter.send(
|
await _hyg_adapter.send(
|
||||||
source.chat_id,
|
source.chat_id,
|
||||||
"⚠️ Session is still very large "
|
f"🗜️ Compressed: {_msg_count} → "
|
||||||
"after compression "
|
f"{_new_count} messages, "
|
||||||
f"(~{_new_tokens:,} tokens). "
|
f"~{_approx_tokens:,} → "
|
||||||
"Consider using /reset to start "
|
f"~{_new_tokens:,} tokens"
|
||||||
"fresh if you experience issues."
|
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
# Still too large after compression — warn user
|
||||||
logger.warning(
|
if _new_tokens >= _warn_token_threshold:
|
||||||
"Session hygiene auto-compress failed: %s", e
|
logger.warning(
|
||||||
)
|
"Session hygiene: still ~%s tokens after "
|
||||||
# Compression failed and session is dangerously large
|
"compression — suggesting /reset",
|
||||||
if _approx_tokens >= _warn_token_threshold:
|
f"{_new_tokens:,}",
|
||||||
_hyg_adapter = self.adapters.get(source.platform)
|
)
|
||||||
if _hyg_adapter:
|
if _hyg_adapter:
|
||||||
try:
|
try:
|
||||||
await _hyg_adapter.send(
|
await _hyg_adapter.send(
|
||||||
source.chat_id,
|
source.chat_id,
|
||||||
f"⚠️ Session is very large "
|
"⚠️ Session is still very large "
|
||||||
f"({_msg_count} messages, "
|
"after compression "
|
||||||
f"~{_approx_tokens:,} tokens) and "
|
f"(~{_new_tokens:,} tokens). "
|
||||||
"auto-compression failed. Consider "
|
"Consider using /reset to start "
|
||||||
"using /compress or /reset to avoid "
|
"fresh if you experience issues."
|
||||||
"issues."
|
)
|
||||||
)
|
except Exception:
|
||||||
except Exception:
|
pass
|
||||||
pass
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"Session hygiene auto-compress failed: %s", e
|
||||||
|
)
|
||||||
|
# Compression failed and session is dangerously large
|
||||||
|
if _approx_tokens >= _warn_token_threshold:
|
||||||
|
_hyg_adapter = self.adapters.get(source.platform)
|
||||||
|
if _hyg_adapter:
|
||||||
|
try:
|
||||||
|
await _hyg_adapter.send(
|
||||||
|
source.chat_id,
|
||||||
|
f"⚠️ Session is very large "
|
||||||
|
f"({_msg_count} messages, "
|
||||||
|
f"~{_approx_tokens:,} tokens) and "
|
||||||
|
"auto-compression failed. Consider "
|
||||||
|
"using /compress or /reset to avoid "
|
||||||
|
"issues."
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# First-message onboarding -- only on the very first interaction ever
|
# First-message onboarding -- only on the very first interaction ever
|
||||||
if not history and not self.session_store.has_any_sessions():
|
if not history and not self.session_store.has_any_sessions():
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@
|
||||||
|
|
||||||
Verifies that the gateway detects pathologically large transcripts and
|
Verifies that the gateway detects pathologically large transcripts and
|
||||||
triggers auto-compression before running the agent. (#628)
|
triggers auto-compression before running the agent. (#628)
|
||||||
|
|
||||||
|
The hygiene system uses the SAME compression config as the agent:
|
||||||
|
compression.threshold × model context length
|
||||||
|
so CLI and messaging platforms behave identically.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
@ -38,75 +42,113 @@ def _make_large_history_tokens(target_tokens: int) -> list:
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Detection threshold tests
|
# Detection threshold tests (model-aware, unified with compression config)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
class TestSessionHygieneThresholds:
|
class TestSessionHygieneThresholds:
|
||||||
"""Test that the threshold logic correctly identifies large sessions."""
|
"""Test that the threshold logic correctly identifies large sessions.
|
||||||
|
|
||||||
|
Thresholds are derived from model context length × compression threshold,
|
||||||
|
matching what the agent's ContextCompressor uses.
|
||||||
|
"""
|
||||||
|
|
||||||
def test_small_session_below_thresholds(self):
|
def test_small_session_below_thresholds(self):
|
||||||
"""A 10-message session should not trigger compression."""
|
"""A 10-message session should not trigger compression."""
|
||||||
history = _make_history(10)
|
history = _make_history(10)
|
||||||
msg_count = len(history)
|
|
||||||
approx_tokens = estimate_messages_tokens_rough(history)
|
approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
|
|
||||||
compress_token_threshold = 100_000
|
# For a 200k-context model at 85% threshold = 170k
|
||||||
compress_msg_threshold = 200
|
context_length = 200_000
|
||||||
|
threshold_pct = 0.85
|
||||||
|
compress_token_threshold = int(context_length * threshold_pct)
|
||||||
|
|
||||||
needs_compress = (
|
needs_compress = approx_tokens >= compress_token_threshold
|
||||||
approx_tokens >= compress_token_threshold
|
|
||||||
or msg_count >= compress_msg_threshold
|
|
||||||
)
|
|
||||||
assert not needs_compress
|
assert not needs_compress
|
||||||
|
|
||||||
def test_large_message_count_triggers(self):
|
|
||||||
"""200+ messages should trigger compression even if tokens are low."""
|
|
||||||
history = _make_history(250, content_size=10)
|
|
||||||
msg_count = len(history)
|
|
||||||
|
|
||||||
compress_msg_threshold = 200
|
|
||||||
needs_compress = msg_count >= compress_msg_threshold
|
|
||||||
assert needs_compress
|
|
||||||
|
|
||||||
def test_large_token_count_triggers(self):
|
def test_large_token_count_triggers(self):
|
||||||
"""High token count should trigger compression even if message count is low."""
|
"""High token count should trigger compression when exceeding model threshold."""
|
||||||
# 50 messages with huge content to exceed 100K tokens
|
# Build a history that exceeds 85% of a 200k model (170k tokens)
|
||||||
history = _make_history(50, content_size=10_000)
|
history = _make_large_history_tokens(180_000)
|
||||||
approx_tokens = estimate_messages_tokens_rough(history)
|
approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
|
|
||||||
compress_token_threshold = 100_000
|
context_length = 200_000
|
||||||
|
threshold_pct = 0.85
|
||||||
|
compress_token_threshold = int(context_length * threshold_pct)
|
||||||
|
|
||||||
needs_compress = approx_tokens >= compress_token_threshold
|
needs_compress = approx_tokens >= compress_token_threshold
|
||||||
assert needs_compress
|
assert needs_compress
|
||||||
|
|
||||||
def test_under_both_thresholds_no_trigger(self):
|
def test_under_threshold_no_trigger(self):
|
||||||
"""Session under both thresholds should not trigger."""
|
"""Session under threshold should not trigger, even with many messages."""
|
||||||
history = _make_history(100, content_size=100)
|
# 250 short messages — lots of messages but well under token threshold
|
||||||
msg_count = len(history)
|
history = _make_history(250, content_size=10)
|
||||||
approx_tokens = estimate_messages_tokens_rough(history)
|
approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
|
|
||||||
compress_token_threshold = 100_000
|
# 200k model at 85% = 170k token threshold
|
||||||
compress_msg_threshold = 200
|
context_length = 200_000
|
||||||
|
threshold_pct = 0.85
|
||||||
|
compress_token_threshold = int(context_length * threshold_pct)
|
||||||
|
|
||||||
needs_compress = (
|
needs_compress = approx_tokens >= compress_token_threshold
|
||||||
approx_tokens >= compress_token_threshold
|
assert not needs_compress, (
|
||||||
or msg_count >= compress_msg_threshold
|
f"250 short messages (~{approx_tokens} tokens) should NOT trigger "
|
||||||
|
f"compression at {compress_token_threshold} token threshold"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_message_count_alone_does_not_trigger(self):
|
||||||
|
"""Message count alone should NOT trigger — only token count matters.
|
||||||
|
|
||||||
|
The old system used an OR of token-count and message-count thresholds,
|
||||||
|
which caused premature compression in tool-heavy sessions with 200+
|
||||||
|
messages but low total tokens.
|
||||||
|
"""
|
||||||
|
# 300 very short messages — old system would compress, new should not
|
||||||
|
history = _make_history(300, content_size=10)
|
||||||
|
approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
|
|
||||||
|
context_length = 200_000
|
||||||
|
threshold_pct = 0.85
|
||||||
|
compress_token_threshold = int(context_length * threshold_pct)
|
||||||
|
|
||||||
|
# Token-based check only
|
||||||
|
needs_compress = approx_tokens >= compress_token_threshold
|
||||||
assert not needs_compress
|
assert not needs_compress
|
||||||
|
|
||||||
def test_custom_thresholds(self):
|
def test_threshold_scales_with_model(self):
|
||||||
"""Custom thresholds from config should be respected."""
|
"""Different models should have different compression thresholds."""
|
||||||
history = _make_history(60, content_size=100)
|
# 128k model at 85% = 108,800 tokens
|
||||||
msg_count = len(history)
|
small_model_threshold = int(128_000 * 0.85)
|
||||||
|
# 200k model at 85% = 170,000 tokens
|
||||||
|
large_model_threshold = int(200_000 * 0.85)
|
||||||
|
# 1M model at 85% = 850,000 tokens
|
||||||
|
huge_model_threshold = int(1_000_000 * 0.85)
|
||||||
|
|
||||||
# Custom lower threshold
|
# A session at ~120k tokens:
|
||||||
compress_msg_threshold = 50
|
history = _make_large_history_tokens(120_000)
|
||||||
needs_compress = msg_count >= compress_msg_threshold
|
approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
assert needs_compress
|
|
||||||
|
|
||||||
# Custom higher threshold
|
# Should trigger for 128k model
|
||||||
compress_msg_threshold = 100
|
assert approx_tokens >= small_model_threshold
|
||||||
needs_compress = msg_count >= compress_msg_threshold
|
# Should NOT trigger for 200k model
|
||||||
assert not needs_compress
|
assert approx_tokens < large_model_threshold
|
||||||
|
# Should NOT trigger for 1M model
|
||||||
|
assert approx_tokens < huge_model_threshold
|
||||||
|
|
||||||
|
def test_custom_threshold_percentage(self):
|
||||||
|
"""Custom threshold percentage from config should be respected."""
|
||||||
|
context_length = 200_000
|
||||||
|
|
||||||
|
# At 50% threshold = 100k
|
||||||
|
low_threshold = int(context_length * 0.50)
|
||||||
|
# At 90% threshold = 180k
|
||||||
|
high_threshold = int(context_length * 0.90)
|
||||||
|
|
||||||
|
history = _make_large_history_tokens(150_000)
|
||||||
|
approx_tokens = estimate_messages_tokens_rough(history)
|
||||||
|
|
||||||
|
# Should trigger at 50% but not at 90%
|
||||||
|
assert approx_tokens >= low_threshold
|
||||||
|
assert approx_tokens < high_threshold
|
||||||
|
|
||||||
def test_minimum_message_guard(self):
|
def test_minimum_message_guard(self):
|
||||||
"""Sessions with fewer than 4 messages should never trigger."""
|
"""Sessions with fewer than 4 messages should never trigger."""
|
||||||
|
|
@ -117,18 +159,19 @@ class TestSessionHygieneThresholds:
|
||||||
|
|
||||||
|
|
||||||
class TestSessionHygieneWarnThreshold:
|
class TestSessionHygieneWarnThreshold:
|
||||||
"""Test the post-compression warning threshold."""
|
"""Test the post-compression warning threshold (95% of context)."""
|
||||||
|
|
||||||
def test_warn_when_still_large(self):
|
def test_warn_when_still_large(self):
|
||||||
"""If compressed result is still above warn_tokens, should warn."""
|
"""If compressed result is still above 95% of context, should warn."""
|
||||||
# Simulate post-compression tokens
|
context_length = 200_000
|
||||||
warn_threshold = 200_000
|
warn_threshold = int(context_length * 0.95) # 190k
|
||||||
post_compress_tokens = 250_000
|
post_compress_tokens = 195_000
|
||||||
assert post_compress_tokens >= warn_threshold
|
assert post_compress_tokens >= warn_threshold
|
||||||
|
|
||||||
def test_no_warn_when_under(self):
|
def test_no_warn_when_under(self):
|
||||||
"""If compressed result is under warn_tokens, no warning."""
|
"""If compressed result is under 95% of context, no warning."""
|
||||||
warn_threshold = 200_000
|
context_length = 200_000
|
||||||
|
warn_threshold = int(context_length * 0.95) # 190k
|
||||||
post_compress_tokens = 150_000
|
post_compress_tokens = 150_000
|
||||||
assert post_compress_tokens < warn_threshold
|
assert post_compress_tokens < warn_threshold
|
||||||
|
|
||||||
|
|
@ -150,10 +193,12 @@ class TestTokenEstimation:
|
||||||
assert estimate_messages_tokens_rough(many) > estimate_messages_tokens_rough(few)
|
assert estimate_messages_tokens_rough(many) > estimate_messages_tokens_rough(few)
|
||||||
|
|
||||||
def test_pathological_session_detected(self):
|
def test_pathological_session_detected(self):
|
||||||
"""The reported pathological case: 648 messages, ~299K tokens."""
|
"""The reported pathological case: 648 messages, ~299K tokens.
|
||||||
# Simulate a 648-message session averaging ~460 tokens per message
|
|
||||||
|
With a 200k model at 85% threshold (170k), this should trigger.
|
||||||
|
"""
|
||||||
history = _make_history(648, content_size=1800)
|
history = _make_history(648, content_size=1800)
|
||||||
tokens = estimate_messages_tokens_rough(history)
|
tokens = estimate_messages_tokens_rough(history)
|
||||||
# Should be well above the 100K default threshold
|
# Should be well above the 170K threshold for a 200k model
|
||||||
assert tokens > 100_000
|
threshold = int(200_000 * 0.85)
|
||||||
assert len(history) > 200
|
assert tokens > threshold
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue