feat: improve context compaction handoff summaries (#1273)

Adapt PR #916 onto current main by replacing the old context summary marker
with a clearer handoff wrapper, updating the summarization prompt for
resume-oriented summaries, and preserving the current call_llm-based
compression path.
This commit is contained in:
Teknium 2026-03-14 02:33:31 -07:00 committed by GitHub
parent 728fa66ef0
commit 5c479eedf1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 55 additions and 19 deletions

View file

@ -17,6 +17,16 @@ from agent.model_metadata import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Wrapper text placed in front of generated compaction summaries (see
# ContextCompressor._with_summary_prefix). It tells the reader of the
# compacted history that the summarized work was already completed.
SUMMARY_PREFIX = (
"[CONTEXT COMPACTION] Earlier turns in this conversation were compacted "
"to save context space. The summary below describes work that was "
"already completed, and the current session state may still reflect "
"that work (for example, files may already be changed). Use the summary "
"and the current state to continue from where things left off, and "
"avoid repeating work:"
)
# Marker of the previous summary format; _with_summary_prefix strips it so
# that older-style summaries are normalized to SUMMARY_PREFIX.
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
class ContextCompressor: class ContextCompressor:
"""Compresses conversation context when approaching the model's context limit. """Compresses conversation context when approaching the model's context limit.
@ -102,22 +112,22 @@ class ContextCompressor:
parts.append(f"[{role.upper()}]: {content}") parts.append(f"[{role.upper()}]: {content}")
content_to_summarize = "\n\n".join(parts) content_to_summarize = "\n\n".join(parts)
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history. prompt = f"""Create a concise handoff summary for a later assistant that will continue this conversation after earlier turns are compacted.
Write from a neutral perspective describing: Describe:
1. What actions were taken (tool calls, searches, file operations) 1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained 2. Key information or results obtained
3. Important decisions or findings 3. Important decisions, constraints, or user preferences
4. Relevant data, file names, or outputs 4. Relevant data, file names, outputs, or next steps needed to continue
Keep factual and informative. Target ~{self.summary_target_tokens} tokens. Keep it factual, concise, and focused on helping the next assistant resume without repeating work. Target ~{self.summary_target_tokens} tokens.
--- ---
TURNS TO SUMMARIZE: TURNS TO SUMMARIZE:
{content_to_summarize} {content_to_summarize}
--- ---
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" Write only the summary body. Do not include any preamble or prefix; the system will add the handoff wrapper."""
# Use the centralized LLM router — handles provider resolution, # Use the centralized LLM router — handles provider resolution,
# auth, and fallback internally. # auth, and fallback internally.
@ -137,9 +147,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not isinstance(content, str): if not isinstance(content, str):
content = str(content) if content else "" content = str(content) if content else ""
summary = content.strip() summary = content.strip()
if not summary.startswith("[CONTEXT SUMMARY]:"): return self._with_summary_prefix(summary)
summary = "[CONTEXT SUMMARY]: " + summary
return summary
except RuntimeError: except RuntimeError:
logging.warning("Context compression: no provider available for " logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary.") "summary. Middle turns will be dropped without summary.")
@ -148,6 +156,16 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
logging.warning("Failed to generate context summary: %s", e) logging.warning("Failed to generate context summary: %s", e)
return None return None
@staticmethod
def _with_summary_prefix(summary: str) -> str:
    """Return *summary* wrapped in the current compaction handoff prefix.

    Surrounding whitespace is trimmed first. If the text already begins
    with the legacy marker or with the current prefix, that single leading
    marker is removed so the result carries the current prefix once. An
    empty (or falsy) summary yields the bare prefix with no trailing
    newline.
    """
    body = summary.strip() if summary else ""

    # Drop at most one leading marker; the legacy form is checked first.
    if body.startswith(LEGACY_SUMMARY_PREFIX):
        body = body[len(LEGACY_SUMMARY_PREFIX):].lstrip()
    elif body.startswith(SUMMARY_PREFIX):
        body = body[len(SUMMARY_PREFIX):].lstrip()

    if not body:
        return SUMMARY_PREFIX
    return f"{SUMMARY_PREFIX}\n{body}"
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Tool-call / tool-result pair integrity helpers # Tool-call / tool-result pair integrity helpers
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@ -287,7 +305,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
for i in range(compress_start): for i in range(compress_start):
msg = messages[i].copy() msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0: if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]" msg["content"] = (
(msg.get("content") or "")
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
)
compressed.append(msg) compressed.append(msg)
if summary: if summary:

View file

@ -3,7 +3,7 @@
import pytest import pytest
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
from agent.context_compressor import ContextCompressor from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
@pytest.fixture() @pytest.fixture()
@ -138,7 +138,7 @@ class TestGenerateSummaryNoneContent:
with patch("agent.context_compressor.call_llm", return_value=mock_response): with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages) summary = c._generate_summary(messages)
assert isinstance(summary, str) assert isinstance(summary, str)
assert "CONTEXT SUMMARY" in summary assert summary.startswith(SUMMARY_PREFIX)
def test_none_content_in_system_message_compress(self): def test_none_content_in_system_message_compress(self):
"""System message with content=None should not crash during compress.""" """System message with content=None should not crash during compress."""
@ -172,7 +172,7 @@ class TestNonStringContent:
with patch("agent.context_compressor.call_llm", return_value=mock_response): with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages) summary = c._generate_summary(messages)
assert isinstance(summary, str) assert isinstance(summary, str)
assert "CONTEXT SUMMARY" in summary assert summary.startswith(SUMMARY_PREFIX)
def test_none_content_coerced_to_empty(self): def test_none_content_coerced_to_empty(self):
mock_response = MagicMock() mock_response = MagicMock()
@ -189,9 +189,19 @@ class TestNonStringContent:
with patch("agent.context_compressor.call_llm", return_value=mock_response): with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages) summary = c._generate_summary(messages)
# None content → empty string → "[CONTEXT SUMMARY]: " prefix added # None content → empty string → standardized compaction handoff prefix added
assert summary is not None assert summary is not None
assert "CONTEXT SUMMARY" in summary assert summary == SUMMARY_PREFIX
class TestSummaryPrefixNormalization:
    """Checks for ContextCompressor._with_summary_prefix normalization."""

    def test_legacy_prefix_is_replaced(self):
        # A summary carrying the old marker is re-wrapped with the new prefix.
        expected = f"{SUMMARY_PREFIX}\ndid work"
        normalized = ContextCompressor._with_summary_prefix(
            "[CONTEXT SUMMARY]: did work"
        )
        assert normalized == expected

    def test_existing_new_prefix_is_not_duplicated(self):
        # Text that already has the current prefix comes back unchanged.
        already_wrapped = f"{SUMMARY_PREFIX}\ndid work"
        normalized = ContextCompressor._with_summary_prefix(already_wrapped)
        assert normalized == already_wrapped
class TestCompressWithClient: class TestCompressWithClient:
@ -211,7 +221,7 @@ class TestCompressWithClient:
# Should have summary message in the middle # Should have summary message in the middle
contents = [m.get("content", "") for m in result] contents = [m.get("content", "") for m in result]
assert any("CONTEXT SUMMARY" in c for c in contents) assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
assert len(result) < len(msgs) assert len(result) < len(msgs)
def test_summarization_does_not_split_tool_call_pairs(self): def test_summarization_does_not_split_tool_call_pairs(self):
@ -283,7 +293,9 @@ class TestCompressWithClient:
] ]
with patch("agent.context_compressor.call_llm", return_value=mock_response): with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs) result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] summary_msg = [
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
]
assert len(summary_msg) == 1 assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "user" assert summary_msg[0]["role"] == "user"
@ -311,7 +323,9 @@ class TestCompressWithClient:
] ]
with patch("agent.context_compressor.call_llm", return_value=mock_response): with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs) result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] summary_msg = [
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
]
assert len(summary_msg) == 1 assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "assistant" assert summary_msg[0]["role"] == "assistant"

View file

@ -17,6 +17,7 @@ from unittest.mock import MagicMock, patch
import pytest import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent from run_agent import AIAgent
@ -340,7 +341,7 @@ class TestPreflightCompression:
# Simulate compression reducing messages # Simulate compression reducing messages
mock_compress.return_value = ( mock_compress.return_value = (
[ [
{"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"}, {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
{"role": "user", "content": "hello"}, {"role": "user", "content": "hello"},
], ],
"new system prompt", "new system prompt",