The architecture has been updated

2026-03-31 23:31:36 +03:00 · 2026-03-31 23:31:36 +03:00 · a01257ead9
commit a01257ead9
parent 805f7a017e
1119 changed files with 226 additions and 352 deletions
--- a/hermes_code/tests/test_compression_boundary.py
+++ b/hermes_code/tests/test_compression_boundary.py
@ -0,0 +1,199 @@
+"""Tests for context compression boundary alignment.
+
+Verifies that _align_boundary_backward correctly handles tool result groups
+so that parallel tool calls are never split during compression.
+"""
+
+import pytest
+from unittest.mock import patch, MagicMock
+
+from agent.context_compressor import ContextCompressor
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _tc(call_id: str) -> dict:
+    """Create a minimal tool_call dict."""
+    return {"id": call_id, "type": "function", "function": {"name": "test", "arguments": "{}"}}
+
+
+def _tool_result(call_id: str, content: str = "result") -> dict:
+    """Create a tool result message."""
+    return {"role": "tool", "tool_call_id": call_id, "content": content}
+
+
+def _assistant_with_tools(*call_ids: str) -> dict:
+    """Create an assistant message with tool_calls."""
+    return {"role": "assistant", "tool_calls": [_tc(cid) for cid in call_ids], "content": None}
+
+
+def _make_compressor(**kwargs) -> ContextCompressor:
+    defaults = dict(
+        model="test-model",
+        threshold_percent=0.75,
+        protect_first_n=3,
+        protect_last_n=4,
+        quiet_mode=True,
+    )
+    defaults.update(kwargs)
+    with patch("agent.context_compressor.get_model_context_length", return_value=8000):
+        return ContextCompressor(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# _align_boundary_backward tests
+# ---------------------------------------------------------------------------
+
+class TestAlignBoundaryBackward:
+    """Test that compress-end boundary never splits a tool_call/result group."""
+
+    def test_boundary_at_clean_position(self):
+        """Boundary after a user message — no adjustment needed."""
+        comp = _make_compressor()
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+            {"role": "user", "content": "do something"},
+            _assistant_with_tools("tc_1"),
+            _tool_result("tc_1", "done"),
+            {"role": "user", "content": "thanks"},  # idx=6
+            {"role": "assistant", "content": "np"},
+        ]
+        # Boundary at 7, messages[6] = user — no adjustment
+        assert comp._align_boundary_backward(messages, 7) == 7
+
+    def test_boundary_after_assistant_with_tools(self):
+        """Original case: boundary right after assistant with tool_calls."""
+        comp = _make_compressor()
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+            _assistant_with_tools("tc_1", "tc_2"),  # idx=3
+            _tool_result("tc_1"),                    # idx=4
+            _tool_result("tc_2"),                    # idx=5
+            {"role": "user", "content": "next"},
+        ]
+        # Boundary at 4, messages[3] = assistant with tool_calls → pull back to 3
+        assert comp._align_boundary_backward(messages, 4) == 3
+
+    def test_boundary_in_middle_of_tool_results(self):
+        """THE BUG: boundary falls between tool results of the same group."""
+        comp = _make_compressor()
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+            {"role": "user", "content": "do 5 things"},
+            _assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E"),  # idx=4
+            _tool_result("tc_A", "result A"),    # idx=5
+            _tool_result("tc_B", "result B"),    # idx=6
+            _tool_result("tc_C", "result C"),    # idx=7
+            _tool_result("tc_D", "result D"),    # idx=8
+            _tool_result("tc_E", "result E"),    # idx=9
+            {"role": "user", "content": "ok"},
+            {"role": "assistant", "content": "done"},
+        ]
+        # Boundary at 8 — in middle of tool results. messages[7] = tool result.
+        # Must walk back to idx=4 (the parent assistant).
+        assert comp._align_boundary_backward(messages, 8) == 4
+
+    def test_boundary_at_last_tool_result(self):
+        """Boundary right after last tool result — messages[idx-1] is tool."""
+        comp = _make_compressor()
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+            _assistant_with_tools("tc_1", "tc_2", "tc_3"),  # idx=3
+            _tool_result("tc_1"),    # idx=4
+            _tool_result("tc_2"),    # idx=5
+            _tool_result("tc_3"),    # idx=6
+            {"role": "user", "content": "next"},
+        ]
+        # Boundary at 7 — messages[6] is last tool result.
+        # Walk back: [6]=tool, [5]=tool, [4]=tool, [3]=assistant with tools → idx=3
+        assert comp._align_boundary_backward(messages, 7) == 3
+
+    def test_boundary_with_consecutive_tool_groups(self):
+        """Two consecutive tool groups — only walk back to the nearest parent."""
+        comp = _make_compressor()
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hello"},
+            _assistant_with_tools("tc_1"),     # idx=2
+            _tool_result("tc_1"),              # idx=3
+            {"role": "user", "content": "more"},
+            _assistant_with_tools("tc_2", "tc_3"),  # idx=5
+            _tool_result("tc_2"),              # idx=6
+            _tool_result("tc_3"),              # idx=7
+            {"role": "user", "content": "done"},
+        ]
+        # Boundary at 7 — messages[6] = tool result for tc_2 group
+        # Walk back: [6]=tool, [5]=assistant with tools → idx=5
+        assert comp._align_boundary_backward(messages, 7) == 5
+
+
+# ---------------------------------------------------------------------------
+# End-to-end: compression must not lose tool results
+# ---------------------------------------------------------------------------
+
+class TestCompressionToolResultPreservation:
+    """Verify that compress() never silently drops tool results."""
+
+    def test_parallel_tool_results_not_lost(self):
+        """The exact scenario that triggered silent data loss before the fix."""
+        comp = _make_compressor(protect_first_n=3, protect_last_n=4)
+
+        messages = [
+            {"role": "system", "content": "You are helpful."},            # 0
+            {"role": "user", "content": "Hello"},                         # 1
+            {"role": "assistant", "content": "Hi there!"},                # 2  (end of head)
+            {"role": "user", "content": "Read 7 files for me"},           # 3
+            _assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E", "tc_F", "tc_G"),  # 4
+            _tool_result("tc_A", "content of file A"),                    # 5
+            _tool_result("tc_B", "content of file B"),                    # 6
+            _tool_result("tc_C", "content of file C"),                    # 7
+            _tool_result("tc_D", "content of file D"),                    # 8
+            _tool_result("tc_E", "content of file E"),                    # 9
+            _tool_result("tc_F", "content of file F"),                    # 10
+            _tool_result("tc_G", "CRITICAL DATA in file G"),              # 11 ← compress_end=15-4=11
+            {"role": "user", "content": "Now summarize them"},            # 12
+            {"role": "assistant", "content": "Here is the summary..."},   # 13
+            {"role": "user", "content": "Thanks"},                        # 14
+        ]
+        # 15 messages. compress_end = 15 - 4 = 11 (before fix: splits tool group)
+
+        fake_summary = "[Summary of earlier conversation]"
+        with patch.object(comp, "_generate_summary", return_value=fake_summary):
+            result = comp.compress(messages, current_tokens=7000)
+
+        # After compression, no tool results should be orphaned/lost.
+        # All tool results in the result must have a matching assistant tool_call.
+        assistant_call_ids = set()
+        for msg in result:
+            if msg.get("role") == "assistant":
+                for tc in msg.get("tool_calls") or []:
+                    cid = tc.get("id", "")
+                    if cid:
+                        assistant_call_ids.add(cid)
+
+        tool_result_ids = set()
+        for msg in result:
+            if msg.get("role") == "tool":
+                cid = msg.get("tool_call_id")
+                if cid:
+                    tool_result_ids.add(cid)
+
+        # Every tool result must have a parent — no orphans
+        orphaned = tool_result_ids - assistant_call_ids
+        assert not orphaned, f"Orphaned tool results found (data loss!): {orphaned}"
+
+        # Every assistant tool_call must have a real result (not a stub)
+        for msg in result:
+            if msg.get("role") == "tool":
+                assert msg["content"] != "[Result from earlier conversation — see context summary above]", \
+                    f"Stub result found for {msg.get('tool_call_id')} — real result was lost"