feat: iteration budget pressure via tool result injection

Two-tier warning system that nudges the LLM as it approaches max_iterations, injected into the last tool result JSON rather than as a separate system message: - Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"} - Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"} For JSON tool results, adds a _budget_warning field to the existing dict. For plain text results, appends the warning as text. Key properties: - No system messages injected mid-conversation - No changes to message structure - Prompt cache stays valid - Configurable thresholds (0.7 / 0.9) - Can be disabled: _budget_pressure_enabled = False Inspired by PR #421 (@Bartok9) and issue #414. 8 tests covering thresholds, edge cases, JSON and text injection.
2026-03-09 05:19:42 -07:00 · 2026-03-09 05:19:42 -07:00 · 21ff0d39ad
commit 21ff0d39ad
parent 4b619c9672
2 changed files with 129 additions and 2 deletions
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@ -1208,3 +1208,78 @@ class TestSystemPromptStability:
        conversation_history = []
        should_prefetch = not conversation_history
        assert should_prefetch is True
+
+
+# ---------------------------------------------------------------------------
+# Iteration budget pressure warnings
+# ---------------------------------------------------------------------------
+
+class TestBudgetPressure:
+    """Budget pressure warning system (issue #414)."""
+
+    def test_no_warning_below_caution(self, agent):
+        agent.max_iterations = 60
+        assert agent._get_budget_warning(30) is None
+
+    def test_caution_at_70_percent(self, agent):
+        agent.max_iterations = 60
+        msg = agent._get_budget_warning(42)
+        assert msg is not None
+        assert "[BUDGET:" in msg
+        assert "18 iterations left" in msg
+
+    def test_warning_at_90_percent(self, agent):
+        agent.max_iterations = 60
+        msg = agent._get_budget_warning(54)
+        assert "[BUDGET WARNING:" in msg
+        assert "Provide your final response NOW" in msg
+
+    def test_last_iteration(self, agent):
+        agent.max_iterations = 60
+        msg = agent._get_budget_warning(59)
+        assert "1 iteration(s) left" in msg
+
+    def test_disabled(self, agent):
+        agent.max_iterations = 60
+        agent._budget_pressure_enabled = False
+        assert agent._get_budget_warning(55) is None
+
+    def test_zero_max_iterations(self, agent):
+        agent.max_iterations = 0
+        assert agent._get_budget_warning(0) is None
+
+    def test_injects_into_json_tool_result(self, agent):
+        """Warning should be injected as _budget_warning field in JSON tool results."""
+        import json
+        agent.max_iterations = 10
+        messages = [
+            {"role": "tool", "content": json.dumps({"output": "done", "exit_code": 0}), "tool_call_id": "tc1"}
+        ]
+        warning = agent._get_budget_warning(9)
+        assert warning is not None
+        # Simulate the injection logic
+        last_content = messages[-1]["content"]
+        parsed = json.loads(last_content)
+        parsed["_budget_warning"] = warning
+        messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
+        result = json.loads(messages[-1]["content"])
+        assert "_budget_warning" in result
+        assert "BUDGET WARNING" in result["_budget_warning"]
+        assert result["output"] == "done"  # original content preserved
+
+    def test_appends_to_non_json_tool_result(self, agent):
+        """Warning should be appended as text for non-JSON tool results."""
+        agent.max_iterations = 10
+        messages = [
+            {"role": "tool", "content": "plain text result", "tool_call_id": "tc1"}
+        ]
+        warning = agent._get_budget_warning(9)
+        # Simulate injection logic for non-JSON
+        last_content = messages[-1]["content"]
+        try:
+            import json
+            json.loads(last_content)
+        except (json.JSONDecodeError, TypeError):
+            messages[-1]["content"] = last_content + f"\n\n{warning}"
+        assert "plain text result" in messages[-1]["content"]
+        assert "BUDGET WARNING" in messages[-1]["content"]