feat: iteration budget pressure via tool result injection

Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:

- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}

For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.

Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False

Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.
This commit is contained in:
teknium1 2026-03-09 05:19:42 -07:00
parent 4b619c9672
commit 21ff0d39ad
2 changed files with 129 additions and 2 deletions

View file

@ -1208,3 +1208,78 @@ class TestSystemPromptStability:
conversation_history = []
should_prefetch = not conversation_history
assert should_prefetch is True
# ---------------------------------------------------------------------------
# Iteration budget pressure warnings
# ---------------------------------------------------------------------------
class TestBudgetPressure:
"""Budget pressure warning system (issue #414)."""
def test_no_warning_below_caution(self, agent):
agent.max_iterations = 60
assert agent._get_budget_warning(30) is None
def test_caution_at_70_percent(self, agent):
agent.max_iterations = 60
msg = agent._get_budget_warning(42)
assert msg is not None
assert "[BUDGET:" in msg
assert "18 iterations left" in msg
def test_warning_at_90_percent(self, agent):
agent.max_iterations = 60
msg = agent._get_budget_warning(54)
assert "[BUDGET WARNING:" in msg
assert "Provide your final response NOW" in msg
def test_last_iteration(self, agent):
agent.max_iterations = 60
msg = agent._get_budget_warning(59)
assert "1 iteration(s) left" in msg
def test_disabled(self, agent):
agent.max_iterations = 60
agent._budget_pressure_enabled = False
assert agent._get_budget_warning(55) is None
def test_zero_max_iterations(self, agent):
agent.max_iterations = 0
assert agent._get_budget_warning(0) is None
def test_injects_into_json_tool_result(self, agent):
"""Warning should be injected as _budget_warning field in JSON tool results."""
import json
agent.max_iterations = 10
messages = [
{"role": "tool", "content": json.dumps({"output": "done", "exit_code": 0}), "tool_call_id": "tc1"}
]
warning = agent._get_budget_warning(9)
assert warning is not None
# Simulate the injection logic
last_content = messages[-1]["content"]
parsed = json.loads(last_content)
parsed["_budget_warning"] = warning
messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
result = json.loads(messages[-1]["content"])
assert "_budget_warning" in result
assert "BUDGET WARNING" in result["_budget_warning"]
assert result["output"] == "done" # original content preserved
def test_appends_to_non_json_tool_result(self, agent):
"""Warning should be appended as text for non-JSON tool results."""
agent.max_iterations = 10
messages = [
{"role": "tool", "content": "plain text result", "tool_call_id": "tc1"}
]
warning = agent._get_budget_warning(9)
# Simulate injection logic for non-JSON
last_content = messages[-1]["content"]
try:
import json
json.loads(last_content)
except (json.JSONDecodeError, TypeError):
messages[-1]["content"] = last_content + f"\n\n{warning}"
assert "plain text result" in messages[-1]["content"]
assert "BUDGET WARNING" in messages[-1]["content"]