feat: iteration budget pressure via tool result injection
Two-tier warning system that nudges the LLM as it approaches
max_iterations. Each warning is injected into the last tool result
rather than sent as a separate system message:
- Caution (70%): {"_budget_warning": "[BUDGET: Iteration 42/60. ...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: Iteration 54/60. ...]"}
For tool results that parse as a JSON object, adds a _budget_warning
field to the existing dict. For all other results (plain text or
non-object JSON), appends the warning as text.
Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False
Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.
This commit is contained in:
parent
4b619c9672
commit
21ff0d39ad
2 changed files with 129 additions and 2 deletions
56
run_agent.py
56
run_agent.py
|
|
@ -297,6 +297,13 @@ class AIAgent:
|
||||||
self._use_prompt_caching = is_openrouter and is_claude
|
self._use_prompt_caching = is_openrouter and is_claude
|
||||||
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
||||||
|
|
||||||
|
# Iteration budget pressure: warn the LLM as it approaches max_iterations.
|
||||||
|
# Warnings are injected into the last tool result JSON (not as separate
|
||||||
|
# messages) so they don't break message structure or invalidate caching.
|
||||||
|
self._budget_caution_threshold = 0.7 # 70% — nudge to start wrapping up
|
||||||
|
self._budget_warning_threshold = 0.9 # 90% — urgent, respond now
|
||||||
|
self._budget_pressure_enabled = True
|
||||||
|
|
||||||
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
|
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
|
||||||
# so tool failures, API errors, etc. are inspectable after the fact.
|
# so tool failures, API errors, etc. are inspectable after the fact.
|
||||||
from agent.redact import RedactingFormatter
|
from agent.redact import RedactingFormatter
|
||||||
|
|
@ -2691,7 +2698,7 @@ class AIAgent:
|
||||||
|
|
||||||
return compressed, new_system_prompt
|
return compressed, new_system_prompt
|
||||||
|
|
||||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str) -> None:
|
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||||
"""Execute tool calls from the assistant message and append results to messages."""
|
"""Execute tool calls from the assistant message and append results to messages."""
|
||||||
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
||||||
# SAFETY: check interrupt BEFORE starting each tool.
|
# SAFETY: check interrupt BEFORE starting each tool.
|
||||||
|
|
@ -2938,6 +2945,51 @@ class AIAgent:
|
||||||
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||||
time.sleep(self.tool_delay)
|
time.sleep(self.tool_delay)
|
||||||
|
|
||||||
|
# ── Budget pressure injection ─────────────────────────────────
|
||||||
|
# After all tool calls in this turn are processed, check if we're
|
||||||
|
# approaching max_iterations. If so, inject a warning into the LAST
|
||||||
|
# tool result's JSON so the LLM sees it naturally when reading results.
|
||||||
|
budget_warning = self._get_budget_warning(api_call_count)
|
||||||
|
if budget_warning and messages and messages[-1].get("role") == "tool":
|
||||||
|
last_content = messages[-1]["content"]
|
||||||
|
try:
|
||||||
|
parsed = json.loads(last_content)
|
||||||
|
if isinstance(parsed, dict):
|
||||||
|
parsed["_budget_warning"] = budget_warning
|
||||||
|
messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
|
||||||
|
else:
|
||||||
|
messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
|
||||||
|
if not self.quiet_mode:
|
||||||
|
remaining = self.max_iterations - api_call_count
|
||||||
|
tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
|
||||||
|
print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")
|
||||||
|
|
||||||
|
def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
    """Return a budget pressure string, or None if not yet needed.

    Two-tier system, driven by ``api_call_count / self.max_iterations``:
    - Caution (``self._budget_caution_threshold``): nudge to consolidate work
    - Warning (``self._budget_warning_threshold``): urgent, must respond now

    The warning tier is checked first, so it wins whenever both
    thresholds are met.

    Args:
        api_call_count: Number of iterations counted so far.

    Returns:
        The bracketed message to show the LLM, or None when budget
        pressure is disabled, ``max_iterations`` is not positive, or the
        caution threshold has not been reached.
    """
    if not self._budget_pressure_enabled or self.max_iterations <= 0:
        return None

    progress = api_call_count / self.max_iterations
    # Clamp at zero: if the count ever overshoots the limit, the message
    # must not report a negative number of iterations left.
    remaining = max(self.max_iterations - api_call_count, 0)

    if progress >= self._budget_warning_threshold:
        return (
            f"[BUDGET WARNING: Iteration {api_call_count}/{self.max_iterations}. "
            f"Only {remaining} iteration(s) left. "
            "Provide your final response NOW. No more tool calls unless absolutely critical.]"
        )
    if progress >= self._budget_caution_threshold:
        return (
            f"[BUDGET: Iteration {api_call_count}/{self.max_iterations}. "
            f"{remaining} iterations left. Start consolidating your work.]"
        )
    return None
|
||||||
|
|
||||||
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
|
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
|
||||||
"""Request a summary when max iterations are reached. Returns the final response text."""
|
"""Request a summary when max iterations are reached. Returns the final response text."""
|
||||||
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
|
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
|
||||||
|
|
@ -4183,7 +4235,7 @@ class AIAgent:
|
||||||
|
|
||||||
messages.append(assistant_msg)
|
messages.append(assistant_msg)
|
||||||
|
|
||||||
self._execute_tool_calls(assistant_message, messages, effective_task_id)
|
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
||||||
|
|
||||||
# Refund the iteration if the ONLY tool(s) called were
|
# Refund the iteration if the ONLY tool(s) called were
|
||||||
# execute_code (programmatic tool calling). These are
|
# execute_code (programmatic tool calling). These are
|
||||||
|
|
|
||||||
|
|
@ -1208,3 +1208,78 @@ class TestSystemPromptStability:
|
||||||
conversation_history = []
|
conversation_history = []
|
||||||
should_prefetch = not conversation_history
|
should_prefetch = not conversation_history
|
||||||
assert should_prefetch is True
|
assert should_prefetch is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Iteration budget pressure warnings
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestBudgetPressure:
    """Budget pressure warning system (issue #414).

    The threshold tests call ``agent._get_budget_warning`` directly and
    rely on the ``agent`` fixture using the 0.7 / 0.9 thresholds.

    NOTE(review): the two injection tests replay the JSON-vs-text
    injection steps on a hand-built message list; they do not drive the
    agent's own tool-execution path, so they only pin the expected shape
    of the result.
    """

    def test_no_warning_below_caution(self, agent):
        agent.max_iterations = 60
        assert agent._get_budget_warning(30) is None
        # 41/60 is the last count below the 70% caution threshold.
        assert agent._get_budget_warning(41) is None

    def test_caution_at_70_percent(self, agent):
        agent.max_iterations = 60
        msg = agent._get_budget_warning(42)
        assert msg is not None
        assert "[BUDGET:" in msg
        assert "BUDGET WARNING" not in msg
        assert "18 iterations left" in msg

    def test_warning_at_90_percent(self, agent):
        agent.max_iterations = 60
        # 53/60 is still below 90%, so it must stay in the caution tier.
        below = agent._get_budget_warning(53)
        assert below is not None
        assert "BUDGET WARNING" not in below

        msg = agent._get_budget_warning(54)
        # Fail with a clear assertion instead of a TypeError from `in None`.
        assert msg is not None
        assert "[BUDGET WARNING:" in msg
        assert "Provide your final response NOW" in msg

    def test_last_iteration(self, agent):
        agent.max_iterations = 60
        msg = agent._get_budget_warning(59)
        assert msg is not None
        assert "1 iteration(s) left" in msg

    def test_disabled(self, agent):
        agent.max_iterations = 60
        agent._budget_pressure_enabled = False
        assert agent._get_budget_warning(55) is None

    def test_zero_max_iterations(self, agent):
        agent.max_iterations = 0
        assert agent._get_budget_warning(0) is None

    def test_injects_into_json_tool_result(self, agent):
        """Warning should be injected as _budget_warning field in JSON tool results."""
        import json

        agent.max_iterations = 10
        messages = [
            {
                "role": "tool",
                "content": json.dumps({"output": "done", "exit_code": 0}),
                "tool_call_id": "tc1",
            }
        ]
        warning = agent._get_budget_warning(9)
        assert warning is not None

        # Simulate the injection logic
        last_content = messages[-1]["content"]
        parsed = json.loads(last_content)
        assert isinstance(parsed, dict)
        parsed["_budget_warning"] = warning
        messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)

        result = json.loads(messages[-1]["content"])
        assert "_budget_warning" in result
        assert "BUDGET WARNING" in result["_budget_warning"]
        assert result["output"] == "done"  # original content preserved
        assert result["exit_code"] == 0

    def test_appends_to_non_json_tool_result(self, agent):
        """Warning should be appended as text for non-JSON tool results."""
        import json

        agent.max_iterations = 10
        messages = [
            {"role": "tool", "content": "plain text result", "tool_call_id": "tc1"}
        ]
        warning = agent._get_budget_warning(9)
        assert warning is not None

        # Simulate injection logic for non-JSON: the content must not
        # parse as a JSON object, and the warning is then appended as text.
        last_content = messages[-1]["content"]
        try:
            parsed = json.loads(last_content)
        except (json.JSONDecodeError, TypeError):
            parsed = None
        assert not isinstance(parsed, dict)
        messages[-1]["content"] = last_content + f"\n\n{warning}"

        assert messages[-1]["content"].startswith("plain text result")
        assert "BUDGET WARNING" in messages[-1]["content"]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue