fix: escalate read/search blocking, track search loops, filter completed todos
- Block file reads after 3+ re-reads of same region (no content returned) - Track search_files calls and block repeated identical searches - Filter completed/cancelled todos from post-compression injection to prevent agent from re-doing finished work - Add 10 new tests covering all three fixes
This commit is contained in:
parent
9eee529a7f
commit
e2fe1373f3
4 changed files with 167 additions and 9 deletions
|
|
@ -19,6 +19,7 @@ from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
from tools.file_tools import (
|
from tools.file_tools import (
|
||||||
read_file_tool,
|
read_file_tool,
|
||||||
|
search_tool,
|
||||||
get_read_files_summary,
|
get_read_files_summary,
|
||||||
clear_read_tracker,
|
clear_read_tracker,
|
||||||
_read_tracker,
|
_read_tracker,
|
||||||
|
|
@ -39,9 +40,16 @@ def _fake_read_file(path, offset=1, limit=500):
|
||||||
return _FakeReadResult(content=f"content of {path}", total_lines=10)
|
return _FakeReadResult(content=f"content of {path}", total_lines=10)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeSearchResult:
|
||||||
|
"""Minimal stand-in for FileOperations.search return value."""
|
||||||
|
def to_dict(self):
|
||||||
|
return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]}
|
||||||
|
|
||||||
|
|
||||||
def _make_fake_file_ops():
|
def _make_fake_file_ops():
|
||||||
fake = MagicMock()
|
fake = MagicMock()
|
||||||
fake.read_file = _fake_read_file
|
fake.read_file = _fake_read_file
|
||||||
|
fake.search = lambda **kw: _FakeSearchResult()
|
||||||
return fake
|
return fake
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -71,11 +79,23 @@ class TestReadLoopDetection(unittest.TestCase):
|
||||||
self.assertIn("2 times", result["_warning"])
|
self.assertIn("2 times", result["_warning"])
|
||||||
|
|
||||||
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
def test_third_read_increments_count(self, _mock_ops):
|
def test_third_read_is_blocked(self, _mock_ops):
|
||||||
|
"""3rd read of the same region returns error, no content."""
|
||||||
for _ in range(2):
|
for _ in range(2):
|
||||||
read_file_tool("/tmp/test.py", task_id="t1")
|
read_file_tool("/tmp/test.py", task_id="t1")
|
||||||
result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
|
result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
|
||||||
self.assertIn("3 times", result["_warning"])
|
self.assertIn("error", result)
|
||||||
|
self.assertIn("BLOCKED", result["error"])
|
||||||
|
self.assertNotIn("content", result)
|
||||||
|
|
||||||
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
|
def test_fourth_read_still_blocked(self, _mock_ops):
|
||||||
|
"""Subsequent reads remain blocked with incrementing count."""
|
||||||
|
for _ in range(3):
|
||||||
|
read_file_tool("/tmp/test.py", task_id="t1")
|
||||||
|
result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
|
||||||
|
self.assertIn("BLOCKED", result["error"])
|
||||||
|
self.assertIn("4 times", result["error"])
|
||||||
|
|
||||||
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
def test_different_region_no_warning(self, _mock_ops):
|
def test_different_region_no_warning(self, _mock_ops):
|
||||||
|
|
@ -267,5 +287,94 @@ class TestCompressionFileHistory(unittest.TestCase):
|
||||||
self.assertIn("do NOT re-read", history_content)
|
self.assertIn("do NOT re-read", history_content)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchLoopDetection(unittest.TestCase):
|
||||||
|
"""Verify that search_tool detects and blocks repeated searches."""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
clear_read_tracker()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
clear_read_tracker()
|
||||||
|
|
||||||
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
|
def test_first_search_no_warning(self, _mock_ops):
|
||||||
|
result = json.loads(search_tool("def main", task_id="t1"))
|
||||||
|
self.assertNotIn("_warning", result)
|
||||||
|
self.assertNotIn("error", result)
|
||||||
|
|
||||||
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
|
def test_second_search_has_warning(self, _mock_ops):
|
||||||
|
search_tool("def main", task_id="t1")
|
||||||
|
result = json.loads(search_tool("def main", task_id="t1"))
|
||||||
|
self.assertIn("_warning", result)
|
||||||
|
self.assertIn("2 times", result["_warning"])
|
||||||
|
|
||||||
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
|
def test_third_search_is_blocked(self, _mock_ops):
|
||||||
|
for _ in range(2):
|
||||||
|
search_tool("def main", task_id="t1")
|
||||||
|
result = json.loads(search_tool("def main", task_id="t1"))
|
||||||
|
self.assertIn("error", result)
|
||||||
|
self.assertIn("BLOCKED", result["error"])
|
||||||
|
self.assertNotIn("matches", result)
|
||||||
|
|
||||||
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
|
def test_different_pattern_no_warning(self, _mock_ops):
|
||||||
|
search_tool("def main", task_id="t1")
|
||||||
|
result = json.loads(search_tool("class Foo", task_id="t1"))
|
||||||
|
self.assertNotIn("_warning", result)
|
||||||
|
self.assertNotIn("error", result)
|
||||||
|
|
||||||
|
@patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
|
||||||
|
def test_different_task_isolated(self, _mock_ops):
|
||||||
|
search_tool("def main", task_id="t1")
|
||||||
|
result = json.loads(search_tool("def main", task_id="t2"))
|
||||||
|
self.assertNotIn("_warning", result)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTodoInjectionFiltering(unittest.TestCase):
|
||||||
|
"""Verify that format_for_injection filters completed/cancelled todos."""
|
||||||
|
|
||||||
|
def test_filters_completed_and_cancelled(self):
|
||||||
|
from tools.todo_tool import TodoStore
|
||||||
|
store = TodoStore()
|
||||||
|
store.write([
|
||||||
|
{"id": "1", "content": "Read codebase", "status": "completed"},
|
||||||
|
{"id": "2", "content": "Write fix", "status": "in_progress"},
|
||||||
|
{"id": "3", "content": "Run tests", "status": "pending"},
|
||||||
|
{"id": "4", "content": "Abandoned", "status": "cancelled"},
|
||||||
|
])
|
||||||
|
injection = store.format_for_injection()
|
||||||
|
self.assertNotIn("Read codebase", injection)
|
||||||
|
self.assertNotIn("Abandoned", injection)
|
||||||
|
self.assertIn("Write fix", injection)
|
||||||
|
self.assertIn("Run tests", injection)
|
||||||
|
|
||||||
|
def test_all_completed_returns_none(self):
|
||||||
|
from tools.todo_tool import TodoStore
|
||||||
|
store = TodoStore()
|
||||||
|
store.write([
|
||||||
|
{"id": "1", "content": "Done", "status": "completed"},
|
||||||
|
{"id": "2", "content": "Also done", "status": "cancelled"},
|
||||||
|
])
|
||||||
|
self.assertIsNone(store.format_for_injection())
|
||||||
|
|
||||||
|
def test_empty_store_returns_none(self):
|
||||||
|
from tools.todo_tool import TodoStore
|
||||||
|
store = TodoStore()
|
||||||
|
self.assertIsNone(store.format_for_injection())
|
||||||
|
|
||||||
|
def test_all_active_included(self):
|
||||||
|
from tools.todo_tool import TodoStore
|
||||||
|
store = TodoStore()
|
||||||
|
store.write([
|
||||||
|
{"id": "1", "content": "Task A", "status": "pending"},
|
||||||
|
{"id": "2", "content": "Task B", "status": "in_progress"},
|
||||||
|
])
|
||||||
|
injection = store.format_for_injection()
|
||||||
|
self.assertIn("Task A", injection)
|
||||||
|
self.assertIn("Task B", injection)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ _TOOL_STUBS = {
|
||||||
"web_extract": (
|
"web_extract": (
|
||||||
"web_extract",
|
"web_extract",
|
||||||
"urls: list",
|
"urls: list",
|
||||||
'"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""',
|
'"""Extract content from URLs. Returns dict with results list of {url, content, error}."""',
|
||||||
'{"urls": urls}',
|
'{"urls": urls}',
|
||||||
),
|
),
|
||||||
"read_file": (
|
"read_file": (
|
||||||
|
|
@ -605,7 +605,7 @@ _TOOL_DOC_LINES = [
|
||||||
" Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
|
" Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
|
||||||
("web_extract",
|
("web_extract",
|
||||||
" web_extract(urls: list[str]) -> dict\n"
|
" web_extract(urls: list[str]) -> dict\n"
|
||||||
" Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"),
|
" Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"),
|
||||||
("read_file",
|
("read_file",
|
||||||
" read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
|
" read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
|
||||||
" Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
|
" Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
|
||||||
|
|
@ -643,7 +643,10 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
|
||||||
import_examples = [n for n in ("web_search", "terminal") if n in enabled_sandbox_tools]
|
import_examples = [n for n in ("web_search", "terminal") if n in enabled_sandbox_tools]
|
||||||
if not import_examples:
|
if not import_examples:
|
||||||
import_examples = sorted(enabled_sandbox_tools)[:2]
|
import_examples = sorted(enabled_sandbox_tools)[:2]
|
||||||
|
if import_examples:
|
||||||
import_str = ", ".join(import_examples) + ", ..."
|
import_str = ", ".join(import_examples) + ", ..."
|
||||||
|
else:
|
||||||
|
import_str = "..."
|
||||||
|
|
||||||
description = (
|
description = (
|
||||||
"Run a Python script that can call Hermes tools programmatically. "
|
"Run a Python script that can call Hermes tools programmatically. "
|
||||||
|
|
|
||||||
|
|
@ -142,7 +142,18 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
||||||
task_reads[read_key] = task_reads.get(read_key, 0) + 1
|
task_reads[read_key] = task_reads.get(read_key, 0) + 1
|
||||||
count = task_reads[read_key]
|
count = task_reads[read_key]
|
||||||
|
|
||||||
if count > 1:
|
if count >= 3:
|
||||||
|
# Hard block: stop returning content to break the loop
|
||||||
|
return json.dumps({
|
||||||
|
"error": (
|
||||||
|
f"BLOCKED: You have read this exact file region {count} times. "
|
||||||
|
"The content has NOT changed. You already have this information. "
|
||||||
|
"STOP re-reading and proceed with your task."
|
||||||
|
),
|
||||||
|
"path": path,
|
||||||
|
"already_read": count,
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
elif count > 1:
|
||||||
result_dict["_warning"] = (
|
result_dict["_warning"] = (
|
||||||
f"You have already read this exact file region {count} times in this session. "
|
f"You have already read this exact file region {count} times in this session. "
|
||||||
"The content has not changed. Use the information you already have instead of re-reading. "
|
"The content has not changed. Use the information you already have instead of re-reading. "
|
||||||
|
|
@ -224,12 +235,38 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
|
||||||
task_id: str = "default") -> str:
|
task_id: str = "default") -> str:
|
||||||
"""Search for content or files."""
|
"""Search for content or files."""
|
||||||
try:
|
try:
|
||||||
|
# Track searches to detect repeated search loops
|
||||||
|
search_key = ("search", pattern, target, path, file_glob or "")
|
||||||
|
with _read_tracker_lock:
|
||||||
|
task_reads = _read_tracker.setdefault(task_id, {})
|
||||||
|
task_reads[search_key] = task_reads.get(search_key, 0) + 1
|
||||||
|
count = task_reads[search_key]
|
||||||
|
|
||||||
|
if count >= 3:
|
||||||
|
return json.dumps({
|
||||||
|
"error": (
|
||||||
|
f"BLOCKED: You have run this exact search {count} times. "
|
||||||
|
"The results have NOT changed. You already have this information. "
|
||||||
|
"STOP re-searching and proceed with your task."
|
||||||
|
),
|
||||||
|
"pattern": pattern,
|
||||||
|
"already_searched": count,
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
file_ops = _get_file_ops(task_id)
|
file_ops = _get_file_ops(task_id)
|
||||||
result = file_ops.search(
|
result = file_ops.search(
|
||||||
pattern=pattern, path=path, target=target, file_glob=file_glob,
|
pattern=pattern, path=path, target=target, file_glob=file_glob,
|
||||||
limit=limit, offset=offset, output_mode=output_mode, context=context
|
limit=limit, offset=offset, output_mode=output_mode, context=context
|
||||||
)
|
)
|
||||||
return json.dumps(result.to_dict(), ensure_ascii=False)
|
result_dict = result.to_dict()
|
||||||
|
|
||||||
|
if count > 1:
|
||||||
|
result_dict["_warning"] = (
|
||||||
|
f"You have run this exact search {count} times in this session. "
|
||||||
|
"The results have not changed. Use the information you already have."
|
||||||
|
)
|
||||||
|
|
||||||
|
return json.dumps(result_dict, ensure_ascii=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return json.dumps({"error": str(e)}, ensure_ascii=False)
|
return json.dumps({"error": str(e)}, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -105,8 +105,17 @@ class TodoStore:
|
||||||
"cancelled": "[~]",
|
"cancelled": "[~]",
|
||||||
}
|
}
|
||||||
|
|
||||||
lines = ["[Your task list was preserved across context compression]"]
|
# Only inject pending/in_progress items — completed/cancelled ones
|
||||||
for item in self._items:
|
# cause the model to re-do finished work after compression.
|
||||||
|
active_items = [
|
||||||
|
item for item in self._items
|
||||||
|
if item["status"] in ("pending", "in_progress")
|
||||||
|
]
|
||||||
|
if not active_items:
|
||||||
|
return None
|
||||||
|
|
||||||
|
lines = ["[Your active task list was preserved across context compression]"]
|
||||||
|
for item in active_items:
|
||||||
marker = markers.get(item["status"], "[?]")
|
marker = markers.get(item["status"], "[?]")
|
||||||
lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")
|
lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue