fix(gateway): strip orphaned tool_results + let /reset bypass running agent (#2180)
Two fixes for Telegram/gateway-specific bugs: 1. Anthropic adapter: strip orphaned tool_result blocks (mirror of existing tool_use stripping). Context compression or session truncation can remove an assistant message containing a tool_use while leaving the subsequent tool_result intact. Anthropic rejects these with a 400: 'unexpected tool_use_id found in tool_result blocks'. The adapter now collects all tool_use IDs and filters out any tool_result blocks referencing IDs not in that set. 2. Gateway: /reset and /new now bypass the running-agent guard (like /status already does). Previously, sending /reset while an agent was running caused the raw text to be queued and later fed back as a user message with the same broken history — replaying the corrupted session instead of resetting it. Now the running agent is interrupted, pending messages are cleared, and the reset command dispatches immediately. Tests updated: existing tests now include proper tool_use→tool_result pairs; two new tests cover orphaned tool_result stripping. Co-authored-by: Test <test@test.com>
This commit is contained in:
parent
80e578d3e3
commit
2ea4dd30c6
3 changed files with 122 additions and 6 deletions
|
|
@ -935,6 +935,26 @@ def convert_messages_to_anthropic(
|
||||||
if not m["content"]:
|
if not m["content"]:
|
||||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||||
|
|
||||||
|
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
||||||
|
# This is the mirror of the above: context compression or session truncation
|
||||||
|
# can remove an assistant message containing a tool_use while leaving the
|
||||||
|
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
||||||
|
tool_use_ids = set()
|
||||||
|
for m in result:
|
||||||
|
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||||
|
for block in m["content"]:
|
||||||
|
if block.get("type") == "tool_use":
|
||||||
|
tool_use_ids.add(block.get("id"))
|
||||||
|
for m in result:
|
||||||
|
if m["role"] == "user" and isinstance(m["content"], list):
|
||||||
|
m["content"] = [
|
||||||
|
b
|
||||||
|
for b in m["content"]
|
||||||
|
if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
|
||||||
|
]
|
||||||
|
if not m["content"]:
|
||||||
|
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||||
|
|
||||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||||
fixed = []
|
fixed = []
|
||||||
for m in result:
|
for m in result:
|
||||||
|
|
|
||||||
|
|
@ -1344,6 +1344,31 @@ class GatewayRunner:
|
||||||
if event.get_command() == "status":
|
if event.get_command() == "status":
|
||||||
return await self._handle_status_command(event)
|
return await self._handle_status_command(event)
|
||||||
|
|
||||||
|
# /reset and /new must bypass the running-agent guard so they
|
||||||
|
# actually dispatch as commands instead of being queued as user
|
||||||
|
# text (which would be fed back to the agent with the same
|
||||||
|
# broken history — #2170). Interrupt the agent first, then
|
||||||
|
# clear the adapter's pending queue so the stale "/reset" text
|
||||||
|
# doesn't get re-processed as a user message after the
|
||||||
|
# interrupt completes.
|
||||||
|
from hermes_cli.commands import resolve_command as _resolve_cmd_inner
|
||||||
|
_evt_cmd = event.get_command()
|
||||||
|
_cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
|
||||||
|
if _cmd_def_inner and _cmd_def_inner.name == "new":
|
||||||
|
running_agent = self._running_agents.get(_quick_key)
|
||||||
|
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||||
|
running_agent.interrupt("Session reset requested")
|
||||||
|
# Clear any pending messages so the old text doesn't replay
|
||||||
|
adapter = self.adapters.get(source.platform)
|
||||||
|
if adapter and hasattr(adapter, 'get_pending_message'):
|
||||||
|
adapter.get_pending_message(_quick_key) # consume and discard
|
||||||
|
self._pending_messages.pop(_quick_key, None)
|
||||||
|
# Clean up the running agent entry so the reset handler
|
||||||
|
# doesn't think an agent is still active.
|
||||||
|
if _quick_key in self._running_agents:
|
||||||
|
del self._running_agents[_quick_key]
|
||||||
|
return await self._handle_reset_command(event)
|
||||||
|
|
||||||
if event.message_type == MessageType.PHOTO:
|
if event.message_type == MessageType.PHOTO:
|
||||||
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
|
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
|
||||||
adapter = self.adapters.get(source.platform)
|
adapter = self.adapters.get(source.platform)
|
||||||
|
|
|
||||||
|
|
@ -578,21 +578,39 @@ class TestConvertMessages:
|
||||||
|
|
||||||
def test_converts_tool_results(self):
|
def test_converts_tool_results(self):
|
||||||
messages = [
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
|
||||||
|
],
|
||||||
|
},
|
||||||
{"role": "tool", "tool_call_id": "tc_1", "content": "result data"},
|
{"role": "tool", "tool_call_id": "tc_1", "content": "result data"},
|
||||||
]
|
]
|
||||||
_, result = convert_messages_to_anthropic(messages)
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
assert result[0]["role"] == "user"
|
# tool result is in the second message (user role)
|
||||||
assert result[0]["content"][0]["type"] == "tool_result"
|
user_msg = [m for m in result if m["role"] == "user"][0]
|
||||||
assert result[0]["content"][0]["tool_use_id"] == "tc_1"
|
assert user_msg["content"][0]["type"] == "tool_result"
|
||||||
|
assert user_msg["content"][0]["tool_use_id"] == "tc_1"
|
||||||
|
|
||||||
def test_merges_consecutive_tool_results(self):
|
def test_merges_consecutive_tool_results(self):
|
||||||
messages = [
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "tc_1", "function": {"name": "tool_a", "arguments": "{}"}},
|
||||||
|
{"id": "tc_2", "function": {"name": "tool_b", "arguments": "{}"}},
|
||||||
|
],
|
||||||
|
},
|
||||||
{"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
|
{"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
|
||||||
{"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
|
{"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
|
||||||
]
|
]
|
||||||
_, result = convert_messages_to_anthropic(messages)
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
assert len(result) == 1
|
# assistant + merged user (with 2 tool_results)
|
||||||
assert len(result[0]["content"]) == 2
|
user_msgs = [m for m in result if m["role"] == "user"]
|
||||||
|
assert len(user_msgs) == 1
|
||||||
|
assert len(user_msgs[0]["content"]) == 2
|
||||||
|
|
||||||
def test_strips_orphaned_tool_use(self):
|
def test_strips_orphaned_tool_use(self):
|
||||||
messages = [
|
messages = [
|
||||||
|
|
@ -610,6 +628,51 @@ class TestConvertMessages:
|
||||||
assistant_blocks = result[0]["content"]
|
assistant_blocks = result[0]["content"]
|
||||||
assert all(b.get("type") != "tool_use" for b in assistant_blocks)
|
assert all(b.get("type") != "tool_use" for b in assistant_blocks)
|
||||||
|
|
||||||
|
def test_strips_orphaned_tool_result(self):
|
||||||
|
"""tool_result with no matching tool_use should be stripped.
|
||||||
|
|
||||||
|
This happens when context compression removes the assistant message
|
||||||
|
containing the tool_use but leaves the subsequent tool_result intact.
|
||||||
|
Anthropic rejects orphaned tool_results with a 400.
|
||||||
|
"""
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "Hello"},
|
||||||
|
{"role": "assistant", "content": "Hi there"},
|
||||||
|
# The assistant tool_use message was removed by compression,
|
||||||
|
# but the tool_result survived:
|
||||||
|
{"role": "tool", "tool_call_id": "tc_gone", "content": "stale result"},
|
||||||
|
{"role": "user", "content": "Thanks"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
# tc_gone has no matching tool_use — its tool_result should be stripped
|
||||||
|
for m in result:
|
||||||
|
if m["role"] == "user" and isinstance(m["content"], list):
|
||||||
|
assert all(
|
||||||
|
b.get("type") != "tool_result"
|
||||||
|
for b in m["content"]
|
||||||
|
), "Orphaned tool_result should have been stripped"
|
||||||
|
|
||||||
|
def test_strips_orphaned_tool_result_preserves_valid(self):
|
||||||
|
"""Orphaned tool_results are stripped while valid ones survive."""
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "tc_valid", "function": {"name": "search", "arguments": "{}"}},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{"role": "tool", "tool_call_id": "tc_valid", "content": "good result"},
|
||||||
|
{"role": "tool", "tool_call_id": "tc_orphan", "content": "stale result"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
user_msg = [m for m in result if m["role"] == "user"][0]
|
||||||
|
tool_results = [
|
||||||
|
b for b in user_msg["content"] if b.get("type") == "tool_result"
|
||||||
|
]
|
||||||
|
assert len(tool_results) == 1
|
||||||
|
assert tool_results[0]["tool_use_id"] == "tc_valid"
|
||||||
|
|
||||||
def test_system_with_cache_control(self):
|
def test_system_with_cache_control(self):
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
|
|
@ -641,11 +704,19 @@ class TestConvertMessages:
|
||||||
def test_tool_cache_control_is_preserved_on_tool_result_block(self):
|
def test_tool_cache_control_is_preserved_on_tool_result_block(self):
|
||||||
messages = apply_anthropic_cache_control([
|
messages = apply_anthropic_cache_control([
|
||||||
{"role": "system", "content": "System prompt"},
|
{"role": "system", "content": "System prompt"},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
|
||||||
|
],
|
||||||
|
},
|
||||||
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
|
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
|
||||||
])
|
])
|
||||||
|
|
||||||
_, result = convert_messages_to_anthropic(messages)
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
tool_block = result[0]["content"][0]
|
user_msg = [m for m in result if m["role"] == "user"][0]
|
||||||
|
tool_block = user_msg["content"][0]
|
||||||
|
|
||||||
assert tool_block["type"] == "tool_result"
|
assert tool_block["type"] == "tool_result"
|
||||||
assert tool_block["tool_use_id"] == "tc_1"
|
assert tool_block["tool_use_id"] == "tc_1"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue