Merge PR #627: fix: continue non-tool replies after output-length truncation
Authored by tripledoublev (vincent). Rebased onto current main and conflict-resolved.

When finish_reason='length' on a non-tool chat-completions response, instead of rolling back and returning None, the agent now:

- Appends the truncated text and a continuation prompt
- Retries up to 3 times, accumulating partial chunks
- Concatenates all chunks into the final response
- Preserves existing rollback behavior for tool-call truncations
This commit is contained in:
commit
d6d5a43d3a
2 changed files with 85 additions and 5 deletions
60
run_agent.py
60
run_agent.py
|
|
@ -3233,6 +3233,8 @@ class AIAgent:
|
||||||
final_response = None
|
final_response = None
|
||||||
interrupted = False
|
interrupted = False
|
||||||
codex_ack_continuations = 0
|
codex_ack_continuations = 0
|
||||||
|
length_continue_retries = 0
|
||||||
|
truncated_response_prefix = ""
|
||||||
|
|
||||||
# Clear any stale interrupt state at start
|
# Clear any stale interrupt state at start
|
||||||
self.clear_interrupt()
|
self.clear_interrupt()
|
||||||
|
|
@ -3375,6 +3377,7 @@ class AIAgent:
|
||||||
codex_auth_retry_attempted = False
|
codex_auth_retry_attempted = False
|
||||||
nous_auth_retry_attempted = False
|
nous_auth_retry_attempted = False
|
||||||
restart_with_compressed_messages = False
|
restart_with_compressed_messages = False
|
||||||
|
restart_with_length_continuation = False
|
||||||
|
|
||||||
finish_reason = "stop"
|
finish_reason = "stop"
|
||||||
response = None # Guard against UnboundLocalError if all retries fail
|
response = None # Guard against UnboundLocalError if all retries fail
|
||||||
|
|
@ -3525,19 +3528,60 @@ class AIAgent:
|
||||||
finish_reason = "stop"
|
finish_reason = "stop"
|
||||||
else:
|
else:
|
||||||
finish_reason = response.choices[0].finish_reason
|
finish_reason = response.choices[0].finish_reason
|
||||||
|
|
||||||
# Handle "length" finish_reason - response was truncated
|
|
||||||
if finish_reason == "length":
|
if finish_reason == "length":
|
||||||
print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
|
print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
|
||||||
|
|
||||||
|
if self.api_mode == "chat_completions":
|
||||||
|
assistant_message = response.choices[0].message
|
||||||
|
if not assistant_message.tool_calls:
|
||||||
|
length_continue_retries += 1
|
||||||
|
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||||
|
messages.append(interim_msg)
|
||||||
|
self._log_msg_to_db(interim_msg)
|
||||||
|
if assistant_message.content:
|
||||||
|
truncated_response_prefix += assistant_message.content
|
||||||
|
|
||||||
|
if length_continue_retries < 3:
|
||||||
|
print(
|
||||||
|
f"{self.log_prefix}↻ Requesting continuation "
|
||||||
|
f"({length_continue_retries}/3)..."
|
||||||
|
)
|
||||||
|
continue_msg = {
|
||||||
|
"role": "user",
|
||||||
|
"content": (
|
||||||
|
"[System: Your previous response was truncated by the output "
|
||||||
|
"length limit. Continue exactly where you left off. Do not "
|
||||||
|
"restart or repeat prior text. Finish the answer directly.]"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
messages.append(continue_msg)
|
||||||
|
self._log_msg_to_db(continue_msg)
|
||||||
|
self._session_messages = messages
|
||||||
|
self._save_session_log(messages)
|
||||||
|
restart_with_length_continuation = True
|
||||||
|
break
|
||||||
|
|
||||||
|
partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
|
||||||
|
self._cleanup_task_resources(effective_task_id)
|
||||||
|
self._persist_session(messages, conversation_history)
|
||||||
|
return {
|
||||||
|
"final_response": partial_response or None,
|
||||||
|
"messages": messages,
|
||||||
|
"api_calls": api_call_count,
|
||||||
|
"completed": False,
|
||||||
|
"partial": True,
|
||||||
|
"error": "Response remained truncated after 3 continuation attempts",
|
||||||
|
}
|
||||||
|
|
||||||
# If we have prior messages, roll back to last complete state
|
# If we have prior messages, roll back to last complete state
|
||||||
if len(messages) > 1:
|
if len(messages) > 1:
|
||||||
print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
|
print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
|
||||||
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
||||||
|
|
||||||
self._cleanup_task_resources(effective_task_id)
|
self._cleanup_task_resources(effective_task_id)
|
||||||
self._persist_session(messages, conversation_history)
|
self._persist_session(messages, conversation_history)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"final_response": None,
|
"final_response": None,
|
||||||
"messages": rolled_back_messages,
|
"messages": rolled_back_messages,
|
||||||
|
|
@ -3870,6 +3914,9 @@ class AIAgent:
|
||||||
self.iteration_budget.refund()
|
self.iteration_budget.refund()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if restart_with_length_continuation:
|
||||||
|
continue
|
||||||
|
|
||||||
# Guard: if all retries exhausted without a successful response
|
# Guard: if all retries exhausted without a successful response
|
||||||
# (e.g. repeated context-length errors that exhausted retry_count),
|
# (e.g. repeated context-length errors that exhausted retry_count),
|
||||||
# the `response` variable is still None. Break out cleanly.
|
# the `response` variable is still None. Break out cleanly.
|
||||||
|
|
@ -4260,6 +4307,9 @@ class AIAgent:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
codex_ack_continuations = 0
|
codex_ack_continuations = 0
|
||||||
|
|
||||||
|
if truncated_response_prefix:
|
||||||
|
final_response = truncated_response_prefix + final_response
|
||||||
|
|
||||||
# Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
|
# Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
|
||||||
final_response = self._strip_think_blocks(final_response).strip()
|
final_response = self._strip_think_blocks(final_response).strip()
|
||||||
|
|
|
||||||
|
|
@ -829,6 +829,36 @@ class TestRunConversation:
|
||||||
assert result["final_response"] == "All done"
|
assert result["final_response"] == "All done"
|
||||||
assert result["completed"] is True
|
assert result["completed"] is True
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("first_content", "second_content", "expected_final"),
|
||||||
|
[
|
||||||
|
("Part 1 ", "Part 2", "Part 1 Part 2"),
|
||||||
|
("<think>internal reasoning</think>", "Recovered final answer", "Recovered final answer"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_length_finish_reason_requests_continuation(
|
||||||
|
self, agent, first_content, second_content, expected_final
|
||||||
|
):
|
||||||
|
self._setup_agent(agent)
|
||||||
|
first = _mock_response(content=first_content, finish_reason="length")
|
||||||
|
second = _mock_response(content=second_content, finish_reason="stop")
|
||||||
|
agent.client.chat.completions.create.side_effect = [first, second]
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(agent, "_persist_session"),
|
||||||
|
patch.object(agent, "_save_trajectory"),
|
||||||
|
patch.object(agent, "_cleanup_task_resources"),
|
||||||
|
):
|
||||||
|
result = agent.run_conversation("hello")
|
||||||
|
|
||||||
|
assert result["completed"] is True
|
||||||
|
assert result["api_calls"] == 2
|
||||||
|
assert result["final_response"] == expected_final
|
||||||
|
|
||||||
|
second_call_messages = agent.client.chat.completions.create.call_args_list[1].kwargs["messages"]
|
||||||
|
assert second_call_messages[-1]["role"] == "user"
|
||||||
|
assert "truncated by the output length limit" in second_call_messages[-1]["content"]
|
||||||
|
|
||||||
|
|
||||||
class TestRetryExhaustion:
|
class TestRetryExhaustion:
|
||||||
"""Regression: retry_count > max_retries was dead code (off-by-one).
|
"""Regression: retry_count > max_retries was dead code (off-by-one).
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue