Merge PR #627: fix: continue non-tool replies after output-length truncation

Authored by tripledoublev (vincent). Rebased onto current main and conflict-resolved. When a non-tool chat-completions response ends with finish_reason='length', the agent no longer rolls back and returns None. Instead it: (1) appends the truncated text and a continuation prompt to the conversation; (2) requests a continuation, giving up with a partial result once a third response is also truncated; (3) concatenates the accumulated chunks into the final response; and (4) preserves the existing rollback behavior for tool-call truncations.
2026-03-10 04:33:14 -07:00 · 2026-03-10 04:33:14 -07:00 · d6d5a43d3a
commit d6d5a43d3a
parent d723208b1b b0a5fe8974
2 changed files with 85 additions and 5 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -3233,6 +3233,8 @@ class AIAgent:
        final_response = None
        interrupted = False
        codex_ack_continuations = 0
        length_continue_retries = 0
        truncated_response_prefix = ""
        # Clear any stale interrupt state at start
        self.clear_interrupt()
@ -3375,6 +3377,7 @@ class AIAgent:
            codex_auth_retry_attempted = False
            nous_auth_retry_attempted = False
            restart_with_compressed_messages = False
            restart_with_length_continuation = False
            finish_reason = "stop"
            response = None  # Guard against UnboundLocalError if all retries fail
@ -3525,19 +3528,60 @@ class AIAgent:
                            finish_reason = "stop"
                    else:
                        finish_reason = response.choices[0].finish_reason
-                    
+
                    # Handle "length" finish_reason - response was truncated
                    if finish_reason == "length":
                        print(f"{self.log_prefix}⚠️  Response truncated (finish_reason='length') - model hit max output tokens")
-                        
+
                        if self.api_mode == "chat_completions":
                            assistant_message = response.choices[0].message
                            if not assistant_message.tool_calls:
                                length_continue_retries += 1
                                interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                                messages.append(interim_msg)
                                self._log_msg_to_db(interim_msg)
                                if assistant_message.content:
                                    truncated_response_prefix += assistant_message.content
                                if length_continue_retries < 3:
                                    print(
                                        f"{self.log_prefix}↻ Requesting continuation "
                                        f"({length_continue_retries}/3)..."
                                    )
                                    continue_msg = {
                                        "role": "user",
                                        "content": (
                                            "[System: Your previous response was truncated by the output "
                                            "length limit. Continue exactly where you left off. Do not "
                                            "restart or repeat prior text. Finish the answer directly.]"
                                        ),
                                    }
                                    messages.append(continue_msg)
                                    self._log_msg_to_db(continue_msg)
                                    self._session_messages = messages
                                    self._save_session_log(messages)
                                    restart_with_length_continuation = True
                                    break
                                partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
                                self._cleanup_task_resources(effective_task_id)
                                self._persist_session(messages, conversation_history)
                                return {
                                    "final_response": partial_response or None,
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
                                    "partial": True,
                                    "error": "Response remained truncated after 3 continuation attempts",
                                }
                        # If we have prior messages, roll back to last complete state
                        if len(messages) > 1:
                            print(f"{self.log_prefix}   ⏪ Rolling back to last complete assistant turn")
                            rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
-                            
+
                            self._cleanup_task_resources(effective_task_id)
                            self._persist_session(messages, conversation_history)
-                            
+
                            return {
                                "final_response": None,
                                "messages": rolled_back_messages,
@ -3870,6 +3914,9 @@ class AIAgent:
                self.iteration_budget.refund()
                continue
            if restart_with_length_continuation:
                continue
            # Guard: if all retries exhausted without a successful response
            # (e.g. repeated context-length errors that exhausted retry_count),
            # the `response` variable is still None. Break out cleanly.
@ -4260,6 +4307,9 @@ class AIAgent:
                        continue
                    codex_ack_continuations = 0
                    if truncated_response_prefix:
                        final_response = truncated_response_prefix + final_response
                    # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
                    final_response = self._strip_think_blocks(final_response).strip()
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@ -829,6 +829,36 @@ class TestRunConversation:
        assert result["final_response"] == "All done"
        assert result["completed"] is True
    @pytest.mark.parametrize(
        ("first_content", "second_content", "expected_final"),
        [
            # Plain text split across two replies is joined verbatim.
            ("Part 1 ", "Part 2", "Part 1 Part 2"),
            # A first chunk holding only a <think> block adds nothing visible.
            (
                "<think>internal reasoning</think>",
                "Recovered final answer",
                "Recovered final answer",
            ),
        ],
    )
    def test_length_finish_reason_requests_continuation(
        self, agent, first_content, second_content, expected_final
    ):
        """A length-truncated non-tool reply triggers one continuation request.

        The first mocked completion ends with ``finish_reason="length"`` and
        the second with ``"stop"``. The run must finish successfully after
        exactly two API calls, return ``expected_final``, and send the
        continuation prompt as the last message of the second request.
        """
        self._setup_agent(agent)

        # Queue the truncated reply followed by the reply that completes it.
        agent.client.chat.completions.create.side_effect = [
            _mock_response(content=first_content, finish_reason="length"),
            _mock_response(content=second_content, finish_reason="stop"),
        ]

        # Stub out persistence and cleanup so only the conversation loop runs.
        with patch.object(agent, "_persist_session"):
            with patch.object(agent, "_save_trajectory"):
                with patch.object(agent, "_cleanup_task_resources"):
                    outcome = agent.run_conversation("hello")

        assert outcome["completed"] is True
        assert outcome["api_calls"] == 2
        assert outcome["final_response"] == expected_final

        # The second request must end with the injected continuation prompt.
        recorded_calls = agent.client.chat.completions.create.call_args_list
        continuation_prompt = recorded_calls[1].kwargs["messages"][-1]
        assert continuation_prompt["role"] == "user"
        assert "truncated by the output length limit" in continuation_prompt["content"]
 class TestRetryExhaustion:
    """Regression: retry_count > max_retries was dead code (off-by-one).