From 746abf5e28e00e9ce900fdae0556e26dd1e87518 Mon Sep 17 00:00:00 2001 From: Peppi Littera Date: Fri, 20 Mar 2026 00:26:36 +0100 Subject: [PATCH] fix: use reasoning content as response when model only produces think blocks Local models (especially Qwen 3.5) sometimes wrap their entire response inside <think> tags, leaving the actual content empty. Previously this caused 3 retries and then an error, wasting tokens and failing the request. Now when retries are exhausted and reasoning_text contains the response, it is used as final_response instead of returning an error. The user sees the actual answer instead of "Model generated only think blocks." --- run_agent.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/run_agent.py b/run_agent.py index 878188f9..e8741002 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6532,7 +6532,21 @@ class AIAgent: self._response_was_previewed = True break - # No fallback -- append the empty message as-is + # No fallback -- if reasoning_text exists, the model put its + # entire response inside tags; use that as the content. + if reasoning_text: + self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True) + final_response = reasoning_text + empty_msg = { + "role": "assistant", + "content": final_response, + "reasoning": reasoning_text, + "finish_reason": finish_reason, + } + messages.append(empty_msg) + break + + # Truly empty -- no reasoning and no content empty_msg = { "role": "assistant", "content": final_response, @@ -6540,10 +6554,10 @@ class AIAgent: "finish_reason": finish_reason, } messages.append(empty_msg) - + self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) - + return { "final_response": final_response or None, "messages": messages,