fix: suppress verbose logs during streaming TTS, improve hallucination filter, stop continuous mode on errors

- Add _vprint() helper to suppress log output when stream_callback is active
- Expand Whisper hallucination filter with multi-language phrases and regex pattern for repetitive text
- Stop continuous voice mode when agent returns a failed result (e.g. 429 rate limit)
This commit is contained in:
0xbyt4 2026-03-06 01:51:10 +03:00
parent 3a1b35ed92
commit b00c5949fc
3 changed files with 130 additions and 70 deletions

5
cli.py
View file

@ -4242,6 +4242,11 @@ class HermesCLI:
if result and result.get("failed") and not response:
error_detail = result.get("error", "Unknown error")
response = f"Error: {error_detail}"
# Stop continuous voice mode on persistent errors (e.g. 429 rate limit)
# to avoid an infinite error → record → error loop
if self._voice_continuous:
self._voice_continuous = False
_cprint(f"\n{_DIM}Continuous voice mode stopped due to error.{_RST}")
# Handle interrupt - check if we were interrupted
pending_message = None

View file

@ -493,6 +493,10 @@ class AIAgent:
]:
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
# Internal stream callback (set during streaming TTS).
# Initialized here so _vprint can reference it before run_conversation.
self._stream_callback = None
# Initialize LLM client via centralized provider router.
# The router handles auth resolution, base URL, headers, and
# Codex/Anthropic wrapping for all known providers.
@ -812,6 +816,12 @@ class AIAgent:
else:
print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
def _vprint(self, *args, **kwargs):
"""Verbose print — suppressed when streaming TTS is active."""
if getattr(self, "_stream_callback", None) is not None:
return
print(*args, **kwargs)
def _max_tokens_param(self, value: int) -> dict:
"""Return the correct max tokens kwarg for the current provider.
@ -1340,7 +1350,7 @@ class AIAgent:
encoding="utf-8",
)
print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
self._vprint(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}:
print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
@ -1482,7 +1492,7 @@ class AIAgent:
# Replay the items into the store (replace mode)
self._todo_store.write(last_todo_response, merge=False)
if not self.quiet_mode:
print(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
self._vprint(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
_set_interrupt(False)
@property
@ -3578,7 +3588,7 @@ class AIAgent:
if self._interrupt_requested:
remaining_calls = assistant_message.tool_calls[i-1:]
if remaining_calls:
print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
for skipped_tc in remaining_calls:
skipped_name = skipped_tc.function.name
skip_msg = {
@ -3640,7 +3650,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
self._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
elif function_name == "session_search":
if not self._session_db:
function_result = json.dumps({"success": False, "error": "Session database not available."})
@ -3655,7 +3665,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
self._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
elif function_name == "memory":
target = function_args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
@ -3671,7 +3681,7 @@ class AIAgent:
self._honcho_save_user_observation(function_args.get("content", ""))
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
self._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
elif function_name == "clarify":
from tools.clarify_tool import clarify_tool as _clarify_tool
function_result = _clarify_tool(
@ -3681,7 +3691,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
elif function_name == "delegate_task":
from tools.delegate_tool import delegate_task as _delegate_task
tasks_arg = function_args.get("tasks")
@ -3714,8 +3724,8 @@ class AIAgent:
if spinner:
spinner.stop(cute_msg)
elif self.quiet_mode:
print(f" {cute_msg}")
elif self.quiet_mode:
self._vprint(f" {cute_msg}")
elif self.quiet_mode and self._stream_callback is None:
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
tool_emoji_map = {
'web_search': '🔍', 'web_extract': '📄', 'web_crawl': '🕸️',
@ -3802,7 +3812,7 @@ class AIAgent:
if self._interrupt_requested and i < len(assistant_message.tool_calls):
remaining = len(assistant_message.tool_calls) - i
print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
for skipped_tc in assistant_message.tool_calls[i:]:
skipped_name = skipped_tc.function.name
skip_msg = {
@ -4344,11 +4354,11 @@ class AIAgent:
thinking_spinner = None
if not self.quiet_mode:
print(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
print(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
print(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
else:
# Animated thinking spinner in quiet mode
self._vprint(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
self._vprint(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
self._vprint(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
elif self._stream_callback is None:
# Animated thinking spinner in quiet mode (skip during streaming TTS)
face = random.choice(KawaiiSpinner.KAWAII_THINKING)
verb = random.choice(KawaiiSpinner.THINKING_VERBS)
if self.thinking_callback:
@ -4401,7 +4411,7 @@ class AIAgent:
self.thinking_callback("")
if not self.quiet_mode:
print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
self._vprint(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
if self.verbose_logging:
# Log response with provider info if available
@ -4478,17 +4488,17 @@ class AIAgent:
if self.verbose_logging:
logging.debug(f"Response attributes for invalid response: {resp_attrs}")
print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
print(f"{self.log_prefix} 🏢 Provider: {provider_name}")
print(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
self._vprint(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
self._vprint(f"{self.log_prefix} 🏢 Provider: {provider_name}")
self._vprint(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
self._vprint(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
if retry_count >= max_retries:
# Try fallback before giving up
if self._try_activate_fallback():
retry_count = 0
continue
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.", force=True)
logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
self._persist_session(messages, conversation_history)
return {
@ -4501,14 +4511,14 @@ class AIAgent:
# Longer backoff for rate limiting (likely cause of None choices)
wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s
print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
# Sleep in small increments to stay responsive to interrupts
sleep_end = time.time() + wait_time
while time.time() < sleep_end:
if self._interrupt_requested:
print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
self._persist_session(messages, conversation_history)
self.clear_interrupt()
return {
@ -4541,7 +4551,7 @@ class AIAgent:
finish_reason = response.choices[0].finish_reason
if finish_reason == "length":
print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
if self.api_mode == "chat_completions":
assistant_message = response.choices[0].message
@ -4553,7 +4563,7 @@ class AIAgent:
truncated_response_prefix += assistant_message.content
if length_continue_retries < 3:
print(
self._vprint(
f"{self.log_prefix}↻ Requesting continuation "
f"({length_continue_retries}/3)..."
)
@ -4585,7 +4595,7 @@ class AIAgent:
# If we have prior messages, roll back to last complete state
if len(messages) > 1:
print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
self._vprint(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
self._cleanup_task_resources(effective_task_id)
@ -4601,7 +4611,7 @@ class AIAgent:
}
else:
# First message was truncated - mark as failed
print(f"{self.log_prefix}❌ First response truncated - cannot recover")
self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover")
self._persist_session(messages, conversation_history)
return {
"final_response": None,
@ -4661,7 +4671,7 @@ class AIAgent:
prompt = usage_dict["prompt_tokens"]
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
if not self.quiet_mode:
print(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
break # Success, exit retry loop
@ -4672,7 +4682,7 @@ class AIAgent:
if self.thinking_callback:
self.thinking_callback("")
api_elapsed = time.time() - api_start_time
print(f"{self.log_prefix}⚡ Interrupted during API call.")
self._vprint(f"{self.log_prefix}⚡ Interrupted during API call.", force=True)
self._persist_session(messages, conversation_history)
interrupted = True
final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
@ -4695,7 +4705,7 @@ class AIAgent:
):
codex_auth_retry_attempted = True
if self._try_refresh_codex_client_credentials(force=True):
print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
self._vprint(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
continue
if (
self.api_mode == "chat_completions"
@ -4743,14 +4753,14 @@ class AIAgent:
error_type = type(api_error).__name__
error_msg = str(api_error).lower()
print(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
print(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
print(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
print(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
self._vprint(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
self._vprint(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
self._vprint(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
self._vprint(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
# Check for interrupt before deciding to retry
if self._interrupt_requested:
print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
self._persist_session(messages, conversation_history)
self.clear_interrupt()
return {
@ -4775,7 +4785,7 @@ class AIAgent:
if is_payload_too_large:
compression_attempts += 1
if compression_attempts > max_compression_attempts:
print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
self._persist_session(messages, conversation_history)
return {
@ -4785,7 +4795,7 @@ class AIAgent:
"error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
"partial": True
}
print(f"{self.log_prefix}⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
self._vprint(f"{self.log_prefix}⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
@ -4794,12 +4804,12 @@ class AIAgent:
)
if len(messages) < original_len:
print(f"{self.log_prefix} 🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
self._vprint(f"{self.log_prefix} 🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
time.sleep(2) # Brief pause between compression retries
restart_with_compressed_messages = True
break
else:
print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
self._persist_session(messages, conversation_history)
return {
@ -4830,7 +4840,7 @@ class AIAgent:
parsed_limit = parse_context_limit_from_error(error_msg)
if parsed_limit and parsed_limit < old_ctx:
new_ctx = parsed_limit
print(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
else:
# Step down to the next probe tier
new_ctx = get_next_probe_tier(old_ctx)
@ -4839,13 +4849,13 @@ class AIAgent:
compressor.context_length = new_ctx
compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
compressor._context_probed = True
print(f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,}{new_ctx:,} tokens")
self._vprint(f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,}{new_ctx:,} tokens", force=True)
else:
print(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...")
self._vprint(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...", force=True)
compression_attempts += 1
if compression_attempts > max_compression_attempts:
print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.")
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
self._persist_session(messages, conversation_history)
return {
@ -4855,7 +4865,7 @@ class AIAgent:
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
"partial": True
}
print(f"{self.log_prefix} 🗜️ Context compression attempt {compression_attempts}/{max_compression_attempts}...")
self._vprint(f"{self.log_prefix} 🗜️ Context compression attempt {compression_attempts}/{max_compression_attempts}...")
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
@ -4865,14 +4875,14 @@ class AIAgent:
if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
if len(messages) < original_len:
print(f"{self.log_prefix} 🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
self._vprint(f"{self.log_prefix} 🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
time.sleep(2) # Brief pause between compression retries
restart_with_compressed_messages = True
break
else:
# Can't compress further and already at minimum tier
print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.")
print(f"{self.log_prefix} 💡 The conversation has accumulated too much content.")
self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content.", force=True)
logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
self._persist_session(messages, conversation_history)
return {
@ -4908,8 +4918,8 @@ class AIAgent:
self._dump_api_request_debug(
api_kwargs, reason="non_retryable_client_error", error=api_error,
)
print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
print(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.")
self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.")
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
self._persist_session(messages, conversation_history)
return {
@ -4926,7 +4936,7 @@ class AIAgent:
if self._try_activate_fallback():
retry_count = 0
continue
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.", force=True)
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
raise api_error
@ -4934,15 +4944,15 @@ class AIAgent:
wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
if retry_count >= max_retries:
print(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
self._vprint(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
# Sleep in small increments so we can respond to interrupts quickly
# instead of blocking the entire wait_time in one sleep() call
sleep_end = time.time() + wait_time
while time.time() < sleep_end:
if self._interrupt_requested:
print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
self._persist_session(messages, conversation_history)
self.clear_interrupt()
return {
@ -5006,7 +5016,7 @@ class AIAgent:
# Handle assistant response
if assistant_message.content and not self.quiet_mode:
print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
# Notify progress callback of model's thinking (used by subagent
# delegation to relay the child's reasoning to the parent display).
@ -5033,15 +5043,15 @@ class AIAgent:
self._incomplete_scratchpad_retries = 0
self._incomplete_scratchpad_retries += 1
print(f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
self._vprint(f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
if self._incomplete_scratchpad_retries <= 2:
print(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
# Don't add the broken message, just retry
continue
else:
# Max retries - discard this turn and save as partial
print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
self._incomplete_scratchpad_retries = 0
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
@ -5084,7 +5094,7 @@ class AIAgent:
if self._codex_incomplete_retries < 3:
if not self.quiet_mode:
print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
self._vprint(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
self._session_messages = messages
self._save_session_log(messages)
continue
@ -5105,7 +5115,7 @@ class AIAgent:
# Check for tool calls
if assistant_message.tool_calls:
if not self.quiet_mode:
print(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
self._vprint(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
if self.verbose_logging:
for tc in assistant_message.tool_calls:
@ -5124,11 +5134,30 @@ class AIAgent:
if tc.function.name not in self.valid_tool_names
]
if invalid_tool_calls:
# Track retries for invalid tool calls
if not hasattr(self, '_invalid_tool_retries'):
self._invalid_tool_retries = 0
self._invalid_tool_retries += 1
# Return helpful error to model — model can self-correct next turn
available = ", ".join(sorted(self.valid_tool_names))
invalid_name = invalid_tool_calls[0]
invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
print(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction")
self._vprint(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)")
if self._invalid_tool_retries >= 3:
self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.")
self._invalid_tool_retries = 0
self._persist_session(messages, conversation_history)
return {
"final_response": None,
"messages": messages,
"api_calls": api_call_count,
"completed": False,
"partial": True,
"error": f"Model generated invalid tool call: {invalid_preview}"
}
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
messages.append(assistant_msg)
for tc in assistant_message.tool_calls:
@ -5165,15 +5194,15 @@ class AIAgent:
self._invalid_json_retries += 1
tool_name, error_msg = invalid_json_args[0]
print(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
self._vprint(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
if self._invalid_json_retries < 3:
print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
# Don't add anything to messages, just retry the API call
continue
else:
# Instead of returning partial, inject a helpful message and let model recover
print(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...")
self._vprint(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...")
self._invalid_json_retries = 0 # Reset for next attempt
# Add a user message explaining the issue
@ -5203,7 +5232,7 @@ class AIAgent:
if self.quiet_mode:
clean = self._strip_think_blocks(turn_content).strip()
if clean:
print(f" ┊ 💬 {clean}")
self._vprint(f" ┊ 💬 {clean}")
messages.append(assistant_msg)
@ -5279,19 +5308,19 @@ class AIAgent:
self._empty_content_retries += 1
reasoning_text = self._extract_reasoning(assistant_message)
print(f"{self.log_prefix}⚠️ Response only contains think block with no content after it")
self._vprint(f"{self.log_prefix}⚠️ Response only contains think block with no content after it")
if reasoning_text:
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
print(f"{self.log_prefix} Reasoning: {reasoning_preview}")
self._vprint(f"{self.log_prefix} Reasoning: {reasoning_preview}")
else:
content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
print(f"{self.log_prefix} Content: '{content_preview}'")
self._vprint(f"{self.log_prefix} Content: '{content_preview}'")
if self._empty_content_retries < 3:
print(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
continue
else:
print(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
self._empty_content_retries = 0
# If a prior tool_calls turn had real content, salvage it:

View file

@ -12,6 +12,7 @@ Dependencies (optional):
import logging
import os
import platform
import re
import shutil
import subprocess
import tempfile
@ -350,12 +351,37 @@ WHISPER_HALLUCINATIONS = {
"you",
"the end.",
"the end",
# Non-English hallucinations (common on silence)
"продолжение следует",
"продолжение следует...",
"sous-titres",
"sous-titres réalisés par la communauté d'amara.org",
"sottotitoli creati dalla comunità amara.org",
"untertitel von stephanie geiges",
"amara.org",
"www.mooji.org",
"ご視聴ありがとうございました",
}
# Regex patterns for repetitive hallucinations (e.g. "Thank you. Thank you. Thank you.")
_HALLUCINATION_REPEAT_RE = re.compile(
r'^(?:thank you|thanks|bye|you|ok|okay|the end|\.|\s|,|!)+$',
flags=re.IGNORECASE,
)
def is_whisper_hallucination(transcript: str) -> bool:
    """Check if a transcript is a known Whisper hallucination on silence.

    Whisper tends to emit stock phrases (subtitle credits, "thank you",
    channel sign-offs) when fed silence or noise.

    Args:
        transcript: Raw transcript text from Whisper.

    Returns:
        True if the transcript is empty, matches a known hallucination
        phrase (with or without trailing '.'/'!'), or consists entirely
        of repetitive filler tokens; False otherwise.
    """
    cleaned = transcript.strip().lower()
    if not cleaned:
        return True
    # Exact match against known phrases, tolerating trailing punctuation
    if cleaned in WHISPER_HALLUCINATIONS or cleaned.rstrip('.!') in WHISPER_HALLUCINATIONS:
        return True
    # Repetitive patterns (e.g. "Thank you. Thank you. Thank you. you")
    if _HALLUCINATION_REPEAT_RE.match(cleaned):
        return True
    return False
# ============================================================================