From b00c5949fcae98de1495308e36ff971ccc88aa7c Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 6 Mar 2026 01:51:10 +0300 Subject: [PATCH] fix: suppress verbose logs during streaming TTS, improve hallucination filter, stop continuous mode on errors - Add _vprint() helper to suppress log output when stream_callback is active - Expand Whisper hallucination filter with multi-language phrases and regex pattern for repetitive text - Stop continuous voice mode when agent returns a failed result (e.g. 429 rate limit) --- cli.py | 5 ++ run_agent.py | 167 ++++++++++++++++++++++++++------------------ tools/voice_mode.py | 28 +++++++- 3 files changed, 130 insertions(+), 70 deletions(-) diff --git a/cli.py b/cli.py index d15d43a1..0778b626 100755 --- a/cli.py +++ b/cli.py @@ -4242,6 +4242,11 @@ class HermesCLI: if result and result.get("failed") and not response: error_detail = result.get("error", "Unknown error") response = f"Error: {error_detail}" + # Stop continuous voice mode on persistent errors (e.g. 429 rate limit) + # to avoid an infinite error β†’ record β†’ error loop + if self._voice_continuous: + self._voice_continuous = False + _cprint(f"\n{_DIM}Continuous voice mode stopped due to error.{_RST}") # Handle interrupt - check if we were interrupted pending_message = None diff --git a/run_agent.py b/run_agent.py index 6dd08436..475a797f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -493,6 +493,10 @@ class AIAgent: ]: logging.getLogger(quiet_logger).setLevel(logging.ERROR) + # Internal stream callback (set during streaming TTS). + # Initialized here so _vprint can reference it before run_conversation. + self._stream_callback = None + # Initialize LLM client via centralized provider router. # The router handles auth resolution, base URL, headers, and # Codex/Anthropic wrapping for all known providers. 
@@ -812,6 +816,12 @@ class AIAgent: else: print(f"πŸ“Š Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)") + def _vprint(self, *args, force=False, **kwargs): + """Verbose print β€” suppressed when streaming TTS is active unless force=True.""" + if not force and getattr(self, "_stream_callback", None) is not None: + return + print(*args, **kwargs) + def _max_tokens_param(self, value: int) -> dict: """Return the correct max tokens kwarg for the current provider. @@ -1340,7 +1350,7 @@ class AIAgent: encoding="utf-8", ) - print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}") + self._vprint(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}") if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}: print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)) @@ -1482,7 +1492,7 @@ class AIAgent: # Replay the items into the store (replace mode) self._todo_store.write(last_todo_response, merge=False) if not self.quiet_mode: - print(f"{self.log_prefix}πŸ“‹ Restored {len(last_todo_response)} todo item(s) from history") + self._vprint(f"{self.log_prefix}πŸ“‹ Restored {len(last_todo_response)} todo item(s) from history") _set_interrupt(False) @property @@ -3578,7 +3588,7 @@ class AIAgent: if self._interrupt_requested: remaining_calls = assistant_message.tool_calls[i-1:] if remaining_calls: - print(f"{self.log_prefix}⚑ Interrupt: skipping {len(remaining_calls)} tool call(s)") + self._vprint(f"{self.log_prefix}⚑ Interrupt: skipping {len(remaining_calls)} tool call(s)") for skipped_tc in remaining_calls: skipped_name = skipped_tc.function.name skip_msg = { @@ -3640,7 +3650,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}") + self._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}") elif function_name == 
"session_search": if not self._session_db: function_result = json.dumps({"success": False, "error": "Session database not available."}) @@ -3655,7 +3665,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}") + self._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}") elif function_name == "memory": target = function_args.get("target", "memory") from tools.memory_tool import memory_tool as _memory_tool @@ -3671,7 +3681,7 @@ class AIAgent: self._honcho_save_user_observation(function_args.get("content", "")) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}") + self._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}") elif function_name == "clarify": from tools.clarify_tool import clarify_tool as _clarify_tool function_result = _clarify_tool( @@ -3681,7 +3691,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") + self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") @@ -3714,8 +3724,8 @@ class AIAgent: if spinner: spinner.stop(cute_msg) elif self.quiet_mode: - print(f" {cute_msg}") - elif self.quiet_mode: + self._vprint(f" {cute_msg}") + elif self.quiet_mode and self._stream_callback is None: face = random.choice(KawaiiSpinner.KAWAII_WAITING) tool_emoji_map = { 'web_search': 'πŸ”', 'web_extract': 'πŸ“„', 'web_crawl': 'πŸ•ΈοΈ', @@ -3802,7 +3812,7 @@ 
class AIAgent: if self._interrupt_requested and i < len(assistant_message.tool_calls): remaining = len(assistant_message.tool_calls) - i - print(f"{self.log_prefix}⚑ Interrupt: skipping {remaining} remaining tool call(s)") + self._vprint(f"{self.log_prefix}⚑ Interrupt: skipping {remaining} remaining tool call(s)") for skipped_tc in assistant_message.tool_calls[i:]: skipped_name = skipped_tc.function.name skip_msg = { @@ -4344,11 +4354,11 @@ class AIAgent: thinking_spinner = None if not self.quiet_mode: - print(f"\n{self.log_prefix}πŸ”„ Making API call #{api_call_count}/{self.max_iterations}...") - print(f"{self.log_prefix} πŸ“Š Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)") - print(f"{self.log_prefix} πŸ”§ Available tools: {len(self.tools) if self.tools else 0}") - else: - # Animated thinking spinner in quiet mode + self._vprint(f"\n{self.log_prefix}πŸ”„ Making API call #{api_call_count}/{self.max_iterations}...") + self._vprint(f"{self.log_prefix} πŸ“Š Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)") + self._vprint(f"{self.log_prefix} πŸ”§ Available tools: {len(self.tools) if self.tools else 0}") + elif self._stream_callback is None: + # Animated thinking spinner in quiet mode (skip during streaming TTS) face = random.choice(KawaiiSpinner.KAWAII_THINKING) verb = random.choice(KawaiiSpinner.THINKING_VERBS) if self.thinking_callback: @@ -4401,7 +4411,7 @@ class AIAgent: self.thinking_callback("") if not self.quiet_mode: - print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s") + self._vprint(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s") if self.verbose_logging: # Log response with provider info if available @@ -4478,17 +4488,17 @@ class AIAgent: if self.verbose_logging: logging.debug(f"Response attributes for invalid response: {resp_attrs}") - print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', 
'.join(error_details)}") - print(f"{self.log_prefix} 🏒 Provider: {provider_name}") - print(f"{self.log_prefix} πŸ“ Provider message: {error_msg[:200]}") - print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)") + self._vprint(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}") + self._vprint(f"{self.log_prefix} 🏒 Provider: {provider_name}") + self._vprint(f"{self.log_prefix} πŸ“ Provider message: {error_msg[:200]}") + self._vprint(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)") if retry_count >= max_retries: # Try fallback before giving up if self._try_activate_fallback(): retry_count = 0 continue - print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.") + self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.", force=True) logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.") self._persist_session(messages, conversation_history) return { @@ -4501,14 +4511,14 @@ class AIAgent: # Longer backoff for rate limiting (likely cause of None choices) wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s - print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...") + self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...") logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}") # Sleep in small increments to stay responsive to interrupts sleep_end = time.time() + wait_time while time.time() < sleep_end: if self._interrupt_requested: - print(f"{self.log_prefix}⚑ Interrupt detected during retry wait, aborting.") + self._vprint(f"{self.log_prefix}⚑ Interrupt detected during retry wait, 
aborting.") self._persist_session(messages, conversation_history) self.clear_interrupt() return { @@ -4541,7 +4551,7 @@ class AIAgent: finish_reason = response.choices[0].finish_reason if finish_reason == "length": - print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens") + self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens") if self.api_mode == "chat_completions": assistant_message = response.choices[0].message @@ -4553,7 +4563,7 @@ class AIAgent: truncated_response_prefix += assistant_message.content if length_continue_retries < 3: - print( + self._vprint( f"{self.log_prefix}↻ Requesting continuation " f"({length_continue_retries}/3)..." ) @@ -4585,7 +4595,7 @@ class AIAgent: # If we have prior messages, roll back to last complete state if len(messages) > 1: - print(f"{self.log_prefix} βͺ Rolling back to last complete assistant turn") + self._vprint(f"{self.log_prefix} βͺ Rolling back to last complete assistant turn") rolled_back_messages = self._get_messages_up_to_last_assistant(messages) self._cleanup_task_resources(effective_task_id) @@ -4601,7 +4611,7 @@ class AIAgent: } else: # First message was truncated - mark as failed - print(f"{self.log_prefix}❌ First response truncated - cannot recover") + self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover") self._persist_session(messages, conversation_history) return { "final_response": None, @@ -4661,7 +4671,7 @@ class AIAgent: prompt = usage_dict["prompt_tokens"] hit_pct = (cached / prompt * 100) if prompt > 0 else 0 if not self.quiet_mode: - print(f"{self.log_prefix} πŸ’Ύ Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + self._vprint(f"{self.log_prefix} πŸ’Ύ Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") break # Success, exit retry loop @@ -4672,7 +4682,7 @@ class AIAgent: if self.thinking_callback: 
self.thinking_callback("") api_elapsed = time.time() - api_start_time - print(f"{self.log_prefix}⚑ Interrupted during API call.") + self._vprint(f"{self.log_prefix}⚑ Interrupted during API call.", force=True) self._persist_session(messages, conversation_history) interrupted = True final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)." @@ -4695,7 +4705,7 @@ class AIAgent: ): codex_auth_retry_attempted = True if self._try_refresh_codex_client_credentials(force=True): - print(f"{self.log_prefix}πŸ” Codex auth refreshed after 401. Retrying request...") + self._vprint(f"{self.log_prefix}πŸ” Codex auth refreshed after 401. Retrying request...") continue if ( self.api_mode == "chat_completions" @@ -4743,14 +4753,14 @@ class AIAgent: error_type = type(api_error).__name__ error_msg = str(api_error).lower() - print(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}") - print(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s") - print(f"{self.log_prefix} πŸ“ Error: {str(api_error)[:200]}") - print(f"{self.log_prefix} πŸ“Š Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools") + self._vprint(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}") + self._vprint(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s") + self._vprint(f"{self.log_prefix} πŸ“ Error: {str(api_error)[:200]}") + self._vprint(f"{self.log_prefix} πŸ“Š Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools") # Check for interrupt before deciding to retry if self._interrupt_requested: - print(f"{self.log_prefix}⚑ Interrupt detected during error handling, aborting retries.") + self._vprint(f"{self.log_prefix}⚑ Interrupt detected during error handling, aborting retries.") self._persist_session(messages, 
conversation_history) self.clear_interrupt() return { @@ -4775,7 +4785,7 @@ class AIAgent: if is_payload_too_large: compression_attempts += 1 if compression_attempts > max_compression_attempts: - print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.") + self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True) logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.") self._persist_session(messages, conversation_history) return { @@ -4785,7 +4795,7 @@ class AIAgent: "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.", "partial": True } - print(f"{self.log_prefix}⚠️ Request payload too large (413) β€” compression attempt {compression_attempts}/{max_compression_attempts}...") + self._vprint(f"{self.log_prefix}⚠️ Request payload too large (413) β€” compression attempt {compression_attempts}/{max_compression_attempts}...") original_len = len(messages) messages, active_system_prompt = self._compress_context( @@ -4794,12 +4804,12 @@ class AIAgent: ) if len(messages) < original_len: - print(f"{self.log_prefix} πŸ—œοΈ Compressed {original_len} β†’ {len(messages)} messages, retrying...") + self._vprint(f"{self.log_prefix} πŸ—œοΈ Compressed {original_len} β†’ {len(messages)} messages, retrying...") time.sleep(2) # Brief pause between compression retries restart_with_compressed_messages = True break else: - print(f"{self.log_prefix}❌ Payload too large and cannot compress further.") + self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.") logging.error(f"{self.log_prefix}413 payload too large. 
Cannot compress further.") self._persist_session(messages, conversation_history) return { @@ -4830,7 +4840,7 @@ class AIAgent: parsed_limit = parse_context_limit_from_error(error_msg) if parsed_limit and parsed_limit < old_ctx: new_ctx = parsed_limit - print(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})") + self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True) else: # Step down to the next probe tier new_ctx = get_next_probe_tier(old_ctx) @@ -4839,13 +4849,13 @@ class AIAgent: compressor.context_length = new_ctx compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent) compressor._context_probed = True - print(f"{self.log_prefix}⚠️ Context length exceeded β€” stepping down: {old_ctx:,} β†’ {new_ctx:,} tokens") + self._vprint(f"{self.log_prefix}⚠️ Context length exceeded β€” stepping down: {old_ctx:,} β†’ {new_ctx:,} tokens", force=True) else: - print(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier β€” attempting compression...") + self._vprint(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier β€” attempting compression...", force=True) compression_attempts += 1 if compression_attempts > max_compression_attempts: - print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.") + self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True) logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.") self._persist_session(messages, conversation_history) return { @@ -4855,7 +4865,7 @@ class AIAgent: "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.", "partial": True } - print(f"{self.log_prefix} πŸ—œοΈ Context compression attempt {compression_attempts}/{max_compression_attempts}...") + self._vprint(f"{self.log_prefix} πŸ—œοΈ Context compression 
attempt {compression_attempts}/{max_compression_attempts}...") original_len = len(messages) messages, active_system_prompt = self._compress_context( @@ -4865,14 +4875,14 @@ class AIAgent: if len(messages) < original_len or new_ctx and new_ctx < old_ctx: if len(messages) < original_len: - print(f"{self.log_prefix} πŸ—œοΈ Compressed {original_len} β†’ {len(messages)} messages, retrying...") + self._vprint(f"{self.log_prefix} πŸ—œοΈ Compressed {original_len} β†’ {len(messages)} messages, retrying...") time.sleep(2) # Brief pause between compression retries restart_with_compressed_messages = True break else: # Can't compress further and already at minimum tier - print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.") - print(f"{self.log_prefix} πŸ’‘ The conversation has accumulated too much content.") + self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True) + self._vprint(f"{self.log_prefix} πŸ’‘ The conversation has accumulated too much content.", force=True) logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.") self._persist_session(messages, conversation_history) return { @@ -4908,8 +4918,8 @@ class AIAgent: self._dump_api_request_debug( api_kwargs, reason="non_retryable_client_error", error=api_error, ) - print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.") - print(f"{self.log_prefix} πŸ’‘ This type of error won't be fixed by retrying.") + self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. 
Aborting immediately.") + self._vprint(f"{self.log_prefix} πŸ’‘ This type of error won't be fixed by retrying.") logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}") self._persist_session(messages, conversation_history) return { @@ -4926,7 +4936,7 @@ class AIAgent: if self._try_activate_fallback(): retry_count = 0 continue - print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.") + self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.", force=True) logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}") logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}") raise api_error @@ -4934,15 +4944,15 @@ class AIAgent: wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}") if retry_count >= max_retries: - print(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}") - print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...") + self._vprint(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}") + self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...") # Sleep in small increments so we can respond to interrupts quickly # instead of blocking the entire wait_time in one sleep() call sleep_end = time.time() + wait_time while time.time() < sleep_end: if self._interrupt_requested: - print(f"{self.log_prefix}⚑ Interrupt detected during retry wait, aborting.") + self._vprint(f"{self.log_prefix}⚑ Interrupt detected during retry wait, aborting.") self._persist_session(messages, conversation_history) self.clear_interrupt() return { @@ -5006,7 +5016,7 @@ class AIAgent: # Handle assistant response if assistant_message.content and not self.quiet_mode: - print(f"{self.log_prefix}πŸ€– Assistant: 
{assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}") + self._vprint(f"{self.log_prefix}πŸ€– Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}") # Notify progress callback of model's thinking (used by subagent # delegation to relay the child's reasoning to the parent display). @@ -5033,15 +5043,15 @@ class AIAgent: self._incomplete_scratchpad_retries = 0 self._incomplete_scratchpad_retries += 1 - print(f"{self.log_prefix}⚠️ Incomplete detected (opened but never closed)") + self._vprint(f"{self.log_prefix}⚠️ Incomplete detected (opened but never closed)") if self._incomplete_scratchpad_retries <= 2: - print(f"{self.log_prefix}πŸ”„ Retrying API call ({self._incomplete_scratchpad_retries}/2)...") + self._vprint(f"{self.log_prefix}πŸ”„ Retrying API call ({self._incomplete_scratchpad_retries}/2)...") # Don't add the broken message, just retry continue else: # Max retries - discard this turn and save as partial - print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.") + self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. 
Saving as partial.") self._incomplete_scratchpad_retries = 0 rolled_back_messages = self._get_messages_up_to_last_assistant(messages) @@ -5084,7 +5094,7 @@ class AIAgent: if self._codex_incomplete_retries < 3: if not self.quiet_mode: - print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)") + self._vprint(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)") self._session_messages = messages self._save_session_log(messages) continue @@ -5105,7 +5115,7 @@ class AIAgent: # Check for tool calls if assistant_message.tool_calls: if not self.quiet_mode: - print(f"{self.log_prefix}πŸ”§ Processing {len(assistant_message.tool_calls)} tool call(s)...") + self._vprint(f"{self.log_prefix}πŸ”§ Processing {len(assistant_message.tool_calls)} tool call(s)...") if self.verbose_logging: for tc in assistant_message.tool_calls: @@ -5124,11 +5134,30 @@ class AIAgent: if tc.function.name not in self.valid_tool_names ] if invalid_tool_calls: + # Track retries for invalid tool calls + if not hasattr(self, '_invalid_tool_retries'): + self._invalid_tool_retries = 0 + self._invalid_tool_retries += 1 + # Return helpful error to model β€” model can self-correct next turn available = ", ".join(sorted(self.valid_tool_names)) invalid_name = invalid_tool_calls[0] invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name - print(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' β€” sending error to model for self-correction") + self._vprint(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' β€” sending error to model for self-correction ({self._invalid_tool_retries}/3)") + + if self._invalid_tool_retries >= 3: + self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. 
Stopping as partial.") + self._invalid_tool_retries = 0 + self._persist_session(messages, conversation_history) + return { + "final_response": None, + "messages": messages, + "api_calls": api_call_count, + "completed": False, + "partial": True, + "error": f"Model generated invalid tool call: {invalid_preview}" + } + assistant_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(assistant_msg) for tc in assistant_message.tool_calls: @@ -5165,15 +5194,15 @@ class AIAgent: self._invalid_json_retries += 1 tool_name, error_msg = invalid_json_args[0] - print(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}") + self._vprint(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}") if self._invalid_json_retries < 3: - print(f"{self.log_prefix}πŸ”„ Retrying API call ({self._invalid_json_retries}/3)...") + self._vprint(f"{self.log_prefix}πŸ”„ Retrying API call ({self._invalid_json_retries}/3)...") # Don't add anything to messages, just retry the API call continue else: # Instead of returning partial, inject a helpful message and let model recover - print(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...") + self._vprint(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...") self._invalid_json_retries = 0 # Reset for next attempt # Add a user message explaining the issue @@ -5203,7 +5232,7 @@ class AIAgent: if self.quiet_mode: clean = self._strip_think_blocks(turn_content).strip() if clean: - print(f" β”Š πŸ’¬ {clean}") + self._vprint(f" β”Š πŸ’¬ {clean}") messages.append(assistant_msg) @@ -5279,19 +5308,19 @@ class AIAgent: self._empty_content_retries += 1 reasoning_text = self._extract_reasoning(assistant_message) - print(f"{self.log_prefix}⚠️ Response only contains think block with no content after it") + self._vprint(f"{self.log_prefix}⚠️ Response only contains think block with no content after it") if reasoning_text: 
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text - print(f"{self.log_prefix} Reasoning: {reasoning_preview}") + self._vprint(f"{self.log_prefix} Reasoning: {reasoning_preview}") else: content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response - print(f"{self.log_prefix} Content: '{content_preview}'") + self._vprint(f"{self.log_prefix} Content: '{content_preview}'") if self._empty_content_retries < 3: - print(f"{self.log_prefix}πŸ”„ Retrying API call ({self._empty_content_retries}/3)...") + self._vprint(f"{self.log_prefix}πŸ”„ Retrying API call ({self._empty_content_retries}/3)...") continue else: - print(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.") + self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.") self._empty_content_retries = 0 # If a prior tool_calls turn had real content, salvage it: diff --git a/tools/voice_mode.py b/tools/voice_mode.py index bdf2c535..87b6cad6 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -12,6 +12,7 @@ Dependencies (optional): import logging import os import platform +import re import shutil import subprocess import tempfile @@ -350,12 +351,37 @@ WHISPER_HALLUCINATIONS = { "you", "the end.", "the end", + # Non-English hallucinations (common on silence) + "ΠΏΡ€ΠΎΠ΄ΠΎΠ»ΠΆΠ΅Π½ΠΈΠ΅ слСдуСт", + "ΠΏΡ€ΠΎΠ΄ΠΎΠ»ΠΆΠ΅Π½ΠΈΠ΅ слСдуСт...", + "sous-titres", + "sous-titres rΓ©alisΓ©s par la communautΓ© d'amara.org", + "sottotitoli creati dalla comunitΓ  amara.org", + "untertitel von stephanie geiges", + "amara.org", + "www.mooji.org", + "γ”θ¦–θ΄γ‚γ‚ŠγŒγ¨γ†γ”γ–γ„γΎγ—γŸ", } +# Regex patterns for repetitive hallucinations (e.g. "Thank you. Thank you. 
Thank you.") +_HALLUCINATION_REPEAT_RE = re.compile( + r'^(?:thank you|thanks|bye|you|ok|okay|the end|\.|\s|,|!)+$', + flags=re.IGNORECASE, +) + def is_whisper_hallucination(transcript: str) -> bool: """Check if a transcript is a known Whisper hallucination on silence.""" - return transcript.strip().lower() in WHISPER_HALLUCINATIONS + cleaned = transcript.strip().lower() + if not cleaned: + return True + # Exact match against known phrases + if cleaned.rstrip('.!') in WHISPER_HALLUCINATIONS or cleaned in WHISPER_HALLUCINATIONS: + return True + # Repetitive patterns (e.g. "Thank you. Thank you. Thank you. you") + if _HALLUCINATION_REPEAT_RE.match(cleaned): + return True + return False # ============================================================================