fix: suppress verbose logs during streaming TTS, improve hallucination filter, stop continuous mode on errors
- Add _vprint() helper to suppress log output when stream_callback is active
- Expand Whisper hallucination filter with multi-language phrases and a regex pattern for repetitive text
- Stop continuous voice mode when the agent returns a failed result (e.g. 429 rate limit)
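
A minimal sketch of the gating pattern (illustrative only; the names mirror the diff below, and the force keyword is an assumption inferred from the force=True call sites in run_agent.py):

    class AIAgent:
        def __init__(self):
            self._stream_callback = None  # set while streaming TTS is active

        def _vprint(self, *args, **kwargs):
            """Verbose print — suppressed while a stream callback is active."""
            # force=True (assumed) bypasses suppression for critical errors,
            # and must be consumed here since print() has no such keyword.
            if not kwargs.pop("force", False) and getattr(self, "_stream_callback", None) is not None:
                return
            print(*args, **kwargs)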
parent 3a1b35ed92
commit b00c5949fc
3 changed files with 130 additions and 70 deletions
cli.py (5 changes)
@@ -4242,6 +4242,11 @@ class HermesCLI:
         if result and result.get("failed") and not response:
             error_detail = result.get("error", "Unknown error")
             response = f"Error: {error_detail}"
+            # Stop continuous voice mode on persistent errors (e.g. 429 rate limit)
+            # to avoid an infinite error → record → error loop
+            if self._voice_continuous:
+                self._voice_continuous = False
+                _cprint(f"\n{_DIM}Continuous voice mode stopped due to error.{_RST}")

         # Handle interrupt - check if we were interrupted
         pending_message = None
run_agent.py (167 changes)
@@ -493,6 +493,10 @@ class AIAgent:
         ]:
             logging.getLogger(quiet_logger).setLevel(logging.ERROR)

+        # Internal stream callback (set during streaming TTS).
+        # Initialized here so _vprint can reference it before run_conversation.
+        self._stream_callback = None
+
         # Initialize LLM client via centralized provider router.
         # The router handles auth resolution, base URL, headers, and
         # Codex/Anthropic wrapping for all known providers.
@@ -812,6 +816,12 @@ class AIAgent:
         else:
             print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")

+    def _vprint(self, *args, **kwargs):
+        """Verbose print — suppressed when streaming TTS is active; force=True prints anyway."""
+        if not kwargs.pop("force", False) and getattr(self, "_stream_callback", None) is not None:
+            return
+        print(*args, **kwargs)
+
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.

@@ -1340,7 +1350,7 @@ class AIAgent:
                 encoding="utf-8",
             )

-            print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
+            self._vprint(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")

         if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}:
             print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
@@ -1482,7 +1492,7 @@ class AIAgent:
            # Replay the items into the store (replace mode)
            self._todo_store.write(last_todo_response, merge=False)
            if not self.quiet_mode:
-                print(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
+                self._vprint(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
        _set_interrupt(False)

    @property
@@ -3578,7 +3588,7 @@ class AIAgent:
            if self._interrupt_requested:
                remaining_calls = assistant_message.tool_calls[i-1:]
                if remaining_calls:
-                    print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
+                    self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
                    for skipped_tc in remaining_calls:
                        skipped_name = skipped_tc.function.name
                        skip_msg = {
@@ -3640,7 +3650,7 @@ class AIAgent:
                )
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
-                    print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
+                    self._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
            elif function_name == "session_search":
                if not self._session_db:
                    function_result = json.dumps({"success": False, "error": "Session database not available."})
@@ -3655,7 +3665,7 @@ class AIAgent:
                )
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
-                    print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
+                    self._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
            elif function_name == "memory":
                target = function_args.get("target", "memory")
                from tools.memory_tool import memory_tool as _memory_tool
@@ -3671,7 +3681,7 @@ class AIAgent:
                    self._honcho_save_user_observation(function_args.get("content", ""))
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
-                    print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
+                    self._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
            elif function_name == "clarify":
                from tools.clarify_tool import clarify_tool as _clarify_tool
                function_result = _clarify_tool(
@@ -3681,7 +3691,7 @@ class AIAgent:
                )
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
-                    print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
+                    self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
            elif function_name == "delegate_task":
                from tools.delegate_tool import delegate_task as _delegate_task
                tasks_arg = function_args.get("tasks")
@@ -3714,8 +3724,8 @@ class AIAgent:
                if spinner:
                    spinner.stop(cute_msg)
                elif self.quiet_mode:
-                    print(f" {cute_msg}")
+                    self._vprint(f" {cute_msg}")
-            elif self.quiet_mode:
+            elif self.quiet_mode and self._stream_callback is None:
                face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                tool_emoji_map = {
                    'web_search': '🔍', 'web_extract': '📄', 'web_crawl': '🕸️',
@@ -3802,7 +3812,7 @@ class AIAgent:

            if self._interrupt_requested and i < len(assistant_message.tool_calls):
                remaining = len(assistant_message.tool_calls) - i
-                print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
+                self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
                for skipped_tc in assistant_message.tool_calls[i:]:
                    skipped_name = skipped_tc.function.name
                    skip_msg = {
@@ -4344,11 +4354,11 @@ class AIAgent:
            thinking_spinner = None

            if not self.quiet_mode:
-                print(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
-                print(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
-                print(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
-            else:
-                # Animated thinking spinner in quiet mode
+                self._vprint(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
+                self._vprint(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
+                self._vprint(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
+            elif self._stream_callback is None:
+                # Animated thinking spinner in quiet mode (skip during streaming TTS)
                face = random.choice(KawaiiSpinner.KAWAII_THINKING)
                verb = random.choice(KawaiiSpinner.THINKING_VERBS)
                if self.thinking_callback:
@@ -4401,7 +4411,7 @@ class AIAgent:
                    self.thinking_callback("")

            if not self.quiet_mode:
-                print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
+                self._vprint(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")

            if self.verbose_logging:
                # Log response with provider info if available
@@ -4478,17 +4488,17 @@ class AIAgent:
                if self.verbose_logging:
                    logging.debug(f"Response attributes for invalid response: {resp_attrs}")

-                print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
-                print(f"{self.log_prefix} 🏢 Provider: {provider_name}")
-                print(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
-                print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
+                self._vprint(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
+                self._vprint(f"{self.log_prefix} 🏢 Provider: {provider_name}")
+                self._vprint(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
+                self._vprint(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")

                if retry_count >= max_retries:
                    # Try fallback before giving up
                    if self._try_activate_fallback():
                        retry_count = 0
                        continue
-                    print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
+                    self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.", force=True)
                    logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
                    self._persist_session(messages, conversation_history)
                    return {
@@ -4501,14 +4511,14 @@ class AIAgent:

                # Longer backoff for rate limiting (likely cause of None choices)
                wait_time = min(5 * (2 ** (retry_count - 1)), 120)  # 5s, 10s, 20s, 40s, 80s, 120s
-                print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
+                self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
                logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")

                # Sleep in small increments to stay responsive to interrupts
                sleep_end = time.time() + wait_time
                while time.time() < sleep_end:
                    if self._interrupt_requested:
-                        print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
+                        self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
                        self._persist_session(messages, conversation_history)
                        self.clear_interrupt()
                        return {
@@ -4541,7 +4551,7 @@ class AIAgent:
            finish_reason = response.choices[0].finish_reason

            if finish_reason == "length":
-                print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
+                self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")

                if self.api_mode == "chat_completions":
                    assistant_message = response.choices[0].message
@@ -4553,7 +4563,7 @@ class AIAgent:
                        truncated_response_prefix += assistant_message.content

                    if length_continue_retries < 3:
-                        print(
+                        self._vprint(
                            f"{self.log_prefix}↻ Requesting continuation "
                            f"({length_continue_retries}/3)..."
                        )
@@ -4585,7 +4595,7 @@ class AIAgent:

                    # If we have prior messages, roll back to last complete state
                    if len(messages) > 1:
-                        print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
+                        self._vprint(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
                        rolled_back_messages = self._get_messages_up_to_last_assistant(messages)

                        self._cleanup_task_resources(effective_task_id)
@@ -4601,7 +4611,7 @@ class AIAgent:
                        }
                    else:
                        # First message was truncated - mark as failed
-                        print(f"{self.log_prefix}❌ First response truncated - cannot recover")
+                        self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover")
                        self._persist_session(messages, conversation_history)
                        return {
                            "final_response": None,
@@ -4661,7 +4671,7 @@ class AIAgent:
                    prompt = usage_dict["prompt_tokens"]
                    hit_pct = (cached / prompt * 100) if prompt > 0 else 0
                    if not self.quiet_mode:
-                        print(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
+                        self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")

            break  # Success, exit retry loop

@@ -4672,7 +4682,7 @@ class AIAgent:
            if self.thinking_callback:
                self.thinking_callback("")
            api_elapsed = time.time() - api_start_time
-            print(f"{self.log_prefix}⚡ Interrupted during API call.")
+            self._vprint(f"{self.log_prefix}⚡ Interrupted during API call.", force=True)
            self._persist_session(messages, conversation_history)
            interrupted = True
            final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
|
@ -4695,7 +4705,7 @@ class AIAgent:
|
|||
):
|
||||
codex_auth_retry_attempted = True
|
||||
if self._try_refresh_codex_client_credentials(force=True):
|
||||
print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
|
||||
self._vprint(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
|
||||
continue
|
||||
if (
|
||||
self.api_mode == "chat_completions"
|
||||
|
|
@@ -4743,14 +4753,14 @@ class AIAgent:
            error_type = type(api_error).__name__
            error_msg = str(api_error).lower()

-            print(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
-            print(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
-            print(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
-            print(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
+            self._vprint(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
+            self._vprint(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
+            self._vprint(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
+            self._vprint(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")

            # Check for interrupt before deciding to retry
            if self._interrupt_requested:
-                print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
+                self._vprint(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
                self._persist_session(messages, conversation_history)
                self.clear_interrupt()
                return {
@@ -4775,7 +4785,7 @@ class AIAgent:
            if is_payload_too_large:
                compression_attempts += 1
                if compression_attempts > max_compression_attempts:
-                    print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
+                    self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
                    logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
                    self._persist_session(messages, conversation_history)
                    return {
@@ -4785,7 +4795,7 @@ class AIAgent:
                        "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
                        "partial": True
                    }
-                print(f"{self.log_prefix}⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
+                self._vprint(f"{self.log_prefix}⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")

                original_len = len(messages)
                messages, active_system_prompt = self._compress_context(
@@ -4794,12 +4804,12 @@ class AIAgent:
                )

                if len(messages) < original_len:
-                    print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                    self._vprint(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
                    time.sleep(2)  # Brief pause between compression retries
                    restart_with_compressed_messages = True
                    break
                else:
-                    print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
+                    self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
                    logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
                    self._persist_session(messages, conversation_history)
                    return {
@@ -4830,7 +4840,7 @@ class AIAgent:
                    parsed_limit = parse_context_limit_from_error(error_msg)
                    if parsed_limit and parsed_limit < old_ctx:
                        new_ctx = parsed_limit
-                        print(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
+                        self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
                    else:
                        # Step down to the next probe tier
                        new_ctx = get_next_probe_tier(old_ctx)
@@ -4839,13 +4849,13 @@ class AIAgent:
                    compressor.context_length = new_ctx
                    compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
                    compressor._context_probed = True
-                    print(f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens")
+                    self._vprint(f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", force=True)
                else:
-                    print(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...")
+                    self._vprint(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...", force=True)

                compression_attempts += 1
                if compression_attempts > max_compression_attempts:
-                    print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.")
+                    self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                    logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
                    self._persist_session(messages, conversation_history)
                    return {
@@ -4855,7 +4865,7 @@ class AIAgent:
                        "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
                        "partial": True
                    }
-                print(f"{self.log_prefix} 🗜️ Context compression attempt {compression_attempts}/{max_compression_attempts}...")
+                self._vprint(f"{self.log_prefix} 🗜️ Context compression attempt {compression_attempts}/{max_compression_attempts}...")

                original_len = len(messages)
                messages, active_system_prompt = self._compress_context(
@@ -4865,14 +4875,14 @@ class AIAgent:

                if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                    if len(messages) < original_len:
-                        print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                        self._vprint(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
                    time.sleep(2)  # Brief pause between compression retries
                    restart_with_compressed_messages = True
                    break
                else:
                    # Can't compress further and already at minimum tier
-                    print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.")
-                    print(f"{self.log_prefix} 💡 The conversation has accumulated too much content.")
+                    self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
+                    self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content.", force=True)
                    logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
                    self._persist_session(messages, conversation_history)
                    return {
@@ -4908,8 +4918,8 @@ class AIAgent:
                self._dump_api_request_debug(
                    api_kwargs, reason="non_retryable_client_error", error=api_error,
                )
-                print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
-                print(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.")
+                self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
+                self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.")
                logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
                self._persist_session(messages, conversation_history)
                return {
@@ -4926,7 +4936,7 @@ class AIAgent:
                if self._try_activate_fallback():
                    retry_count = 0
                    continue
-                print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
+                self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.", force=True)
                logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
                logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
                raise api_error
|
@ -4934,15 +4944,15 @@ class AIAgent:
|
|||
wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
|
||||
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
|
||||
if retry_count >= max_retries:
|
||||
print(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
|
||||
print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
|
||||
self._vprint(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
|
||||
self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
|
||||
|
||||
# Sleep in small increments so we can respond to interrupts quickly
|
||||
# instead of blocking the entire wait_time in one sleep() call
|
||||
sleep_end = time.time() + wait_time
|
||||
while time.time() < sleep_end:
|
||||
if self._interrupt_requested:
|
||||
print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
|
||||
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
self.clear_interrupt()
|
||||
return {
|
||||
|
|
@@ -5006,7 +5016,7 @@ class AIAgent:

        # Handle assistant response
        if assistant_message.content and not self.quiet_mode:
-            print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
+            self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")

        # Notify progress callback of model's thinking (used by subagent
        # delegation to relay the child's reasoning to the parent display).
@@ -5033,15 +5043,15 @@ class AIAgent:
                self._incomplete_scratchpad_retries = 0
            self._incomplete_scratchpad_retries += 1

-            print(f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
+            self._vprint(f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")

            if self._incomplete_scratchpad_retries <= 2:
-                print(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
+                self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
                # Don't add the broken message, just retry
                continue
            else:
                # Max retries - discard this turn and save as partial
-                print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
+                self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
                self._incomplete_scratchpad_retries = 0

                rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
@@ -5084,7 +5094,7 @@ class AIAgent:

            if self._codex_incomplete_retries < 3:
                if not self.quiet_mode:
-                    print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
+                    self._vprint(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
                self._session_messages = messages
                self._save_session_log(messages)
                continue
@@ -5105,7 +5115,7 @@ class AIAgent:
        # Check for tool calls
        if assistant_message.tool_calls:
            if not self.quiet_mode:
-                print(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
+                self._vprint(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")

            if self.verbose_logging:
                for tc in assistant_message.tool_calls:
@@ -5124,11 +5134,30 @@ class AIAgent:
                if tc.function.name not in self.valid_tool_names
            ]
            if invalid_tool_calls:
+                # Track retries for invalid tool calls
+                if not hasattr(self, '_invalid_tool_retries'):
+                    self._invalid_tool_retries = 0
+                self._invalid_tool_retries += 1
+
                # Return helpful error to model — model can self-correct next turn
                available = ", ".join(sorted(self.valid_tool_names))
                invalid_name = invalid_tool_calls[0]
                invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
-                print(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction")
+                self._vprint(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)")
+
+                if self._invalid_tool_retries >= 3:
+                    self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.")
+                    self._invalid_tool_retries = 0
+                    self._persist_session(messages, conversation_history)
+                    return {
+                        "final_response": None,
+                        "messages": messages,
+                        "api_calls": api_call_count,
+                        "completed": False,
+                        "partial": True,
+                        "error": f"Model generated invalid tool call: {invalid_preview}"
+                    }
+
            assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
            messages.append(assistant_msg)
            for tc in assistant_message.tool_calls:
@@ -5165,15 +5194,15 @@ class AIAgent:
                self._invalid_json_retries += 1

                tool_name, error_msg = invalid_json_args[0]
-                print(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
+                self._vprint(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")

                if self._invalid_json_retries < 3:
-                    print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
+                    self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
                    # Don't add anything to messages, just retry the API call
                    continue
                else:
                    # Instead of returning partial, inject a helpful message and let model recover
-                    print(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...")
+                    self._vprint(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...")
                    self._invalid_json_retries = 0  # Reset for next attempt

                    # Add a user message explaining the issue
@@ -5203,7 +5232,7 @@ class AIAgent:
            if self.quiet_mode:
                clean = self._strip_think_blocks(turn_content).strip()
                if clean:
-                    print(f" ┊ 💬 {clean}")
+                    self._vprint(f" ┊ 💬 {clean}")

            messages.append(assistant_msg)

@@ -5279,19 +5308,19 @@ class AIAgent:
                self._empty_content_retries += 1

                reasoning_text = self._extract_reasoning(assistant_message)
-                print(f"{self.log_prefix}⚠️ Response only contains think block with no content after it")
+                self._vprint(f"{self.log_prefix}⚠️ Response only contains think block with no content after it")
                if reasoning_text:
                    reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
-                    print(f"{self.log_prefix} Reasoning: {reasoning_preview}")
+                    self._vprint(f"{self.log_prefix} Reasoning: {reasoning_preview}")
                else:
                    content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
-                    print(f"{self.log_prefix} Content: '{content_preview}'")
+                    self._vprint(f"{self.log_prefix} Content: '{content_preview}'")

                if self._empty_content_retries < 3:
-                    print(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
+                    self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
                    continue
                else:
-                    print(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
+                    self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
                    self._empty_content_retries = 0

                    # If a prior tool_calls turn had real content, salvage it:
@@ -12,6 +12,7 @@ Dependencies (optional):
 import logging
 import os
 import platform
+import re
 import shutil
 import subprocess
 import tempfile
@@ -350,12 +351,37 @@ WHISPER_HALLUCINATIONS = {
     "you",
     "the end.",
     "the end",
+    # Non-English hallucinations (common on silence)
+    "продолжение следует",  # Russian: "to be continued"
+    "продолжение следует...",
+    "sous-titres",  # French: "subtitles"
+    "sous-titres réalisés par la communauté d'amara.org",
+    "sottotitoli creati dalla comunità amara.org",
+    "untertitel von stephanie geiges",
+    "amara.org",
+    "www.mooji.org",
+    "ご視聴ありがとうございました",  # Japanese: "thank you for watching"
 }

+# Regex patterns for repetitive hallucinations (e.g. "Thank you. Thank you. Thank you.")
+_HALLUCINATION_REPEAT_RE = re.compile(
+    r'^(?:thank you|thanks|bye|you|ok|okay|the end|\.|\s|,|!)+$',
+    flags=re.IGNORECASE,
+)
+

 def is_whisper_hallucination(transcript: str) -> bool:
     """Check if a transcript is a known Whisper hallucination on silence."""
-    return transcript.strip().lower() in WHISPER_HALLUCINATIONS
+    cleaned = transcript.strip().lower()
+    if not cleaned:
+        return True
+    # Exact match against known phrases
+    if cleaned.rstrip('.!') in WHISPER_HALLUCINATIONS or cleaned in WHISPER_HALLUCINATIONS:
+        return True
+    # Repetitive patterns (e.g. "Thank you. Thank you. Thank you. you")
+    if _HALLUCINATION_REPEAT_RE.match(cleaned):
+        return True
+    return False


 # ============================================================================
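
A quick usage sketch for the expanded filter (transcripts are illustrative; "thank you" is assumed to appear among the English phrases earlier in WHISPER_HALLUCINATIONS, though the repeat regex catches it regardless):

    assert is_whisper_hallucination("")                                   # empty transcript
    assert is_whisper_hallucination("Thank you.")                         # known phrase / repeat pattern
    assert is_whisper_hallucination("Thank you. Thank you. Thank you.")   # repetitive hallucination
    assert is_whisper_hallucination("Продолжение следует...")             # non-English phrase ("to be continued")
    assert not is_whisper_hallucination("Turn off the kitchen lights.")   # real speech passes through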