Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.
This commit is contained in:
teknium 2026-01-10 05:56:26 +00:00
parent 21f9e2df40
commit 4071ba29da
8 changed files with 572 additions and 111 deletions

View file

@ -43,7 +43,7 @@ else:
# Import our tool system
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
from tools.terminal_tool import cleanup_vm
from tools.simple_terminal_tool import cleanup_vm
class AIAgent:
@ -177,9 +177,11 @@ class AIAgent:
disabled_toolsets=disabled_toolsets
)
# Show tool configuration
# Show tool configuration and store valid tool names for validation
self.valid_tool_names = set()
if self.tools:
tool_names = [tool["function"]["name"] for tool in self.tools]
self.valid_tool_names = {tool["function"]["name"] for tool in self.tools}
tool_names = sorted(self.valid_tool_names)
print(f"🛠️ Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
# Show filtering info if applied
@ -495,6 +497,49 @@ class AIAgent:
if self.verbose_logging:
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
# Validate response has valid choices before proceeding
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
# This is often rate limiting or provider returning malformed response
retry_count += 1
error_details = []
if response is None:
error_details.append("response is None")
elif not hasattr(response, 'choices'):
error_details.append("response has no 'choices' attribute")
elif response.choices is None:
error_details.append("response.choices is None")
else:
error_details.append("response.choices is empty")
# Check for error field in response (some providers include this)
error_msg = "Unknown"
if response and hasattr(response, 'error') and response.error:
error_msg = str(response.error)
elif response and hasattr(response, 'message') and response.message:
error_msg = str(response.message)
print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
print(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
if retry_count > max_retries:
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": f"Invalid API response (choices is None/empty). Likely rate limited by provider.",
"failed": True # Mark as failure for filtering
}
# Longer backoff for rate limiting (likely cause of None choices)
wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s
print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)}")
time.sleep(wait_time)
continue # Retry the API call
break # Success, exit retry loop
except Exception as api_error:
@ -503,13 +548,32 @@ class AIAgent:
# Enhanced error logging
error_type = type(api_error).__name__
error_msg = str(api_error)
error_msg = str(api_error).lower()
print(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
print(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
print(f"{self.log_prefix} 📝 Error: {error_msg[:200]}")
print(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
print(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
# Check for non-retryable errors (context length exceeded)
is_context_length_error = any(phrase in error_msg for phrase in [
'context length', 'maximum context', 'token limit',
'too many tokens', 'reduce the length', 'exceeds the limit'
])
if is_context_length_error:
print(f"{self.log_prefix}❌ Context length exceeded - this error cannot be resolved by retrying.")
print(f"{self.log_prefix} 💡 The conversation has accumulated too much content from tool responses.")
logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot continue.")
# Return a partial result instead of crashing
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": f"Context length exceeded ({approx_tokens:,} tokens). Conversation terminated early.",
"partial": True
}
if retry_count > max_retries:
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
@ -537,6 +601,43 @@ class AIAgent:
for tc in assistant_message.tool_calls:
logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...")
# Validate tool call names - detect model hallucinations
invalid_tool_calls = [
tc.function.name for tc in assistant_message.tool_calls
if tc.function.name not in self.valid_tool_names
]
if invalid_tool_calls:
# Track retries for invalid tool calls
if not hasattr(self, '_invalid_tool_retries'):
self._invalid_tool_retries = 0
self._invalid_tool_retries += 1
invalid_preview = invalid_tool_calls[0][:80] + "..." if len(invalid_tool_calls[0]) > 80 else invalid_tool_calls[0]
print(f"{self.log_prefix}⚠️ Invalid tool call detected: '{invalid_preview}'")
print(f"{self.log_prefix} Valid tools: {sorted(self.valid_tool_names)}")
if self._invalid_tool_retries < 3:
print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_tool_retries}/3)...")
# Don't add anything to messages, just retry the API call
continue
else:
print(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.")
# Return partial result - don't include the bad tool call in messages
self._invalid_tool_retries = 0 # Reset for next conversation
return {
"final_response": None,
"messages": messages, # Messages up to last valid point
"api_calls": api_call_count,
"completed": False,
"partial": True,
"error": f"Model generated invalid tool call: {invalid_preview}"
}
# Reset retry counter on successful tool call validation
if hasattr(self, '_invalid_tool_retries'):
self._invalid_tool_retries = 0
# Extract reasoning from response if available (for reasoning models like minimax, kimi, etc.)
reasoning_content = None
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
@ -669,7 +770,8 @@ class AIAgent:
"final_response": final_response,
"messages": messages,
"api_calls": api_call_count,
"completed": completed
"completed": completed,
"partial": False # True only when stopped due to invalid tool calls
}
def chat(self, message: str) -> str: