rate limits

This commit is contained in:
hjc-puro 2025-11-17 18:35:36 -05:00
parent f813959750
commit 1614c15bb1
4 changed files with 43 additions and 28 deletions

View file

@ -388,7 +388,7 @@ class AIAgent:
while api_call_count < self.max_iterations: while api_call_count < self.max_iterations:
api_call_count += 1 api_call_count += 1
print(f"\n🔄 Making API call #{api_call_count}...") print(f"\n🔄 Making OpenAI-compatible API call #{api_call_count}...")
# Log request details if verbose # Log request details if verbose
if self.verbose_logging: if self.verbose_logging:
@ -397,8 +397,8 @@ class AIAgent:
api_start_time = time.time() api_start_time = time.time()
retry_count = 0 retry_count = 0
max_retries = 3 max_retries = 6 # Increased to allow longer backoff periods
while retry_count <= max_retries: while retry_count <= max_retries:
try: try:
# Prepare messages for API call # Prepare messages for API call
@ -407,30 +407,30 @@ class AIAgent:
if active_system_prompt: if active_system_prompt:
# Insert system message at the beginning # Insert system message at the beginning
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
# Make API call with tools # Make API call with tools
response = self.client.chat.completions.create( response = self.client.chat.completions.create(
model=self.model, model=self.model,
messages=api_messages, messages=api_messages,
tools=self.tools if self.tools else None, tools=self.tools if self.tools else None,
timeout=60.0 # Add explicit timeout timeout=300.0 # 5 minute timeout for long-running agent tasks
) )
api_duration = time.time() - api_start_time api_duration = time.time() - api_start_time
print(f"⏱️ API call completed in {api_duration:.2f}s") print(f"⏱️ OpenAI-compatible API call completed in {api_duration:.2f}s")
if self.verbose_logging: if self.verbose_logging:
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}") logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
break # Success, exit retry loop break # Success, exit retry loop
except Exception as api_error: except Exception as api_error:
retry_count += 1 retry_count += 1
if retry_count > max_retries: if retry_count > max_retries:
raise api_error raise api_error
wait_time = min(2 ** retry_count, 10) # Exponential backoff, max 10s wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
print(f"⚠️ API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}") print(f"⚠️ OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
print(f"⏳ Retrying in {wait_time}s...") print(f"⏳ Retrying in {wait_time}s...")
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}") logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
time.sleep(wait_time) time.sleep(wait_time)
@ -522,11 +522,11 @@ class AIAgent:
"content": final_response "content": final_response
}) })
print(f"🎉 Conversation completed after {api_call_count} API call(s)") print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
break break
except Exception as e: except Exception as e:
error_msg = f"Error during API call #{api_call_count}: {str(e)}" error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
print(f"{error_msg}") print(f"{error_msg}")
if self.verbose_logging: if self.verbose_logging:

View file

@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st
async def _run_reference_model_safe( async def _run_reference_model_safe(
model: str, model: str,
user_prompt: str, user_prompt: str,
temperature: float = REFERENCE_TEMPERATURE, temperature: float = REFERENCE_TEMPERATURE,
max_tokens: int = 32000, max_tokens: int = 32000,
max_retries: int = 3 max_retries: int = 6
) -> tuple[str, str, bool]: ) -> tuple[str, str, bool]:
""" """
Run a single reference model with retry logic and graceful failure handling. Run a single reference model with retry logic and graceful failure handling.
@ -212,8 +212,8 @@ async def _run_reference_model_safe(
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}") print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
if attempt < max_retries - 1: if attempt < max_retries - 1:
# Exponential backoff for rate limiting # Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, capped at 60s
sleep_time = 2 ** attempt sleep_time = min(2 ** (attempt + 1), 60)
print(f" Retrying in {sleep_time}s...") print(f" Retrying in {sleep_time}s...")
await asyncio.sleep(sleep_time) await asyncio.sleep(sleep_time)
else: else:

View file

@ -34,7 +34,7 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
**Environment:** **Environment:**
- Minimal Debian-based OS with internet access - Minimal Debian-based OS with internet access
- Automatic VM lifecycle management (creates on-demand, reuses, cleans up) - Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
- No state persistence - each command runs independently - Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
**Command Execution:** **Command Execution:**
- Simple commands: Just provide the 'command' parameter - Simple commands: Just provide the 'command' parameter
@ -43,13 +43,18 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
**Examples:** **Examples:**
- Run command: `{"command": "ls -la"}` - Run command: `{"command": "ls -la"}`
- Background task: `{"command": "python server.py", "background": True}` - Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
- With timeout: `{"command": "long_task.sh", "timeout": 300}` - With timeout: `{"command": "long_task.sh", "timeout": 300}`
**Best Practices:** **Best Practices:**
- Run servers/long processes in background - Run servers/long processes in background
- Monitor disk usage for large tasks - Monitor disk usage for large tasks
- Install tools as needed with apt-get""" - Install whatever tools you need with sudo apt-get
- Do not be afraid to run pip with --break-system-packages
**Things to avoid**
- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
"""
# Global state for VM lifecycle management # Global state for VM lifecycle management
_active_instances: Dict[str, Any] = {} _active_instances: Dict[str, Any] = {}
@ -89,7 +94,12 @@ def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
del _last_activity[task_id] del _last_activity[task_id]
except Exception as e: except Exception as e:
print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}") # 404 errors are benign - VM already cleaned up by TTL
error_str = str(e)
if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
else:
print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
def _cleanup_thread_worker(): def _cleanup_thread_worker():
@ -150,7 +160,12 @@ def cleanup_vm(task_id: str):
del _last_activity[task_id] del _last_activity[task_id]
except Exception as e: except Exception as e:
print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}") # 404 errors are benign - VM already cleaned up by TTL
error_str = str(e)
if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
else:
print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
atexit.register(_stop_cleanup_thread) atexit.register(_stop_cleanup_thread)

View file

@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights.""" Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
# Call the LLM asynchronously with retry logic for flaky API # Call the LLM asynchronously with retry logic for flaky API
max_retries = 3 max_retries = 6
retry_delay = 2 # Start with 2 seconds retry_delay = 2 # Start with 2 seconds
last_error = None last_error = None
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
response = await nous_client.chat.completions.create( response = await nous_client.chat.completions.create(
@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}") print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...") print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff: 2s, 4s, 8s retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, capped at 60s
else: else:
# All retries exhausted # All retries exhausted
raise last_error raise last_error