rate limits

2025-11-17 18:35:36 -05:00 · 2025-11-17 18:35:36 -05:00 · 1614c15bb1
commit 1614c15bb1
parent f813959750
4 changed files with 43 additions and 28 deletions
--- a/tools/mixture_of_agents_tool.py
+++ b/tools/mixture_of_agents_tool.py
@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st


 async def _run_reference_model_safe(
-    model: str, 
-    user_prompt: str, 
+    model: str,
+    user_prompt: str,
    temperature: float = REFERENCE_TEMPERATURE,
    max_tokens: int = 32000,
-    max_retries: int = 3
+    max_retries: int = 6
 ) -> tuple[str, str, bool]:
    """
    Run a single reference model with retry logic and graceful failure handling.
@ -212,8 +212,8 @@ async def _run_reference_model_safe(
                print(f"⚠️  {model} unknown error (attempt {attempt + 1}): {error_str}")
                
            if attempt < max_retries - 1:
-                # Exponential backoff for rate limiting
-                sleep_time = 2 ** attempt
+                # Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, ... capped at 60s
+                sleep_time = min(2 ** (attempt + 1), 60)
                print(f"   Retrying in {sleep_time}s...")
                await asyncio.sleep(sleep_time)
            else:
--- a/tools/simple_terminal_tool.py
+++ b/tools/simple_terminal_tool.py
@ -34,7 +34,7 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
 **Environment:**
 - Minimal Debian-based OS with internet access
 - Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
- No state persistence - each command runs independently
+- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.

 **Command Execution:**
 - Simple commands: Just provide the 'command' parameter
@ -43,13 +43,18 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi

 **Examples:**
 - Run command: `{"command": "ls -la"}`
- Background task: `{"command": "python server.py", "background": True}`
+- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
 - With timeout: `{"command": "long_task.sh", "timeout": 300}`

 **Best Practices:**
 - Run servers/long processes in background
 - Monitor disk usage for large tasks
- Install tools as needed with apt-get"""
+- Install whatever tools you need with sudo apt-get
+- Do not be afraid to run pip with --break-system-packages
+
+**Things to avoid**
+- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
+"""

 # Global state for VM lifecycle management
 _active_instances: Dict[str, Any] = {}
@ -89,7 +94,12 @@ def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
                    del _last_activity[task_id]

            except Exception as e:
-                print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
+                # 404 errors are benign - VM already cleaned up by TTL
+                error_str = str(e)
+                if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+                    print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+                else:
+                    print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")


 def _cleanup_thread_worker():
@ -150,7 +160,12 @@ def cleanup_vm(task_id: str):
                del _last_activity[task_id]

        except Exception as e:
-            print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
+            # 404 errors are benign - VM already cleaned up by TTL
+            error_str = str(e)
+            if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+                print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+            else:
+                print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")


 atexit.register(_stop_cleanup_thread)
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
 Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""

        # Call the LLM asynchronously with retry logic for flaky API
-        max_retries = 3
+        max_retries = 6
        retry_delay = 2  # Start with 2 seconds
        last_error = None
-        
+
        for attempt in range(max_retries):
            try:
                response = await nous_client.chat.completions.create(
@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
                    print(f"⚠️  LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
                    print(f"   Retrying in {retry_delay}s...")
                    await asyncio.sleep(retry_delay)
-                    retry_delay *= 2  # Exponential backoff: 2s, 4s, 8s
+                    retry_delay = min(retry_delay * 2, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, ... capped at 60s
                else:
                    # All retries exhausted
                    raise last_error