rate limits
This commit is contained in:
parent
f813959750
commit
1614c15bb1
4 changed files with 43 additions and 28 deletions
|
|
@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st
|
|||
|
||||
|
||||
async def _run_reference_model_safe(
|
||||
model: str,
|
||||
user_prompt: str,
|
||||
model: str,
|
||||
user_prompt: str,
|
||||
temperature: float = REFERENCE_TEMPERATURE,
|
||||
max_tokens: int = 32000,
|
||||
max_retries: int = 3
|
||||
max_retries: int = 6
|
||||
) -> tuple[str, str, bool]:
|
||||
"""
|
||||
Run a single reference model with retry logic and graceful failure handling.
|
||||
|
|
@ -212,8 +212,8 @@ async def _run_reference_model_safe(
|
|||
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
|
||||
|
||||
if attempt < max_retries - 1:
|
||||
# Exponential backoff for rate limiting
|
||||
sleep_time = 2 ** attempt
|
||||
# Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
|
||||
sleep_time = min(2 ** (attempt + 1), 60)
|
||||
print(f" Retrying in {sleep_time}s...")
|
||||
await asyncio.sleep(sleep_time)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
|
|||
**Environment:**
|
||||
- Minimal Debian-based OS with internet access
|
||||
- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
|
||||
- No state persistence - each command runs independently
|
||||
- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
|
||||
|
||||
**Command Execution:**
|
||||
- Simple commands: Just provide the 'command' parameter
|
||||
|
|
@ -43,13 +43,18 @@ SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM envi
|
|||
|
||||
**Examples:**
|
||||
- Run command: `{"command": "ls -la"}`
|
||||
- Background task: `{"command": "python server.py", "background": True}`
|
||||
- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
|
||||
- With timeout: `{"command": "long_task.sh", "timeout": 300}`
|
||||
|
||||
**Best Practices:**
|
||||
- Run servers/long processes in background
|
||||
- Monitor disk usage for large tasks
|
||||
- Install tools as needed with apt-get"""
|
||||
- Install whatever tools you need with sudo apt-get
|
||||
- Do not be afraid to run pip with --break-system-packages
|
||||
|
||||
**Things to avoid**
|
||||
- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
|
||||
"""
|
||||
|
||||
# Global state for VM lifecycle management
|
||||
_active_instances: Dict[str, Any] = {}
|
||||
|
|
@ -89,7 +94,12 @@ def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
|
|||
del _last_activity[task_id]
|
||||
|
||||
except Exception as e:
|
||||
print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
|
||||
# 404 errors are benign - VM already cleaned up by TTL
|
||||
error_str = str(e)
|
||||
if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
|
||||
print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
|
||||
else:
|
||||
print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
|
||||
|
||||
|
||||
def _cleanup_thread_worker():
|
||||
|
|
@ -150,7 +160,12 @@ def cleanup_vm(task_id: str):
|
|||
del _last_activity[task_id]
|
||||
|
||||
except Exception as e:
|
||||
print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
|
||||
# 404 errors are benign - VM already cleaned up by TTL
|
||||
error_str = str(e)
|
||||
if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
|
||||
print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
|
||||
else:
|
||||
print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
|
||||
|
||||
|
||||
atexit.register(_stop_cleanup_thread)
|
||||
|
|
|
|||
|
|
@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
|
|||
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
|
||||
|
||||
# Call the LLM asynchronously with retry logic for flaky API
|
||||
max_retries = 3
|
||||
max_retries = 6
|
||||
retry_delay = 2 # Start with 2 seconds
|
||||
last_error = None
|
||||
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
response = await nous_client.chat.completions.create(
|
||||
|
|
@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
|
|||
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
|
||||
print(f" Retrying in {retry_delay}s...")
|
||||
await asyncio.sleep(retry_delay)
|
||||
retry_delay *= 2 # Exponential backoff: 2s, 4s, 8s
|
||||
retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
|
||||
else:
|
||||
# All retries exhausted
|
||||
raise last_error
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue