Add browser automation tools and enhance environment configuration
- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.
This commit is contained in:
parent
54ca0997ee
commit
248acf715e
12 changed files with 2626 additions and 134 deletions
167
run_agent.py
167
run_agent.py
|
|
@ -44,6 +44,7 @@ else:
|
|||
# Import our tool system
|
||||
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
|
||||
from tools.terminal_tool import cleanup_vm
|
||||
from tools.browser_tool import cleanup_browser
|
||||
|
||||
|
||||
class AIAgent:
|
||||
|
|
@ -58,7 +59,7 @@ class AIAgent:
|
|||
self,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
model: str = "anthropic/claude-sonnet-4-20250514",
|
||||
model: str = "anthropic/claude-sonnet-4-20250514", # OpenRouter format
|
||||
max_iterations: int = 10,
|
||||
tool_delay: float = 1.0,
|
||||
enabled_toolsets: List[str] = None,
|
||||
|
|
@ -156,10 +157,7 @@ class AIAgent:
|
|||
client_kwargs["api_key"] = api_key
|
||||
else:
|
||||
# Primary: OPENROUTER_API_KEY, fallback to direct provider keys
|
||||
client_kwargs["api_key"] = os.getenv(
|
||||
"OPENROUTER_API_KEY",
|
||||
os.getenv("ANTHROPIC_API_KEY", os.getenv("OPENAI_API_KEY", ""))
|
||||
)
|
||||
client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")
|
||||
|
||||
try:
|
||||
self.client = OpenAI(**client_kwargs)
|
||||
|
|
@ -339,11 +337,12 @@ class AIAgent:
|
|||
# Check if this message has tool calls
|
||||
if "tool_calls" in msg and msg["tool_calls"]:
|
||||
# Format assistant message with tool calls
|
||||
# Add <think> tags around reasoning for trajectory storage
|
||||
content = ""
|
||||
|
||||
# Prepend reasoning in <think> tags if available
|
||||
if msg.get("reasoning") and msg["reasoning"].strip():
|
||||
content = f"<think>{msg['reasoning']}</think>"
|
||||
content = f"<think>\n{msg['reasoning']}\n</think>\n"
|
||||
|
||||
if msg.get("content") and msg["content"].strip():
|
||||
content += msg["content"] + "\n"
|
||||
|
|
@ -406,17 +405,18 @@ class AIAgent:
|
|||
|
||||
else:
|
||||
# Regular assistant message without tool calls
|
||||
# Add <think> tags around reasoning for trajectory storage
|
||||
content = ""
|
||||
|
||||
# Prepend reasoning in <think> tags if available
|
||||
if msg.get("reasoning") and msg["reasoning"].strip():
|
||||
content = f"<think>{msg['reasoning']}</think>"
|
||||
content = f"<think>\n{msg['reasoning']}\n</think>\n"
|
||||
|
||||
content += msg["content"] or ""
|
||||
|
||||
trajectory.append({
|
||||
"from": "gpt",
|
||||
"value": content
|
||||
"value": content.strip()
|
||||
})
|
||||
|
||||
elif msg["role"] == "user":
|
||||
|
|
@ -515,7 +515,31 @@ class AIAgent:
|
|||
|
||||
# Prepare messages for API call
|
||||
# If we have an ephemeral system prompt, prepend it to the messages
|
||||
api_messages = messages.copy()
|
||||
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.
|
||||
# However, providers like Moonshot AI require a separate 'reasoning_content' field
|
||||
# on assistant messages with tool_calls. We handle both cases here.
|
||||
api_messages = []
|
||||
for msg in messages:
|
||||
api_msg = msg.copy()
|
||||
|
||||
# For assistant messages with tool_calls, providers require 'reasoning_content' field
|
||||
# Extract reasoning from our stored 'reasoning' field and add it as 'reasoning_content'
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
reasoning_text = msg.get("reasoning")
|
||||
if reasoning_text:
|
||||
# Add reasoning_content for API compatibility (Moonshot AI, Novita, etc.)
|
||||
api_msg["reasoning_content"] = reasoning_text
|
||||
|
||||
# Remove 'reasoning' field - it's for trajectory storage only
|
||||
# The reasoning is already in the content via <think> tags AND
|
||||
# we've added reasoning_content for API compatibility above
|
||||
if "reasoning" in api_msg:
|
||||
api_msg.pop("reasoning")
|
||||
# Remove 'reasoning_details' if present - we use reasoning_content instead
|
||||
if "reasoning_details" in api_msg:
|
||||
api_msg.pop("reasoning_details")
|
||||
api_messages.append(api_msg)
|
||||
|
||||
if active_system_prompt:
|
||||
# Insert system message at the beginning
|
||||
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
|
||||
|
|
@ -582,7 +606,9 @@ class AIAgent:
|
|||
print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
|
||||
|
||||
if self.verbose_logging:
|
||||
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
|
||||
# Log response with provider info if available
|
||||
resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
|
||||
logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
|
||||
|
||||
# Validate response has valid choices before proceeding
|
||||
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
|
||||
|
|
@ -600,12 +626,28 @@ class AIAgent:
|
|||
|
||||
# Check for error field in response (some providers include this)
|
||||
error_msg = "Unknown"
|
||||
provider_name = "Unknown"
|
||||
if response and hasattr(response, 'error') and response.error:
|
||||
error_msg = str(response.error)
|
||||
# Try to extract provider from error metadata
|
||||
if hasattr(response.error, 'metadata') and response.error.metadata:
|
||||
provider_name = response.error.metadata.get('provider_name', 'Unknown')
|
||||
elif response and hasattr(response, 'message') and response.message:
|
||||
error_msg = str(response.message)
|
||||
|
||||
# Try to get provider from model field (OpenRouter often returns actual model used)
|
||||
if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
|
||||
provider_name = f"model={response.model}"
|
||||
|
||||
# Check for x-openrouter-provider or similar metadata
|
||||
if provider_name == "Unknown" and response:
|
||||
# Log all response attributes for debugging
|
||||
resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
|
||||
if self.verbose_logging:
|
||||
logging.debug(f"Response attributes for invalid response: {resp_attrs}")
|
||||
|
||||
print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
|
||||
print(f"{self.log_prefix} 🏢 Provider: {provider_name}")
|
||||
print(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
|
||||
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
|
||||
|
||||
|
|
@ -623,7 +665,7 @@ class AIAgent:
|
|||
# Longer backoff for rate limiting (likely cause of None choices)
|
||||
wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s
|
||||
print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
|
||||
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)}")
|
||||
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
|
||||
time.sleep(wait_time)
|
||||
continue # Retry the API call
|
||||
|
||||
|
|
@ -639,12 +681,17 @@ class AIAgent:
|
|||
print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
|
||||
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
||||
|
||||
# Clean up VM
|
||||
# Clean up VM and browser
|
||||
try:
|
||||
cleanup_vm(effective_task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup VM for task {effective_task_id}: {e}")
|
||||
try:
|
||||
cleanup_browser(effective_task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup browser for task {effective_task_id}: {e}")
|
||||
|
||||
return {
|
||||
"final_response": None,
|
||||
|
|
@ -799,17 +846,21 @@ class AIAgent:
|
|||
self._invalid_json_retries = 0
|
||||
|
||||
# Extract reasoning from response if available (for reasoning models like minimax, kimi, etc.)
|
||||
reasoning_content = None
|
||||
# Extract reasoning from response for storage
|
||||
# The reasoning_content field will be added when preparing API messages
|
||||
reasoning_text = None
|
||||
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
|
||||
reasoning_content = assistant_message.reasoning
|
||||
reasoning_text = assistant_message.reasoning
|
||||
elif hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
|
||||
reasoning_content = assistant_message.reasoning_content
|
||||
reasoning_text = assistant_message.reasoning_content
|
||||
|
||||
# Add assistant message with tool calls to conversation
|
||||
messages.append({
|
||||
# Build assistant message with tool calls
|
||||
# Content stays as-is; reasoning is stored separately and will be passed
|
||||
# to the API via reasoning_content field when preparing api_messages
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": assistant_message.content,
|
||||
"reasoning": reasoning_content, # Store reasoning for trajectory
|
||||
"content": assistant_message.content or "",
|
||||
"reasoning": reasoning_text, # Stored for trajectory extraction & API calls
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": tool_call.id,
|
||||
|
|
@ -821,7 +872,9 @@ class AIAgent:
|
|||
}
|
||||
for tool_call in assistant_message.tool_calls
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
messages.append(assistant_msg)
|
||||
|
||||
# Execute each tool call
|
||||
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
||||
|
|
@ -896,12 +949,17 @@ class AIAgent:
|
|||
|
||||
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
||||
|
||||
# Clean up VM
|
||||
# Clean up VM and browser
|
||||
try:
|
||||
cleanup_vm(effective_task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup VM for task {effective_task_id}: {e}")
|
||||
try:
|
||||
cleanup_browser(effective_task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup browser for task {effective_task_id}: {e}")
|
||||
|
||||
return {
|
||||
"final_response": None,
|
||||
|
|
@ -917,18 +975,21 @@ class AIAgent:
|
|||
self._empty_content_retries = 0
|
||||
|
||||
# Extract reasoning from response if available
|
||||
reasoning_content = None
|
||||
reasoning_text = None
|
||||
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
|
||||
reasoning_content = assistant_message.reasoning
|
||||
reasoning_text = assistant_message.reasoning
|
||||
elif hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
|
||||
reasoning_content = assistant_message.reasoning_content
|
||||
reasoning_text = assistant_message.reasoning_content
|
||||
|
||||
# Add final assistant message
|
||||
messages.append({
|
||||
# Build final assistant message
|
||||
# Content stays as-is; reasoning stored separately for trajectory extraction
|
||||
final_msg = {
|
||||
"role": "assistant",
|
||||
"content": final_response,
|
||||
"reasoning": reasoning_content # Store reasoning for trajectory
|
||||
})
|
||||
"reasoning": reasoning_text # Stored for trajectory extraction
|
||||
}
|
||||
|
||||
messages.append(final_msg)
|
||||
|
||||
print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
|
||||
break
|
||||
|
|
@ -963,12 +1024,18 @@ class AIAgent:
|
|||
# Save trajectory if enabled
|
||||
self._save_trajectory(messages, user_message, completed)
|
||||
|
||||
# Clean up VM for this task after conversation completes
|
||||
# Clean up VM and browser for this task after conversation completes
|
||||
try:
|
||||
cleanup_vm(effective_task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup VM for task {effective_task_id}: {e}")
|
||||
|
||||
try:
|
||||
cleanup_browser(effective_task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup browser for task {effective_task_id}: {e}")
|
||||
|
||||
return {
|
||||
"final_response": final_response,
|
||||
|
|
@ -994,14 +1061,15 @@ class AIAgent:
|
|||
|
||||
def main(
|
||||
query: str = None,
|
||||
model: str = "claude-opus-4-20250514",
|
||||
model: str = "anthropic/claude-sonnet-4-20250514",
|
||||
api_key: str = None,
|
||||
base_url: str = "https://api.anthropic.com/v1/",
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
max_turns: int = 10,
|
||||
enabled_toolsets: str = None,
|
||||
disabled_toolsets: str = None,
|
||||
list_tools: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
save_sample: bool = False,
|
||||
verbose: bool = False,
|
||||
log_prefix_chars: int = 20
|
||||
):
|
||||
|
|
@ -1010,16 +1078,17 @@ def main(
|
|||
|
||||
Args:
|
||||
query (str): Natural language query for the agent. Defaults to Python 3.13 example.
|
||||
model (str): Model name to use. Defaults to claude-opus-4-20250514.
|
||||
api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
|
||||
base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
|
||||
model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4-20250514.
|
||||
api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided.
|
||||
base_url (str): Base URL for the model API. Defaults to https://openrouter.ai/api/v1
|
||||
max_turns (int): Maximum number of API call iterations. Defaults to 10.
|
||||
enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
|
||||
toolsets (e.g., "research", "development", "safe").
|
||||
Multiple toolsets can be combined: "web,vision"
|
||||
disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
|
||||
list_tools (bool): Just list available tools and exit
|
||||
save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False.
|
||||
save_trajectories (bool): Save conversation trajectories to JSONL files (appends to trajectory_samples.jsonl). Defaults to False.
|
||||
save_sample (bool): Save a single trajectory sample to a UUID-named JSONL file for inspection. Defaults to False.
|
||||
verbose (bool): Enable verbose logging for debugging. Defaults to False.
|
||||
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20.
|
||||
|
||||
|
|
@ -1173,6 +1242,34 @@ def main(
|
|||
print("-" * 30)
|
||||
print(result['final_response'])
|
||||
|
||||
# Save sample trajectory to UUID-named file if requested
|
||||
if save_sample:
|
||||
import uuid
|
||||
sample_id = str(uuid.uuid4())[:8]
|
||||
sample_filename = f"sample_{sample_id}.jsonl"
|
||||
|
||||
# Convert messages to trajectory format (same as batch_runner)
|
||||
trajectory = agent._convert_to_trajectory_format(
|
||||
result['messages'],
|
||||
user_query,
|
||||
result['completed']
|
||||
)
|
||||
|
||||
entry = {
|
||||
"conversations": trajectory,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"model": model,
|
||||
"completed": result['completed'],
|
||||
"query": user_query
|
||||
}
|
||||
|
||||
try:
|
||||
with open(sample_filename, "w", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||
print(f"\n💾 Sample trajectory saved to: {sample_filename}")
|
||||
except Exception as e:
|
||||
print(f"\n⚠️ Failed to save sample: {e}")
|
||||
|
||||
print("\n👋 Agent execution completed!")
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue