some bugfixes
This commit is contained in:
parent
8d256779d8
commit
de9c0edc51
4 changed files with 80 additions and 35 deletions
|
|
@ -86,13 +86,35 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
|
||||||
# Determine if tool call was successful
|
# Determine if tool call was successful
|
||||||
is_success = True
|
is_success = True
|
||||||
try:
|
try:
|
||||||
# Try to parse as JSON and check for error field
|
# Try to parse as JSON and check for actual error values
|
||||||
content_json = json.loads(content) if isinstance(content, str) else content
|
content_json = json.loads(content) if isinstance(content, str) else content
|
||||||
if isinstance(content_json, dict) and "error" in content_json:
|
|
||||||
|
if isinstance(content_json, dict):
|
||||||
|
# Check if error field exists AND has a non-null value
|
||||||
|
if "error" in content_json and content_json["error"] is not None:
|
||||||
is_success = False
|
is_success = False
|
||||||
|
|
||||||
|
# Special handling for terminal tool responses
|
||||||
|
# Terminal wraps its response in a "content" field
|
||||||
|
if "content" in content_json and isinstance(content_json["content"], dict):
|
||||||
|
inner_content = content_json["content"]
|
||||||
|
# Check for actual error (non-null error field or non-zero exit code)
|
||||||
|
has_error = (inner_content.get("error") is not None or
|
||||||
|
inner_content.get("exit_code", 0) != 0)
|
||||||
|
if has_error:
|
||||||
|
is_success = False
|
||||||
|
|
||||||
|
# Check for "success": false pattern used by some tools
|
||||||
|
if content_json.get("success") is False:
|
||||||
|
is_success = False
|
||||||
|
|
||||||
except:
|
except:
|
||||||
# If not JSON, check if content contains error indicators
|
# If not JSON, check if content is empty or explicitly states an error
|
||||||
if not content or "error" in content.lower():
|
# Note: We avoid simple substring matching to prevent false positives
|
||||||
|
if not content:
|
||||||
|
is_success = False
|
||||||
|
# Only mark as failure if it explicitly starts with "error:" (case-insensitive, leading whitespace ignored)
|
||||||
|
elif content.strip().lower().startswith("error:"):
|
||||||
is_success = False
|
is_success = False
|
||||||
|
|
||||||
# Update success/failure count
|
# Update success/failure count
|
||||||
|
|
|
||||||
|
|
@ -99,10 +99,11 @@ class AIAgent:
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
datefmt='%H:%M:%S'
|
datefmt='%H:%M:%S'
|
||||||
)
|
)
|
||||||
# Also set OpenAI client logging to debug
|
# Keep OpenAI at INFO and httpx at WARNING to avoid massive base64 logs
|
||||||
logging.getLogger('openai').setLevel(logging.DEBUG)
|
# Even in verbose mode, we don't want to see full request/response bodies
|
||||||
logging.getLogger('httpx').setLevel(logging.DEBUG)
|
logging.getLogger('openai').setLevel(logging.INFO)
|
||||||
print("🔍 Verbose logging enabled")
|
logging.getLogger('httpx').setLevel(logging.WARNING)
|
||||||
|
print("🔍 Verbose logging enabled (OpenAI/httpx request bodies suppressed)")
|
||||||
else:
|
else:
|
||||||
# Set logging to INFO level for important messages only
|
# Set logging to INFO level for important messages only
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
|
|
|
||||||
12
run_datagen_images.sh
Normal file
12
run_datagen_images.sh
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
python batch_runner.py \
|
||||||
|
--dataset_file="hermes-agent-imagen-data/hermes_agent_imagen_eval.jsonl" \
|
||||||
|
--batch_size=10 \
|
||||||
|
--run_name="imagen_eval_gpt5" \
|
||||||
|
--distribution="image_gen" \
|
||||||
|
--model="gpt-5" \
|
||||||
|
--base_url="https://api.openai.com/v1" \
|
||||||
|
--api_key="${OPENAI_API_KEY}" \
|
||||||
|
--num_workers=4 \
|
||||||
|
--max_turns=5 \
|
||||||
|
--verbose \
|
||||||
|
--ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt."
|
||||||
|
|
@ -33,10 +33,10 @@ import asyncio
|
||||||
import uuid
|
import uuid
|
||||||
import datetime
|
import datetime
|
||||||
import base64
|
import base64
|
||||||
import requests
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Any, Optional
|
from typing import Dict, Any, Optional
|
||||||
from openai import AsyncOpenAI
|
from openai import AsyncOpenAI
|
||||||
|
import httpx # Use httpx for async HTTP requests
|
||||||
|
|
||||||
# Initialize Nous Research API client for vision processing
|
# Initialize Nous Research API client for vision processing
|
||||||
nous_client = AsyncOpenAI(
|
nous_client = AsyncOpenAI(
|
||||||
|
|
@ -131,9 +131,9 @@ def _validate_image_url(url: str) -> bool:
|
||||||
return True # Allow all HTTP/HTTPS URLs for flexibility
|
return True # Allow all HTTP/HTTPS URLs for flexibility
|
||||||
|
|
||||||
|
|
||||||
def _download_image(image_url: str, destination: Path) -> Path:
|
async def _download_image(image_url: str, destination: Path) -> Path:
|
||||||
"""
|
"""
|
||||||
Download an image from a URL to a local destination.
|
Download an image from a URL to a local destination (async).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image_url (str): The URL of the image to download
|
image_url (str): The URL of the image to download
|
||||||
|
|
@ -148,16 +148,17 @@ def _download_image(image_url: str, destination: Path) -> Path:
|
||||||
# Create parent directories if they don't exist
|
# Create parent directories if they don't exist
|
||||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Download the image with appropriate headers
|
# Download the image with appropriate headers using async httpx
|
||||||
response = requests.get(
|
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||||
|
response = await client.get(
|
||||||
image_url,
|
image_url,
|
||||||
timeout=30,
|
|
||||||
headers={"User-Agent": "hermes-agent-vision/1.0"},
|
headers={"User-Agent": "hermes-agent-vision/1.0"},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
# Save the image content
|
# Save the image content
|
||||||
destination.write_bytes(response.content)
|
destination.write_bytes(response.content)
|
||||||
|
|
||||||
return destination
|
return destination
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -249,20 +250,21 @@ async def vision_analyze_tool(
|
||||||
debug_call_data = {
|
debug_call_data = {
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"image_url": image_url,
|
"image_url": image_url,
|
||||||
"user_prompt": user_prompt,
|
"user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt,
|
||||||
"model": model
|
"model": model
|
||||||
},
|
},
|
||||||
"error": None,
|
"error": None,
|
||||||
"success": False,
|
"success": False,
|
||||||
"analysis_length": 0,
|
"analysis_length": 0,
|
||||||
"model_used": model
|
"model_used": model,
|
||||||
|
"image_size_bytes": 0
|
||||||
}
|
}
|
||||||
|
|
||||||
temp_image_path = None
|
temp_image_path = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
|
print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True)
|
||||||
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
|
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}", flush=True)
|
||||||
|
|
||||||
# Validate image URL
|
# Validate image URL
|
||||||
if not _validate_image_url(image_url):
|
if not _validate_image_url(image_url):
|
||||||
|
|
@ -273,17 +275,25 @@ async def vision_analyze_tool(
|
||||||
raise ValueError("NOUS_API_KEY environment variable not set")
|
raise ValueError("NOUS_API_KEY environment variable not set")
|
||||||
|
|
||||||
# Download the image to a temporary location
|
# Download the image to a temporary location
|
||||||
print(f"⬇️ Downloading image from URL...")
|
print(f"⬇️ Downloading image from URL...", flush=True)
|
||||||
temp_dir = Path("./temp_vision_images")
|
temp_dir = Path("./temp_vision_images")
|
||||||
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
|
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
|
||||||
|
|
||||||
_download_image(image_url, temp_image_path)
|
await _download_image(image_url, temp_image_path)
|
||||||
print(f"✅ Image downloaded successfully")
|
|
||||||
|
# Get image file size for logging
|
||||||
|
image_size_bytes = temp_image_path.stat().st_size
|
||||||
|
image_size_kb = image_size_bytes / 1024
|
||||||
|
print(f"✅ Image downloaded successfully ({image_size_kb:.1f} KB)", flush=True)
|
||||||
|
|
||||||
# Convert image to base64 data URL
|
# Convert image to base64 data URL
|
||||||
print(f"🔄 Converting image to base64...")
|
print(f"🔄 Converting image to base64...", flush=True)
|
||||||
image_data_url = _image_to_base64_data_url(temp_image_path)
|
image_data_url = _image_to_base64_data_url(temp_image_path)
|
||||||
print(f"✅ Image converted to base64 ({len(image_data_url)} characters)")
|
# Calculate size in KB for better readability
|
||||||
|
data_size_kb = len(image_data_url) / 1024
|
||||||
|
print(f"✅ Image converted to base64 ({data_size_kb:.1f} KB)", flush=True)
|
||||||
|
|
||||||
|
debug_call_data["image_size_bytes"] = image_size_bytes
|
||||||
|
|
||||||
# Use the prompt as provided (model_tools.py now handles full description formatting)
|
# Use the prompt as provided (model_tools.py now handles full description formatting)
|
||||||
comprehensive_prompt = user_prompt
|
comprehensive_prompt = user_prompt
|
||||||
|
|
@ -307,7 +317,7 @@ async def vision_analyze_tool(
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
print(f"🧠 Processing image with {model}...")
|
print(f"🧠 Processing image with {model}...", flush=True)
|
||||||
|
|
||||||
# Call the vision API
|
# Call the vision API
|
||||||
response = await nous_client.chat.completions.create(
|
response = await nous_client.chat.completions.create(
|
||||||
|
|
@ -321,7 +331,7 @@ async def vision_analyze_tool(
|
||||||
analysis = response.choices[0].message.content.strip()
|
analysis = response.choices[0].message.content.strip()
|
||||||
analysis_length = len(analysis)
|
analysis_length = len(analysis)
|
||||||
|
|
||||||
print(f"✅ Image analysis completed ({analysis_length} characters)")
|
print(f"✅ Image analysis completed ({analysis_length} characters)", flush=True)
|
||||||
|
|
||||||
# Prepare successful response
|
# Prepare successful response
|
||||||
result = {
|
result = {
|
||||||
|
|
@ -340,7 +350,7 @@ async def vision_analyze_tool(
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error analyzing image: {str(e)}"
|
error_msg = f"Error analyzing image: {str(e)}"
|
||||||
print(f"❌ {error_msg}")
|
print(f"❌ {error_msg}", flush=True)
|
||||||
|
|
||||||
# Prepare error response
|
# Prepare error response
|
||||||
result = {
|
result = {
|
||||||
|
|
@ -359,9 +369,9 @@ async def vision_analyze_tool(
|
||||||
if temp_image_path and temp_image_path.exists():
|
if temp_image_path and temp_image_path.exists():
|
||||||
try:
|
try:
|
||||||
temp_image_path.unlink()
|
temp_image_path.unlink()
|
||||||
print(f"🧹 Cleaned up temporary image file")
|
print(f"🧹 Cleaned up temporary image file", flush=True)
|
||||||
except Exception as cleanup_error:
|
except Exception as cleanup_error:
|
||||||
print(f"⚠️ Warning: Could not delete temporary file: {cleanup_error}")
|
print(f"⚠️ Warning: Could not delete temporary file: {cleanup_error}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
def check_nous_api_key() -> bool:
|
def check_nous_api_key() -> bool:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue