some bugfixes

This commit is contained in:
teknium 2025-10-15 18:07:06 +00:00
parent 8d256779d8
commit de9c0edc51
4 changed files with 80 additions and 35 deletions

View file

@ -86,13 +86,35 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
# Determine if tool call was successful # Determine if tool call was successful
is_success = True is_success = True
try: try:
# Try to parse as JSON and check for error field # Try to parse as JSON and check for actual error values
content_json = json.loads(content) if isinstance(content, str) else content content_json = json.loads(content) if isinstance(content, str) else content
if isinstance(content_json, dict) and "error" in content_json:
if isinstance(content_json, dict):
# Check if error field exists AND has a non-null value
if "error" in content_json and content_json["error"] is not None:
is_success = False is_success = False
# Special handling for terminal tool responses
# Terminal wraps its response in a "content" field
if "content" in content_json and isinstance(content_json["content"], dict):
inner_content = content_json["content"]
# Check for actual error (non-null error field or non-zero exit code)
has_error = (inner_content.get("error") is not None or
inner_content.get("exit_code", 0) != 0)
if has_error:
is_success = False
# Check for "success": false pattern used by some tools
if content_json.get("success") is False:
is_success = False
except: except:
# If not JSON, check if content contains error indicators # If not JSON, check if content is empty or explicitly states an error
if not content or "error" in content.lower(): # Note: We avoid simple substring matching to prevent false positives
if not content:
is_success = False
# Only mark as failure if it explicitly starts with "Error:" or "ERROR:"
elif content.strip().lower().startswith("error:"):
is_success = False is_success = False
# Update success/failure count # Update success/failure count

View file

@ -99,10 +99,11 @@ class AIAgent:
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%H:%M:%S' datefmt='%H:%M:%S'
) )
# Also set OpenAI client logging to debug # Keep OpenAI and httpx at INFO level to avoid massive base64 logs
logging.getLogger('openai').setLevel(logging.DEBUG) # Even in verbose mode, we don't want to see full request/response bodies
logging.getLogger('httpx').setLevel(logging.DEBUG) logging.getLogger('openai').setLevel(logging.INFO)
print("🔍 Verbose logging enabled") logging.getLogger('httpx').setLevel(logging.WARNING)
print("🔍 Verbose logging enabled (OpenAI/httpx request bodies suppressed)")
else: else:
# Set logging to INFO level for important messages only # Set logging to INFO level for important messages only
logging.basicConfig( logging.basicConfig(

12
run_datagen_images.sh Normal file
View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Launch batch_runner.py on the image-generation eval dataset using the
# "image_gen" distribution and the OpenAI endpoint configured below.
#
# Requires OPENAI_API_KEY in the environment. All flags are passed through to
# batch_runner.py unchanged.

# Stop on the first failing command, on unset variables, and on pipeline errors.
set -euo pipefail

# Fail fast with a clear message instead of handing an empty --api_key to the
# runner and only discovering the problem once requests start failing.
: "${OPENAI_API_KEY:?OPENAI_API_KEY must be set to run this script}"

python batch_runner.py \
--dataset_file="hermes-agent-imagen-data/hermes_agent_imagen_eval.jsonl" \
--batch_size=10 \
--run_name="imagen_eval_gpt5" \
--distribution="image_gen" \
--model="gpt-5" \
--base_url="https://api.openai.com/v1" \
--api_key="${OPENAI_API_KEY}" \
--num_workers=4 \
--max_turns=5 \
--verbose \
--ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt."

View file

@ -33,10 +33,10 @@ import asyncio
import uuid import uuid
import datetime import datetime
import base64 import base64
import requests
from pathlib import Path from pathlib import Path
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from openai import AsyncOpenAI from openai import AsyncOpenAI
import httpx # Use httpx for async HTTP requests
# Initialize Nous Research API client for vision processing # Initialize Nous Research API client for vision processing
nous_client = AsyncOpenAI( nous_client = AsyncOpenAI(
@ -131,9 +131,9 @@ def _validate_image_url(url: str) -> bool:
return True # Allow all HTTP/HTTPS URLs for flexibility return True # Allow all HTTP/HTTPS URLs for flexibility
def _download_image(image_url: str, destination: Path) -> Path: async def _download_image(image_url: str, destination: Path) -> Path:
""" """
Download an image from a URL to a local destination. Download an image from a URL to a local destination (async).
Args: Args:
image_url (str): The URL of the image to download image_url (str): The URL of the image to download
@ -148,16 +148,17 @@ def _download_image(image_url: str, destination: Path) -> Path:
# Create parent directories if they don't exist # Create parent directories if they don't exist
destination.parent.mkdir(parents=True, exist_ok=True) destination.parent.mkdir(parents=True, exist_ok=True)
# Download the image with appropriate headers # Download the image with appropriate headers using async httpx
response = requests.get( async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(
image_url, image_url,
timeout=30,
headers={"User-Agent": "hermes-agent-vision/1.0"}, headers={"User-Agent": "hermes-agent-vision/1.0"},
) )
response.raise_for_status() response.raise_for_status()
# Save the image content # Save the image content
destination.write_bytes(response.content) destination.write_bytes(response.content)
return destination return destination
@ -249,20 +250,21 @@ async def vision_analyze_tool(
debug_call_data = { debug_call_data = {
"parameters": { "parameters": {
"image_url": image_url, "image_url": image_url,
"user_prompt": user_prompt, "user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt,
"model": model "model": model
}, },
"error": None, "error": None,
"success": False, "success": False,
"analysis_length": 0, "analysis_length": 0,
"model_used": model "model_used": model,
"image_size_bytes": 0
} }
temp_image_path = None temp_image_path = None
try: try:
print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}") print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True)
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}") print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}", flush=True)
# Validate image URL # Validate image URL
if not _validate_image_url(image_url): if not _validate_image_url(image_url):
@ -273,17 +275,25 @@ async def vision_analyze_tool(
raise ValueError("NOUS_API_KEY environment variable not set") raise ValueError("NOUS_API_KEY environment variable not set")
# Download the image to a temporary location # Download the image to a temporary location
print(f"⬇️ Downloading image from URL...") print(f"⬇️ Downloading image from URL...", flush=True)
temp_dir = Path("./temp_vision_images") temp_dir = Path("./temp_vision_images")
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg" temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
_download_image(image_url, temp_image_path) await _download_image(image_url, temp_image_path)
print(f"✅ Image downloaded successfully")
# Get image file size for logging
image_size_bytes = temp_image_path.stat().st_size
image_size_kb = image_size_bytes / 1024
print(f"✅ Image downloaded successfully ({image_size_kb:.1f} KB)", flush=True)
# Convert image to base64 data URL # Convert image to base64 data URL
print(f"🔄 Converting image to base64...") print(f"🔄 Converting image to base64...", flush=True)
image_data_url = _image_to_base64_data_url(temp_image_path) image_data_url = _image_to_base64_data_url(temp_image_path)
print(f"✅ Image converted to base64 ({len(image_data_url)} characters)") # Calculate size in KB for better readability
data_size_kb = len(image_data_url) / 1024
print(f"✅ Image converted to base64 ({data_size_kb:.1f} KB)", flush=True)
debug_call_data["image_size_bytes"] = image_size_bytes
# Use the prompt as provided (model_tools.py now handles full description formatting) # Use the prompt as provided (model_tools.py now handles full description formatting)
comprehensive_prompt = user_prompt comprehensive_prompt = user_prompt
@ -307,7 +317,7 @@ async def vision_analyze_tool(
} }
] ]
print(f"🧠 Processing image with {model}...") print(f"🧠 Processing image with {model}...", flush=True)
# Call the vision API # Call the vision API
response = await nous_client.chat.completions.create( response = await nous_client.chat.completions.create(
@ -321,7 +331,7 @@ async def vision_analyze_tool(
analysis = response.choices[0].message.content.strip() analysis = response.choices[0].message.content.strip()
analysis_length = len(analysis) analysis_length = len(analysis)
print(f"✅ Image analysis completed ({analysis_length} characters)") print(f"✅ Image analysis completed ({analysis_length} characters)", flush=True)
# Prepare successful response # Prepare successful response
result = { result = {
@ -340,7 +350,7 @@ async def vision_analyze_tool(
except Exception as e: except Exception as e:
error_msg = f"Error analyzing image: {str(e)}" error_msg = f"Error analyzing image: {str(e)}"
print(f"{error_msg}") print(f"{error_msg}", flush=True)
# Prepare error response # Prepare error response
result = { result = {
@ -359,9 +369,9 @@ async def vision_analyze_tool(
if temp_image_path and temp_image_path.exists(): if temp_image_path and temp_image_path.exists():
try: try:
temp_image_path.unlink() temp_image_path.unlink()
print(f"🧹 Cleaned up temporary image file") print(f"🧹 Cleaned up temporary image file", flush=True)
except Exception as cleanup_error: except Exception as cleanup_error:
print(f"⚠️ Warning: Could not delete temporary file: {cleanup_error}") print(f"⚠️ Warning: Could not delete temporary file: {cleanup_error}", flush=True)
def check_nous_api_key() -> bool: def check_nous_api_key() -> bool: