Enhance batch processing and image generation tools

- Updated batch processing to include robust resume functionality by scanning completed prompts based on content rather than indices, improving recovery from failures.
- Implemented retry logic for image downloads with exponential backoff to handle transient failures effectively.
- Refined image generation tool to utilize the FLUX 2 Pro model, updating descriptions and parameters for clarity and consistency.
- Added new configuration scripts for GLM 4.7 and Imagen tasks, enhancing usability and logging capabilities.
- Removed outdated scripts and test files to streamline the codebase.
2026-01-18 10:11:59 +00:00 · 2026-01-18 10:11:59 +00:00 · 6eb76c7c1a
commit 6eb76c7c1a
parent b32cc4b09d
14 changed files with 293 additions and 233 deletions
--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@ -2,14 +2,14 @@
 """
 Image Generation Tools Module

-This module provides image generation tools using FAL.ai's FLUX.1 Krea model with 
+This module provides image generation tools using FAL.ai's FLUX 2 Pro model with 
 automatic upscaling via FAL.ai's Clarity Upscaler for enhanced image quality.

 Available tools:
 - image_generate_tool: Generate images from text prompts with automatic upscaling

 Features:
- High-quality image generation using FLUX.1 Krea model
+- High-quality image generation using FLUX 2 Pro model
 - Automatic 2x upscaling using Clarity Upscaler for enhanced quality
 - Comprehensive parameter control (size, steps, guidance, etc.)
 - Proper error handling and validation with fallback to original images
@ -38,13 +38,25 @@ from typing import Dict, Any, Optional, Union
 import fal_client

 # Configuration for image generation
-DEFAULT_MODEL = "fal-ai/flux/krea"
-DEFAULT_IMAGE_SIZE = "landscape_4_3"
+DEFAULT_MODEL = "fal-ai/flux-2-pro"
+DEFAULT_ASPECT_RATIO = "landscape"
 DEFAULT_NUM_INFERENCE_STEPS = 50
 DEFAULT_GUIDANCE_SCALE = 4.5
 DEFAULT_NUM_IMAGES = 1
 DEFAULT_OUTPUT_FORMAT = "png"

+# Safety settings
+ENABLE_SAFETY_CHECKER = False
+SAFETY_TOLERANCE = "5"  # Maximum tolerance (1-5, where 5 is most permissive)
+
+# Aspect ratio mapping - simplified choices for model to select
+ASPECT_RATIO_MAP = {
+    "landscape": "landscape_16_9",
+    "square": "square_hd",
+    "portrait": "portrait_16_9"
+}
+VALID_ASPECT_RATIOS = list(ASPECT_RATIO_MAP.keys())
+
 # Configuration for automatic upscaling
 UPSCALER_MODEL = "fal-ai/clarity-upscaler"
 UPSCALER_FACTOR = 2
@ -56,7 +68,7 @@ UPSCALER_RESEMBLANCE = 0.6
 UPSCALER_GUIDANCE_SCALE = 4
 UPSCALER_NUM_INFERENCE_STEPS = 18

-# Valid parameter values for validation based on FLUX Krea documentation
+# Valid parameter values for validation based on FLUX 2 Pro documentation
 VALID_IMAGE_SIZES = [
    "square_hd", "square", "portrait_4_3", "portrait_16_9", "landscape_4_3", "landscape_16_9"
 ]
@ -133,7 +145,7 @@ def _validate_parameters(
    acceleration: str = "none"
 ) -> Dict[str, Any]:
    """
-    Validate and normalize image generation parameters for FLUX Krea model.
+    Validate and normalize image generation parameters for FLUX 2 Pro model.
    
    Args:
        image_size: Either a preset string or custom size dict
@ -174,7 +186,7 @@ def _validate_parameters(
        raise ValueError("num_inference_steps must be an integer between 1 and 100")
    validated["num_inference_steps"] = num_inference_steps
    
-    # Validate guidance_scale (FLUX Krea default is 4.5)
+    # Validate guidance_scale (FLUX 2 Pro default is 4.5)
    if not isinstance(guidance_scale, (int, float)) or guidance_scale < 0.1 or guidance_scale > 20.0:
        raise ValueError("guidance_scale must be a number between 0.1 and 20.0")
    validated["guidance_scale"] = float(guidance_scale)
@ -254,34 +266,28 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]

 async def image_generate_tool(
    prompt: str,
-    image_size: Union[str, Dict[str, int]] = DEFAULT_IMAGE_SIZE,
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS,
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
    num_images: int = DEFAULT_NUM_IMAGES,
-    enable_safety_checker: bool = True,
    output_format: str = DEFAULT_OUTPUT_FORMAT,
-    acceleration: str = "none",
-    allow_nsfw_images: bool = True,
    seed: Optional[int] = None
 ) -> str:
    """
-    Generate images from text prompts using FAL.ai's FLUX.1 Krea model with automatic upscaling.
+    Generate images from text prompts using FAL.ai's FLUX 2 Pro model with automatic upscaling.
    
-    This tool uses FAL.ai's FLUX.1 Krea model for high-quality text-to-image generation 
+    This tool uses FAL.ai's FLUX 2 Pro model for high-quality text-to-image generation 
    with extensive customization options. Generated images are automatically upscaled 2x 
    using FAL.ai's Clarity Upscaler for enhanced quality. The final upscaled images are 
    returned as URLs that can be displayed using <img src="{URL}"></img> tags.
    
    Args:
        prompt (str): The text prompt describing the desired image
-        image_size (Union[str, Dict[str, int]]): Preset size or custom {"width": int, "height": int}
-        num_inference_steps (int): Number of denoising steps (1-50, default: 28)
+        aspect_ratio (str): Image aspect ratio - "landscape", "square", or "portrait" (default: "landscape")
+        num_inference_steps (int): Number of denoising steps (1-50, default: 50)
        guidance_scale (float): How closely to follow prompt (0.1-20.0, default: 4.5)
        num_images (int): Number of images to generate (1-4, default: 1)
-        enable_safety_checker (bool): Enable content safety filtering (default: True)
        output_format (str): Image format "jpeg" or "png" (default: "png")
-        acceleration (str): Generation speed "none", "regular", or "high" (default: "none")
-        allow_nsfw_images (bool): Allow generation of NSFW content (default: True)
        seed (Optional[int]): Random seed for reproducible results (optional)
    
    Returns:
@ -291,17 +297,22 @@ async def image_generate_tool(
                 "image": str or None  # URL of the upscaled image, or None if failed
             }
    """
+    # Validate and map aspect_ratio to actual image_size
+    aspect_ratio_lower = aspect_ratio.lower().strip() if aspect_ratio else DEFAULT_ASPECT_RATIO
+    if aspect_ratio_lower not in ASPECT_RATIO_MAP:
+        print(f"⚠️  Invalid aspect_ratio '{aspect_ratio}', defaulting to '{DEFAULT_ASPECT_RATIO}'")
+        aspect_ratio_lower = DEFAULT_ASPECT_RATIO
+    image_size = ASPECT_RATIO_MAP[aspect_ratio_lower]
+    
    debug_call_data = {
        "parameters": {
            "prompt": prompt,
+            "aspect_ratio": aspect_ratio,
            "image_size": image_size,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
            "num_images": num_images,
-            "enable_safety_checker": enable_safety_checker,
            "output_format": output_format,
-            "acceleration": acceleration,
-            "allow_nsfw_images": allow_nsfw_images,
            "seed": seed
        },
        "error": None,
@ -313,7 +324,7 @@ async def image_generate_tool(
    start_time = datetime.datetime.now()
    
    try:
-        print(f"🎨 Generating {num_images} image(s) with FLUX Krea: {prompt[:80]}{'...' if len(prompt) > 80 else ''}")
+        print(f"🎨 Generating {num_images} image(s) with FLUX 2 Pro: {prompt[:80]}{'...' if len(prompt) > 80 else ''}")
        
        # Validate prompt
        if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0:
@ -323,22 +334,21 @@ async def image_generate_tool(
        if not os.getenv("FAL_KEY"):
            raise ValueError("FAL_KEY environment variable not set")
        
-        # Validate parameters
+        # Validate other parameters
        validated_params = _validate_parameters(
-            image_size, num_inference_steps, guidance_scale, num_images, output_format, acceleration
+            image_size, num_inference_steps, guidance_scale, num_images, output_format, "none"
        )
        
-        # Prepare arguments for FAL.ai FLUX Krea API
+        # Prepare arguments for FAL.ai FLUX 2 Pro API
        arguments = {
            "prompt": prompt.strip(),
            "image_size": validated_params["image_size"],
            "num_inference_steps": validated_params["num_inference_steps"],
            "guidance_scale": validated_params["guidance_scale"],
            "num_images": validated_params["num_images"],
-            "enable_safety_checker": enable_safety_checker,
            "output_format": validated_params["output_format"],
-            "acceleration": validated_params["acceleration"],
-            "allow_nsfw_images": allow_nsfw_images,
+            "enable_safety_checker": ENABLE_SAFETY_CHECKER,
+            "safety_tolerance": SAFETY_TOLERANCE,
            "sync_mode": True  # Use sync mode for immediate results
        }
        
@ -346,12 +356,11 @@ async def image_generate_tool(
        if seed is not None and isinstance(seed, int):
            arguments["seed"] = seed
        
-        print(f"🚀 Submitting generation request to FAL.ai FLUX Krea...")
+        print(f"🚀 Submitting generation request to FAL.ai FLUX 2 Pro...")
        print(f"   Model: {DEFAULT_MODEL}")
-        print(f"   Size: {validated_params['image_size']}")
+        print(f"   Aspect Ratio: {aspect_ratio_lower} → {image_size}")
        print(f"   Steps: {validated_params['num_inference_steps']}")
        print(f"   Guidance: {validated_params['guidance_scale']}")
-        print(f"   Acceleration: {validated_params['acceleration']}")
        
        # Submit request to FAL.ai
        handler = await fal_client.submit_async(
@ -492,7 +501,7 @@ if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
-    print("🎨 Image Generation Tools Module - FLUX.1 Krea + Auto Upscaling")
+    print("🎨 Image Generation Tools Module - FLUX 2 Pro + Auto Upscaling")
    print("=" * 60)
    
    # Check if API key is available
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@ -131,35 +131,52 @@ def _validate_image_url(url: str) -> bool:
    return True  # Allow all HTTP/HTTPS URLs for flexibility


-async def _download_image(image_url: str, destination: Path) -> Path:
+async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
    """
-    Download an image from a URL to a local destination (async).
+    Download an image from a URL to a local destination (async) with retry logic.
    
    Args:
        image_url (str): The URL of the image to download
        destination (Path): The path where the image should be saved
+        max_retries (int): Maximum number of retry attempts (default: 3)
        
    Returns:
        Path: The path to the downloaded image
        
    Raises:
-        Exception: If download fails or response is invalid
+        Exception: If download fails after all retries
    """
+    import asyncio
+    
    # Create parent directories if they don't exist
    destination.parent.mkdir(parents=True, exist_ok=True)
    
-    # Download the image with appropriate headers using async httpx
-    async with httpx.AsyncClient(timeout=30.0) as client:
-        response = await client.get(
-            image_url,
-            headers={"User-Agent": "hermes-agent-vision/1.0"},
-        )
-        response.raise_for_status()
-        
-        # Save the image content
-        destination.write_bytes(response.content)
+    last_error = None
+    for attempt in range(max_retries):
+        try:
+            # Download the image with appropriate headers using async httpx
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                response = await client.get(
+                    image_url,
+                    headers={"User-Agent": "hermes-agent-vision/1.0"},
+                )
+                response.raise_for_status()
+                
+                # Save the image content
+                destination.write_bytes(response.content)
+            
+            return destination
+        except Exception as e:
+            last_error = e
+            if attempt < max_retries - 1:
+                wait_time = 2 ** (attempt + 1)  # 2s, 4s, 8s
+                print(f"⚠️  Image download failed (attempt {attempt + 1}/{max_retries}): {str(e)[:50]}")
+                print(f"   Retrying in {wait_time}s...")
+                await asyncio.sleep(wait_time)
+            else:
+                print(f"❌ Image download failed after {max_retries} attempts: {str(e)[:100]}")
    
-    return destination
+    raise last_error


 def _determine_mime_type(image_path: Path) -> str: